Package rdkit :: Package Chem :: Module PandasTools
[hide private]
[frames | no frames]

Source Code for Module rdkit.Chem.PandasTools

  1  ''' 
  2  Importing pandasTools enables several features that allow for using RDKit molecules as columns of a 
  3  Pandas dataframe. 
  4  If the dataframe contains a column holding molecules in a text format (e.g. SMILES), as in this example: 
  5  >>> from rdkit.Chem import PandasTools 
  6  >>> import pandas as pd 
  7  >>> import os 
  8  >>> from rdkit import RDConfig 
  9  >>> antibiotics = pd.DataFrame(columns=['Name','Smiles']) 
 10  >>> antibiotics = antibiotics.append({'Smiles':'CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C', 
 11  ...   'Name':'Penicilline G'}, ignore_index=True)#Penicilline G 
 12  >>> antibiotics = antibiotics.append({ 
 13  ...   'Smiles':'CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O', 
 14  ...   'Name':'Tetracycline'}, ignore_index=True)#Tetracycline 
 15  >>> antibiotics = antibiotics.append({ 
 16  ...   'Smiles':'CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O)O)C', 
 17  ...   'Name':'Ampicilline'}, ignore_index=True)#Ampicilline 
 18  >>> print([str(x) for x in  antibiotics.columns]) 
 19  ['Name', 'Smiles'] 
 20  >>> print(antibiotics) 
 21              Name                                             Smiles 
 22  0  Penicilline G    CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C 
 23  1   Tetracycline  CC1(C2CC3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4... 
 24  2  Ampicilline  CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O... 
 25   
 26  a new column can be created holding the respective RDKit molecule objects. The fingerprint can be 
 27  included to accelerate substructure searches on the dataframe. 
 28   
 29  >>> PandasTools.AddMoleculeColumnToFrame(antibiotics,'Smiles','Molecule',includeFingerprints=True) 
 30  >>> print([str(x) for x in  antibiotics.columns]) 
 31  ['Name', 'Smiles', 'Molecule'] 
 32   
 33  A substructure filter can be applied on the dataframe using the RDKit molecule column, 
 34  because the ">=" operator has been modified to work as a substructure check. 
 35  Thus, the antibiotics containing the beta-lactam ring "C1C(=O)NC1" can be obtained by 
 36   
 37  >>> beta_lactam = Chem.MolFromSmiles('C1C(=O)NC1') 
 38  >>> beta_lactam_antibiotics = antibiotics[antibiotics['Molecule'] >= beta_lactam] 
 39  >>> print(beta_lactam_antibiotics[['Name','Smiles']]) 
 40              Name                                             Smiles 
 41  0  Penicilline G    CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C 
 42  2  Ampicilline  CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O... 
 43   
 44   
 45  It is also possible to load an SDF file into a dataframe. 
 46   
 47  >>> sdfFile = os.path.join(RDConfig.RDDataDir,'NCI/first_200.props.sdf') 
 48  >>> frame = PandasTools.LoadSDF(sdfFile,smilesName='SMILES',molColName='Molecule', 
 49  ...            includeFingerprints=True) 
 50  >>> frame.info # doctest: +SKIP 
 51  <bound method DataFrame.info of <class 'pandas.core.frame.DataFrame'> 
 52  Int64Index: 200 entries, 0 to 199 
 53  Data columns: 
 54  AMW                       200  non-null values 
 55  CLOGP                     200  non-null values 
 56  CP                        200  non-null values 
 57  CR                        200  non-null values 
 58  DAYLIGHT.FPG              200  non-null values 
 59  DAYLIGHT_CLOGP            200  non-null values 
 60  FP                        200  non-null values 
 61  ID                        200  non-null values 
 62  ISM                       200  non-null values 
 63  LIPINSKI_VIOLATIONS       200  non-null values 
 64  NUM_HACCEPTORS            200  non-null values 
 65  NUM_HDONORS               200  non-null values 
 66  NUM_HETEROATOMS           200  non-null values 
 67  NUM_LIPINSKIHACCEPTORS    200  non-null values 
 68  NUM_LIPINSKIHDONORS       200  non-null values 
 69  NUM_RINGS                 200  non-null values 
 70  NUM_ROTATABLEBONDS        200  non-null values 
 71  P1                        30  non-null values 
 72  SMILES                    200  non-null values 
 73  Molecule                  200  non-null values 
 74  dtypes: object(20)> 
 75   
 76  Conversion to html is quite easy: 
 77  >>> htm = frame.to_html() 
 78  >>> str(htm[:36]) 
 79  '<table border="1" class="dataframe">' 
 80   
 81  In order to support rendering the molecules as images in the HTML export of the dataframe, 
 82  the __str__ method is monkey-patched to return a base64 encoded PNG: 
 83  >>> molX = Chem.MolFromSmiles('Fc1cNc2ccccc12') 
 84  >>> print(molX) # doctest: +SKIP 
 85  <img src="data:image/png;base64,..." alt="Mol"/> 
 86  This can be reverted using the ChangeMoleculeRendering method 
 87  >>> ChangeMoleculeRendering(renderer='String') 
 88  >>> print(molX) # doctest: +SKIP 
 89  <rdkit.Chem.rdchem.Mol object at 0x10d179440> 
 90  >>> ChangeMoleculeRendering(renderer='PNG') 
 91  >>> print(molX) # doctest: +SKIP 
 92  <img src="data:image/png;base64,..." alt="Mol"/> 
 93   
 94  ''' 
 95  from __future__ import print_function 
 96   
 97  from base64 import b64encode 
 98  import sys 
 99  import types 
100   
101  import numpy as np 
102  from rdkit import Chem 
103  from rdkit import DataStructs 
104  from rdkit.Chem import AllChem 
105  from rdkit.Chem import Draw 
106  from rdkit.Chem.Draw import rdMolDraw2D 
107  from rdkit.Chem import SDWriter 
108  from rdkit.Chem import rdchem 
109  from rdkit.Chem.Scaffolds import MurckoScaffold 
110  from rdkit.six import BytesIO, string_types, PY3 
111   
try:
  import pandas as pd

  def _getPandasVersion():
    """ Get the pandas version as a tuple of ints, e.g. (1, 5, 3)

    Only the leading numeric components are used, so pre-release or development
    version strings such as '1.0.0rc1' or '0.25.0.dev0+752' parse without error.
    """
    import re
    try:
      v = pd.__version__
    except AttributeError:
      v = pd.version.version
    # keep the leading "digits and dots" part, then pick out the numbers; this
    # tolerates a trailing dot that a plain split('.') would turn into int('')
    numericPrefix = re.split(r'[^0-9.]', v)[0]
    return tuple(int(vi) for vi in re.findall(r'\d+', numericPrefix))

  def _setPandasOption(name, value):
    """ Set a pandas option; returns False if this pandas does not know the option

    Uses the public set_option API instead of inspecting the private option
    registry, whose module location is a pandas implementation detail.
    """
    try:
      pd.set_option(name, value)
    except (KeyError, AttributeError):  # raised by pandas for unknown option names
      return False
    return True

  if _getPandasVersion() < (0, 10):
    print("Pandas version {0} not compatible with tests".format(_getPandasVersion()),
          file=sys.stderr)
    pd = None
  else:
    # effectively disable truncation so that long base64 image strings survive rendering
    _setPandasOption('display.width', 1000000000)
    if not _setPandasOption('display.max_rows', 1000000000):
      _setPandasOption('display.height', 1000000000)
    _setPandasOption('display.max_colwidth', 1000000000)
    # saves the default pandas rendering to allow restoration
    defPandasRendering = pd.core.frame.DataFrame.to_html
except Exception:
  # Deliberately broad (this also covers ImportError): any failure while setting
  # up pandas is reported and the module falls back to "pandas not available".
  import traceback
  traceback.print_exc()
  pd = None

if pd is not None:
  # the formatting module has lived in different places over the pandas history
  try:
    from pandas.io.formats import format as fmt
  except ImportError:
    try:
      from pandas.formats import format as fmt
    except ImportError:
      from pandas.core import format as fmt  # older versions
else:
  fmt = 'Pandas not available'

highlightSubstructures = True  # highlight matched atoms after a ">=" substructure check
molRepresentation = 'png'  # supports also SVG
molSize = (200, 200)  # size handed to the drawing code when rendering a molecule
def patchPandasHTMLrepr(self, **kwargs):
  '''
  Patched default escaping of HTML control characters to allow molecule image rendering
  in dataframes.

  Accepts the same keyword arguments as the original DataFrame.to_html and forwards
  them; only "escape" is overridden and always forced to False so that <img>/<svg>
  markup in cells is emitted verbatim. Returns whatever the original to_html
  returns (the HTML string when no "buf" is passed).
  '''
  # Delegate to the renderer that was saved before any patching instead of
  # building a DataFrameFormatter by hand: the hand-built call dropped the
  # caller's kwargs and depended on pandas-internal constructor arguments.
  kwargs['escape'] = False
  return defPandasRendering(self, **kwargs)
def patchPandasHeadMethod(self, n=5):
  '''Return the first n rows as a subframe that keeps the patched rendering.

  The subframe gets its own bound copies of the patched "to_html" and "head"
  so that chained head() calls keep rendering molecule images.
  '''
  subframe = self[:n]
  for attrName, patched in (('to_html', patchPandasHTMLrepr), ('head', patchPandasHeadMethod)):
    setattr(subframe, attrName, types.MethodType(patched, subframe))
  return subframe
def _get_image(x):
  """Base64-encode raw image bytes and widen the pandas display limits to fit.

  Returns the ASCII base64 string. As a side effect "display.max_columns" and
  "display.max_rows" are set to the encoded length plus 1000, and
  "display.max_colwidth" is raised to that value when it is currently too small.
  """
  encoded = b64encode(x).decode('ascii')
  roomy = len(encoded) + 1000
  for optionName in ('display.max_columns', 'display.max_rows'):
    pd.set_option(optionName, roomy)
  if pd.get_option("display.max_colwidth") < len(encoded) + 100:
    pd.set_option("display.max_colwidth", roomy)
  return encoded
def _get_svg_image(mol, size=(200, 200), highlightAtoms=[]):
  """ Render mol as SVG markup, optionally highlighting the given atoms """
  from IPython.display import SVG
  from rdkit.Chem import rdDepictor
  from rdkit.Chem.Draw import rdMolDraw2D
  try:
    # probing for a conformer tells us whether coordinates already exist
    mol.GetConformer(-1)
  except ValueError:
    # no coordinates yet: calculate a 2D layout
    rdDepictor.Compute2DCoords(mol)
  width, height = size
  canvas = rdMolDraw2D.MolDraw2DSVG(width, height)
  canvas.DrawMolecule(mol, highlightAtoms=highlightAtoms)
  canvas.FinishDrawing()
  markup = canvas.GetDrawingText().replace('svg:', '')
  # IPython's SVG clears the svg text
  return SVG(markup).data
# Pick the fingerprint used to pre-screen substructure queries: Avalon when the
# optional Avalon toolkit can be imported, the RDKit pattern fingerprint otherwise.
try:
  from rdkit.Avalon import pyAvalonTools as pyAvalonTools

  def _fingerprinter(x, y):
    # Calculate the Avalon fingerprint (y flags whether x is the query molecule)
    return pyAvalonTools.GetAvalonFP(x, isQuery=y, bitFlags=pyAvalonTools.avalonSSSBits)
except ImportError:

  def _fingerprinter(x, y):
    # Calculate fingerprint using SMARTS patterns (y is not used here)
    return Chem.PatternFingerprint(x, fpSize=2048)
def _molge(x, y):
  """Allows for substructure check using the >= operator (X has substructure Y -> X >= Y) by
  monkey-patching the __ge__ function
  This has the effect that the pandas/numpy rowfilter can be used for substructure filtering
  (filtered = dframe[dframe['RDKitColumn'] >= SubstructureMolecule])

  Returns True if y is a substructure of x, False otherwise (also when either is None).
  Side effect: x.__sssAtoms is reset on every comparison and, when
  highlightSubstructures is set, filled with the matching atom indices.
  """
  if x is None or y is None:
    return False
  # Reset up front so that highlights from an earlier, successful comparison do
  # not linger when the fingerprint pre-screen below rejects this query.
  x.__sssAtoms = []
  if hasattr(x, '_substructfp'):
    # cheap fingerprint screen before the expensive substructure match;
    # the query fingerprint is computed once and cached on the query molecule
    if not hasattr(y, '_substructfp'):
      y._substructfp = _fingerprinter(y, True)
    if not DataStructs.AllProbeBitsMatch(y._substructfp, x._substructfp):
      return False
  match = x.GetSubstructMatch(y)
  if not match:
    return False
  if highlightSubstructures:
    x.__sssAtoms = list(match)
  return True
def PrintAsBase64PNGString(x, renderer=None):
  '''Render molecule x as embeddable markup: an <img> tag holding a base64 PNG,
  or SVG markup when molRepresentation is 'svg'. The renderer argument is not used.
  '''
  highlighted = []
  if highlightSubstructures and hasattr(x, '__sssAtoms'):
    # atoms recorded by the last substructure comparison on this molecule
    highlighted = x.__sssAtoms
  if molRepresentation.lower() != 'svg':
    png = Draw._moltoimg(x, molSize, highlighted, "", returnPNG=True, kekulize=True)
    return '<img src="data:image/png;base64,%s" alt="Mol"/>' % _get_image(png)
  from IPython.display import SVG
  return SVG(Draw._moltoSVG(x, molSize, highlighted, "", True)).data
def PrintDefaultMolRep(x):
  '''Return the plain __repr__ of x as a str (the non-image molecule rendering)'''
  defaultRepr = x.__repr__()
  return str(defaultRepr)
def _MolPlusFingerprint(m):
  '''Attach a precomputed substructure fingerprint to molecule m (as m._substructfp)
  to accelerate later substructure matching. None is passed through untouched.
  Returns m.
  '''
  if m is None:
    return m
  m._substructfp = _fingerprinter(m, False)
  return m
def RenderImagesInAllDataFrames(images=True):
  '''Switch DataFrame.to_html between the patched renderer, which does not escape HTML
  characters and so lets molecule images show up, and the saved pandas default.
  IMPORTANT: THIS IS A GLOBAL CHANGE THAT WILL AFFECT THE COMPLETE PYTHON SESSION. If you
  want to change the rendering only for a single dataframe use the
  "ChangeMoleculeRendering" method instead.
  '''
  chosen = patchPandasHTMLrepr if images else defPandasRendering
  pd.core.frame.DataFrame.to_html = chosen
def AddMoleculeColumnToFrame(frame, smilesCol='Smiles', molCol='ROMol', includeFingerprints=False):
  '''Parse the SMILES held in column "smilesCol" into RDKit molecules and store them in
  column "molCol" of "frame" (modified in place).
  With includeFingerprints=True each molecule also gets a precomputed fingerprint to
  accelerate substructure matching. Image rendering is switched on for all dataframes.
  '''
  if includeFingerprints:

    def toMol(smiles):
      return _MolPlusFingerprint(Chem.MolFromSmiles(smiles))
  else:
    toMol = Chem.MolFromSmiles
  frame[molCol] = frame[smilesCol].map(toMol)
  RenderImagesInAllDataFrames(images=True)
def ChangeMoleculeRendering(frame=None, renderer='PNG'):
  '''Switch how molecules are rendered: renderer='String' selects the plain repr, any
  other value selects the image rendering (base64 PNG / SVG).
  This serves two purposes: it avoids generating images when they are not wanted, and
  it enables image rendering for a newly created dataframe that already contains
  molecules without rerunning the time-consuming AddMoleculeColumnToFrame. The latter is
  needed because some pandas methods, e.g. head(), return a new dataframe instance that
  uses the default pandas rendering instead of the monkey-patched one.
  If "frame" is given, its to_html is bound to the patched renderer.
  '''
  wantsString = renderer == 'String'
  Chem.Mol.__str__ = PrintDefaultMolRep if wantsString else PrintAsBase64PNGString
  if frame is None:
    return
  frame.to_html = types.MethodType(patchPandasHTMLrepr, frame)
def LoadSDF(filename, idName='ID', molColName='ROMol', includeFingerprints=False,
            isomericSmiles=False, smilesName=None, embedProps=False):
  '''Read file in SDF format and return as Pandas data frame.
  "filename" is either a path (a name ending in ".gz" is opened with gzip) or an
  already open binary file object, which is left open for the caller.
  If embedProps=True all properties also get embedded in Mol objects in the molecule column.
  If molColName=None molecules would not be present in resulting DataFrame (only properties
  would be read).
  The molecule title goes into column "idName"; if "smilesName" is given, a SMILES column
  of that name is generated. Records that cannot be parsed are skipped; the dataframe
  index holds each molecule's position in the file.
  '''
  if isinstance(filename, string_types):
    if filename.lower().endswith(".gz"):
      import gzip
      f = gzip.open(filename, "rb")
    else:
      f = open(filename, 'rb')
    close = f.close
  else:
    f = filename
    close = None  # don't close an open file that was passed in
  records = []
  indices = []
  try:
    for i, mol in enumerate(Chem.ForwardSDMolSupplier(f, sanitize=(molColName is not None))):
      if mol is None:
        continue
      propNames = list(mol.GetPropNames())
      row = {k: mol.GetProp(k) for k in propNames}
      if molColName is not None and not embedProps:
        # the properties live in the dataframe row; drop them from the molecule
        for prop in propNames:
          mol.ClearProp(prop)
      if mol.HasProp('_Name'):
        row[idName] = mol.GetProp('_Name')
      if smilesName is not None:
        row[smilesName] = Chem.MolToSmiles(mol, isomericSmiles=isomericSmiles)
      if molColName is not None:
        row[molColName] = _MolPlusFingerprint(mol) if includeFingerprints else mol
      records.append(row)
      indices.append(i)
  finally:
    # release a file we opened ourselves even if parsing failed part-way
    if close is not None:
      close()
  RenderImagesInAllDataFrames(images=True)
  return pd.DataFrame(records, index=indices)
def WriteSDF(df, out, molColName='ROMol', idName=None, properties=None, allNumeric=False):
  '''Write an SD file for the molecules in the dataframe. Dataframe columns can be exported as
  SDF tags if specified in the "properties" list. "properties=list(df.columns)" would export
  all columns.
  The "allNumeric" flag allows to automatically include all numeric (float or integer, of
  any width) columns in the output. User has to make sure that correct data type is
  assigned to column.
  "idName" can be used to select a column to serve as molecule title. It can be set to
  "RowID" to use the dataframe row key as title.
  "out" is handed to SDWriter as is, except that a file name ending in ".gz" is opened
  with gzip first (and closed again at the end).
  '''
  close = None
  if isinstance(out, string_types):
    if out.lower().endswith(".gz"):
      import gzip
      if PY3:
        out = gzip.open(out, "wt")
      else:
        out = gzip.open(out, "wb")
      close = out.close

  try:
    writer = SDWriter(out)
    try:
      if properties is None:
        properties = []
      else:
        properties = list(properties)  # copy so the caller's list is never modified
      if allNumeric:
        # test against the abstract numpy kinds: the Python builtins float/int only
        # stand for one concrete width, which misses e.g. float32/int32 columns
        properties.extend(
          [dt for dt in df.dtypes.keys()
           if (np.issubdtype(df.dtypes[dt], np.floating) or
               np.issubdtype(df.dtypes[dt], np.integer))])

      if molColName in properties:
        properties.remove(molColName)
      if idName in properties:
        properties.remove(idName)
      writer.SetProps(properties)
      for rowKey, rowData in df.iterrows():
        # make a local copy I can modify
        mol = Chem.Mol(rowData[molColName])

        if idName is not None:
          if idName == 'RowID':
            mol.SetProp('_Name', str(rowKey))
          else:
            mol.SetProp('_Name', str(rowData[idName]))
        for p in properties:
          cell_value = rowData[p]
          # Make sure float does not get formatted in E notation
          if np.issubdtype(type(cell_value), np.floating):
            s = '{:f}'.format(cell_value).rstrip("0")  # "f" will show 7.0 as 7.00000
            if s[-1] == ".":
              s += "0"  # put the "0" back on if it's something like "7."
            mol.SetProp(p, s)
          else:
            mol.SetProp(p, str(cell_value))
        writer.write(mol)
    finally:
      # flush and release the writer even when a row could not be written
      writer.close()
  finally:
    if close is not None:
      close()
# SaltRemover instance shared by all RemoveSaltsFromFrame calls; created on first use
_saltRemover = None


def RemoveSaltsFromFrame(frame, molCol='ROMol'):
  '''
  Replaces every molecule in column molCol of the DataFrame by its salt-stripped form
  '''
  global _saltRemover
  if _saltRemover is None:
    from rdkit.Chem import SaltRemover
    _saltRemover = SaltRemover.SaltRemover()

  def stripRow(row):
    return _saltRemover.StripMol(row[molCol])

  frame[molCol] = frame.apply(stripRow, axis=1)
def SaveSMILESFromFrame(frame, outFile, molCol='ROMol', NamesCol='', isomericSmiles=False):
  '''
  Writes a smi file with SMILES generated from the RDKit molecules in column molCol.
  If NamesCol is given, its values (as strings) are set as the "_Name" property of the
  molecules before they are written.
  '''
  writer = Chem.SmilesWriter(outFile, isomericSmiles=isomericSmiles)
  if NamesCol == '':
    for mol in frame[molCol]:
      writer.write(mol)
  else:
    names = (str(c) for c in frame[NamesCol])
    for mol, name in zip(frame[molCol], names):
      mol.SetProp('_Name', name)
      writer.write(mol)
  writer.close()
def SaveXlsxFromFrame(frame, outFile, molCol='ROMol', size=(300, 300)):
  """
  Saves pandas DataFrame as a xlsx file with embedded images.
  It maps numpy data types to excel cell types:
  int, float -> number (NaN and infinite values are left as empty cells)
  datetime -> datetime
  object -> string (limited to 32k character - xlsx limitations)

  Column molCol is rendered as an image of the given size in the first sheet column;
  all other columns follow to the right, below a header row with the column names.

  Cells with compound images are a bit larger than images due to excel.
  Column width weirdness explained (from xlsxwriter docs):
  The width corresponds to the column width value that is specified in Excel.
  It is approximately equal to the length of a string in the default font of Calibri 11.
  Unfortunately, there is no way to specify "AutoFit" for a column in the Excel file format.
  This feature is only available at runtime from within Excel.
  """

  import xlsxwriter  # don't want to make this a RDKit dependency

  cols = list(frame.columns)
  cols.remove(molCol)
  dataTypes = dict(frame.dtypes)

  workbook = xlsxwriter.Workbook(outFile)  # New workbook
  worksheet = workbook.add_worksheet()  # New work sheet
  worksheet.set_column('A:A', size[0] / 6.)  # column width

  # Write first row with column names
  for c2, x in enumerate(cols, 1):
    worksheet.write_string(0, c2, x)

  # The image buffers are kept until the workbook has been closed and are all
  # released afterwards (this also copes with an empty frame).
  imageBuffers = []
  for c, (_, row) in enumerate(frame.iterrows(), 1):
    image_data = BytesIO()
    imageBuffers.append(image_data)
    img = Draw.MolToImage(row[molCol], size=size)
    img.save(image_data, format='PNG')

    worksheet.set_row(c, height=size[1])  # looks like height is not in px?
    worksheet.insert_image(c, 0, "f", {'image_data': image_data})

    for c2, x in enumerate(cols, 1):
      typeName = str(dataTypes[x])
      if typeName == "object":
        worksheet.write_string(c, c2, str(row[x])[:32000])  # string length is limited in xlsx
      elif ('float' in typeName) or ('int' in typeName):
        # "x != nan" is always true, so test finiteness explicitly: NaN and
        # +/-inf must not be handed to write_number
        if np.isfinite(row[x]):
          worksheet.write_number(c, c2, row[x])
      elif 'datetime' in typeName:
        worksheet.write_datetime(c, c2, row[x])

  workbook.close()
  for buf in imageBuffers:
    buf.close()
def FrameToGridImage(frame, column='ROMol', legendsCol=None, **kwargs):
  '''
  Draw a grid image of the molecules in "column" of a pandas DataFrame.
  If legendsCol is given, legends are taken from that column, or from the index when
  legendsCol equals the index name. Other keyword arguments go to Draw.MolsToGridImage.
  '''
  if legendsCol:
    labelSource = frame.index if legendsCol == frame.index.name else frame[legendsCol]
    kwargs['legends'] = [str(c) for c in labelSource]
  return Draw.MolsToGridImage(frame[column], **kwargs)
def AddMurckoToFrame(frame, molCol='ROMol', MurckoCol='Murcko_SMILES', Generic=False):
  '''
  Adds column MurckoCol holding the SMILES of the Murcko scaffold of each molecule
  in molCol.

  Generic set to true results in SMILES of generic framework.
  '''

  def scaffoldSmiles(row):
    scaffold = MurckoScaffold.GetScaffoldForMol(row[molCol])
    if Generic:
      scaffold = MurckoScaffold.MakeScaffoldGeneric(scaffold)
    return Chem.MolToSmiles(scaffold)

  frame[MurckoCol] = frame.apply(scaffoldSmiles, axis=1)
def AlignMol(mol, scaffold):
  """
  Generates a depiction of mol (RDKit mol object) matching the 2D layout computed for
  scaffold (SMILES string); mol is modified in place and returned
  """
  template = Chem.MolFromSmiles(scaffold)
  AllChem.Compute2DCoords(template)
  AllChem.GenerateDepictionMatching2DStructure(mol, template)
  return mol
def AlignToScaffold(frame, molCol='ROMol', scaffoldCol='Murcko_SMILES'):
  '''
  Aligns, row by row, the molecule in molCol to the scaffold SMILES in scaffoldCol
  '''

  def alignRow(row):
    return AlignMol(row[molCol], row[scaffoldCol])

  frame[molCol] = frame.apply(alignRow, axis=1)
# ==========================================================================================
# Monkey patching RDkit functionality
def InstallPandasTools():
  """ Monkey patch __ge__ and __str__ of Chem.Mol; the originals are saved on first call """
  global _originalSettings
  if not _originalSettings:
    # remember the unpatched methods once so that UninstallPandasTools can restore them
    _originalSettings.update({
      'Chem.Mol.__ge__': Chem.Mol.__ge__,
      'Chem.Mol.__str__': Chem.Mol.__str__,
    })
  rdchem.Mol.__ge__ = _molge
  rdchem.Mol.__str__ = PrintAsBase64PNGString
def UninstallPandasTools():
  """ Restore the Chem.Mol methods saved by InstallPandasTools """
  global _originalSettings
  for attrName, key in (('__ge__', 'Chem.Mol.__ge__'), ('__str__', 'Chem.Mol.__str__')):
    setattr(Chem.Mol, attrName, _originalSettings[key])


# original Chem.Mol methods, filled in by the first InstallPandasTools call
_originalSettings = {}
InstallPandasTools()
# ------------------------------------
#
#  doctest boilerplate
#
def _runDoctests(verbose=None):  # pragma: nocover
  """ Run this module's doctests and exit with the failure count if any fail """
  import doctest
  flags = doctest.ELLIPSIS + doctest.NORMALIZE_WHITESPACE
  failed, _ = doctest.testmod(optionflags=flags, verbose=verbose)
  if failed:
    sys.exit(failed)


if __name__ == '__main__':  # pragma: nocover
  import unittest
  try:
    import xlsxwriter
  except ImportError:
    print('not there')
    xlsxwriter = None

  class TestCase(unittest.TestCase):

    @unittest.skipIf(xlsxwriter is None, 'xlsxwriter not installed')
    def testGithub1507(self):
      # loads the sample SDF and exports it to xlsx
      import os
      from rdkit import RDConfig
      sdfFile = os.path.join(RDConfig.RDDataDir, 'NCI/first_200.props.sdf')
      frame = LoadSDF(sdfFile)
      SaveXlsxFromFrame(frame, 'foo.xlsx')

  if pd is None:
    print("pandas installation not found, skipping tests", file=sys.stderr)
  elif _getPandasVersion() < (0, 10):
    print("pandas installation >=0.10 not found, skipping tests", file=sys.stderr)
  else:
    _runDoctests()
    unittest.main()

# $Id$
#
#  Copyright (c) 2013, Novartis Institutes for BioMedical Research Inc.
#  All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
#       with the distribution.
#     * Neither the name of Novartis Institutes for BioMedical Research Inc.
#       nor the names of its contributors may be used to endorse or promote
#       products derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#