Package rdkit :: Package Chem :: Package ChemUtils :: Module TemplateExpand
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.ChemUtils.TemplateExpand

  1  # $Id: TemplateExpand.py 1053 2008-07-30 12:03:29Z landrgr1 $ 
  2  # 
  3  #  Created by Greg Landrum August, 2006 
  4  # 
  5  # 
  6  from __future__ import print_function 
  7  from rdkit import RDLogger as logging 
  8  logger = logging.logger() 
  9  logger.setLevel(logging.INFO) 
 10  from rdkit import Chem 
 11  from rdkit.Chem import Crippen 
 12  from rdkit.Chem import AllChem 
 13  from rdkit.Chem.ChemUtils.AlignDepict import AlignDepict 
 14   
 15  import sys 
 16  _version = "0.8.0" 
 17  _greet = "This is TemplateExpand version %s" % _version 
 18   
 19  _usage = """ 
 20  Usage: TemplateExpand [options] template <sidechains> 
 21   
 22   Unless otherwise indicated, the template and sidechains are assumed to be 
 23     Smiles 
 24   
 25   Each sidechain entry should be: 
 26     [-r] SMARTS filename 
 27       The SMARTS pattern is used to recognize the attachment point, 
 28       if the -r argument is not provided, then atoms matching the pattern 
 29       will be removed from the sidechains. 
 30     or 
 31     -n filename 
 32       where the attachment atom is the first atom in each molecule 
 33     The filename provides the list of potential sidechains. 
 34   
 35   options: 
 36     -o filename.sdf:      provides the name of the output file, otherwise 
 37                           stdout is used 
 38   
 39     --sdf :               expect the sidechains to be in SD files 
 40   
 41     --moltemplate:        the template(s) are in a mol/SD file, new depiction(s) 
 42                           will not be generated unless the --redraw argument is also 
 43                           provided 
 44   
 45     --smilesFileTemplate: extract the template(s) from a SMILES file instead of  
 46                           expecting SMILES on the command line. 
 47   
 48     --redraw:             generate a new depiction for the molecular template(s) 
 49   
 50     --useall: 
 51       or 
 52     --useallmatches:      generate a product for each possible match of the attachment 
 53                           pattern to each sidechain. If this is not provided, the first 
 54                           match (not canonically defined) will be used. 
 55   
 56     --force:              by default, the program prompts the user if the library is  
 57                           going to contain more than 1000 compounds. This argument  
 58                           disables the prompt. 
 59      
 60     --templateSmarts="smarts":  provides a space-delimited list containing the SMARTS  
 61                                 patterns to be used to recognize attachment points in 
 62                                                   the template 
 63                
 64     --autoNames:          when set this toggle causes the resulting compounds to be named 
 65                           based on there sequence id in the file, e.g.  
 66                           "TemplateEnum: Mol_1", "TemplateEnum: Mol_2", etc. 
 67                           otherwise the names of the template and building blocks (from 
 68                           the input files) will be combined to form a name for each 
 69                           product molecule. 
 70   
 71     --3D :                Generate 3d coordinates for the product molecules instead of 2d coordinates, 
 72                           requires the --moltemplate option 
 73   
 74     --tether :            refine the 3d conformations using a tethered minimization 
 75   
 76   
 77  """ 
 78   
 79   
80 -def Usage():
81 print(_usage, file=sys.stderr) 82 sys.exit(-1)
83 84 nDumped = 0 85 86
87 -def _exploder(mol, depth, sidechains, core, chainIndices, autoNames=True, templateName='', 88 resetCounter=True, do3D=False, useTethers=False):
89 global nDumped 90 if resetCounter: 91 nDumped = 0 92 ourChains = sidechains[depth] 93 patt = '[%d*]' % (depth + 1) 94 patt = Chem.MolFromSmiles(patt) 95 for i, (chainIdx, chain) in enumerate(ourChains): 96 tchain = chainIndices[:] 97 tchain.append((i, chainIdx)) 98 rs = Chem.ReplaceSubstructs(mol, patt, chain, replaceAll=True) 99 if rs: 100 r = rs[0] 101 if depth < len(sidechains) - 1: 102 for entry in _exploder(r, depth + 1, sidechains, core, tchain, autoNames=autoNames, 103 templateName=templateName, resetCounter=0, do3D=do3D, 104 useTethers=useTethers): 105 yield entry 106 else: 107 try: 108 Chem.SanitizeMol(r) 109 except ValueError: 110 import traceback 111 traceback.print_exc() 112 continue 113 if not do3D: 114 if r.HasSubstructMatch(core): 115 try: 116 AlignDepict(r, core) 117 except Exception: 118 import traceback 119 traceback.print_exc() 120 print(Chem.MolToSmiles(r), file=sys.stderr) 121 else: 122 print('>>> no match', file=sys.stderr) 123 AllChem.Compute2DCoords(r) 124 else: 125 r = Chem.AddHs(r) 126 AllChem.ConstrainedEmbed(r, core, useTethers) 127 Chem.Kekulize(r) 128 if autoNames: 129 tName = "TemplateEnum: Mol_%d" % (nDumped + 1) 130 else: 131 tName = templateName 132 for bbI, bb in enumerate(tchain): 133 bbMol = sidechains[bbI][bb[0]][1] 134 if bbMol.HasProp('_Name'): 135 bbNm = bbMol.GetProp('_Name') 136 else: 137 bbNm = str(bb[1]) 138 tName += '_' + bbNm 139 140 r.SetProp("_Name", tName) 141 r.SetProp('seq_num', str(nDumped + 1)) 142 r.SetProp('reagent_indices', '_'.join([str(x[1]) for x in tchain])) 143 for bbI, bb in enumerate(tchain): 144 bbMol = sidechains[bbI][bb[0]][1] 145 if bbMol.HasProp('_Name'): 146 bbNm = bbMol.GetProp('_Name') 147 else: 148 bbNm = str(bb[1]) 149 r.SetProp('building_block_%d' % (bbI + 1), bbNm) 150 r.SetIntProp('_idx_building_block_%d' % (bbI + 1), bb[1]) 151 for propN in bbMol.GetPropNames(): 152 r.SetProp('building_block_%d_%s' % (bbI + 1, propN), bbMol.GetProp(propN)) 153 nDumped += 1 154 if not nDumped % 100: 155 logger.info('Done %d molecules' % nDumped) 156 yield r
157 158
159 -def Explode(template, sidechains, outF, autoNames=True, do3D=False, useTethers=False):
160 chainIndices = [] 161 core = Chem.DeleteSubstructs(template, Chem.MolFromSmiles('[*]')) 162 try: 163 templateName = template.GetProp('_Name') 164 except KeyError: 165 templateName = "template" 166 for mol in _exploder(template, 0, sidechains, core, chainIndices, autoNames=autoNames, 167 templateName=templateName, do3D=do3D, useTethers=useTethers): 168 outF.write(Chem.MolToMolBlock(mol)) 169 for pN in mol.GetPropNames(): 170 print('> <%s>\n%s\n' % (pN, mol.GetProp(pN)), file=outF) 171 print('$$$$', file=outF)
172 173
174 -def MoveDummyNeighborsToBeginning(mol, useAll=False):
175 dummyPatt = Chem.MolFromSmiles('[*]') 176 matches = mol.GetSubstructMatches(dummyPatt) 177 res = [] 178 for match in matches: 179 matchIdx = match[0] 180 smi = Chem.MolToSmiles(mol, True, rootedAtAtom=matchIdx) 181 entry = Chem.MolFromSmiles(smi) 182 # entry now has [*] as atom 0 and the neighbor 183 # as atom 1. Cleave the [*]: 184 entry = Chem.DeleteSubstructs(entry, dummyPatt) 185 for propN in mol.GetPropNames(): 186 entry.SetProp(propN, mol.GetProp(propN)) 187 188 # now we have a molecule with the atom to be joined 189 # in position zero; Keep that: 190 res.append(entry) 191 if not useAll: 192 break 193 return res
194 195
196 -def ConstructSidechains(suppl, sma=None, replace=True, useAll=False):
197 if sma: 198 patt = Chem.MolFromSmarts(sma) 199 if patt is None: 200 logger.error('could not construct pattern from smarts: %s' % sma, exc_info=True) 201 return None 202 else: 203 patt = None 204 205 if replace: 206 replacement = Chem.MolFromSmiles('[*]') 207 208 res = [] 209 for idx, mol in enumerate(suppl): 210 if not mol: 211 continue 212 if patt: 213 if not mol.HasSubstructMatch(patt): 214 logger.warning( 215 'The substructure pattern did not match sidechain %d. This may result in errors.' % 216 (idx + 1)) 217 if replace: 218 tmp = list(Chem.ReplaceSubstructs(mol, patt, replacement)) 219 if not useAll: 220 tmp = [tmp[0]] 221 for i, entry in enumerate(tmp): 222 entry = MoveDummyNeighborsToBeginning(entry) 223 if not entry: 224 continue 225 entry = entry[0] 226 227 for propN in mol.GetPropNames(): 228 entry.SetProp(propN, mol.GetProp(propN)) 229 # now we have a molecule with the atom to be joined 230 # in position zero; Keep that: 231 tmp[i] = (idx + 1, entry) 232 else: 233 # no replacement, use the pattern to reorder 234 # atoms only: 235 matches = mol.GetSubstructMatches(patt) 236 if matches: 237 tmp = [] 238 for match in matches: 239 smi = Chem.MolToSmiles(mol, True, rootedAtAtom=match[0]) 240 entry = Chem.MolFromSmiles(smi) 241 for propN in mol.GetPropNames(): 242 entry.SetProp(propN, mol.GetProp(propN)) 243 244 # now we have a molecule with the atom to be joined 245 # in position zero; Keep that: 246 tmp.append((idx + 1, entry)) 247 else: 248 tmp = None 249 else: 250 tmp = [(idx + 1, mol)] 251 if tmp: 252 res.extend(tmp) 253 return res
254 255 256 if __name__ == '__main__': 257 import getopt 258 print(_greet, file=sys.stderr) 259 260 try: 261 args, extras = getopt.getopt(sys.argv[1:], 'o:h', [ 262 'sdf', 263 'moltemplate', 264 'molTemplate', 265 'smilesFileTemplate', 266 'templateSmarts=', 267 'redraw', 268 'force', 269 'useall', 270 'useallmatches', 271 'autoNames', 272 '3D', 273 '3d', 274 'tethers', 275 'tether', 276 ]) 277 except Exception: 278 import traceback 279 traceback.print_exc() 280 Usage() 281 282 if len(extras) < 3: 283 Usage() 284 285 tooLong = 1000 286 sdLigands = False 287 molTemplate = False 288 redrawTemplate = False 289 outF = None 290 forceIt = False 291 useAll = False 292 templateSmarts = [] 293 smilesFileTemplate = False 294 autoNames = False 295 do3D = False 296 useTethers = False 297 for arg, val in args: 298 if arg == '-o': 299 outF = val 300 elif arg == '--sdf': 301 sdLigands = True 302 elif arg in ('--moltemplate', '--molTemplate'): 303 molTemplate = True 304 elif arg == '--smilesFileTemplate': 305 smilesFileTemplate = True 306 elif arg == '--templateSmarts': 307 templateSmarts = val 308 elif arg == '--redraw': 309 redrawTemplate = True 310 elif arg == '--force': 311 forceIt = True 312 elif arg == '--autoNames': 313 autoNames = True 314 elif arg in ('--useall', '--useallmatches'): 315 useAll = True 316 elif arg in ('--3D', '--3d'): 317 do3D = True 318 elif arg in ('--tethers', '--tether'): 319 useTethers = True 320 elif arg == '-h': 321 Usage() 322 sys.exit(0) 323 324 if do3D: 325 if not molTemplate: 326 raise ValueError('the --3D option is only useable in combination with --moltemplate') 327 if redrawTemplate: 328 logger.warning( 329 '--redrawTemplate does not make sense in combination with --molTemplate. removing it') 330 redrawTemplate = False 331 332 if templateSmarts: 333 splitL = templateSmarts.split(' ') 334 templateSmarts = [] 335 for i, sma in enumerate(splitL): 336 patt = Chem.MolFromSmarts(sma) 337 if not patt: 338 raise ValueError('could not convert smarts "%s" to a query' % sma) 339 if i >= 4: 340 i += 1 341 replace = Chem.MolFromSmiles('[%d*]' % (i + 1)) 342 templateSmarts.append((patt, replace)) 343 344 if molTemplate: 345 removeHs = not do3D 346 try: 347 s = Chem.SDMolSupplier(extras[0], removeHs=removeHs) 348 templates = [x for x in s] 349 except Exception: 350 logger.error('Could not construct templates from input file: %s' % extras[0], exc_info=True) 351 sys.exit(1) 352 if redrawTemplate: 353 for template in templates: 354 AllChem.Compute2DCoords(template) 355 else: 356 if not smilesFileTemplate: 357 try: 358 templates = [Chem.MolFromSmiles(extras[0])] 359 except Exception: 360 logger.error('Could not construct template from smiles: %s' % extras[0], exc_info=True) 361 sys.exit(1) 362 else: 363 try: 364 s = Chem.SmilesMolSupplier(extras[0], titleLine=False) 365 templates = [x for x in s] 366 except Exception: 367 logger.error('Could not construct templates from input file: %s' % extras[0], exc_info=True) 368 sys.exit(1) 369 for template in templates: 370 AllChem.Compute2DCoords(template) 371 if templateSmarts: 372 finalTs = [] 373 for i, template in enumerate(templates): 374 for j, (patt, replace) in enumerate(templateSmarts): 375 if not template.HasSubstructMatch(patt): 376 logger.error('template %d did not match sidechain pattern %d, skipping it' % 377 (i + 1, j + 1)) 378 template = None 379 break 380 template = Chem.ReplaceSubstructs(template, patt, replace)[0] 381 if template: 382 Chem.SanitizeMol(template) 383 finalTs.append(template) 384 templates = finalTs 385 386 sidechains = [] 387 pos = 1 388 while pos < len(extras): 389 if extras[pos] == '-r': 390 replaceIt = False 391 pos += 1 392 else: 393 replaceIt = True 394 if extras[pos] == '-n': 395 sma = None 396 else: 397 sma = extras[pos] 398 pos += 1 399 try: 400 dat = extras[pos] 401 except IndexError: 402 logger.error('missing a sidechain filename') 403 sys.exit(-1) 404 pos += 1 405 if sdLigands: 406 try: 407 suppl = Chem.SDMolSupplier(dat) 408 except Exception: 409 logger.error('could not construct supplier from SD file: %s' % dat, exc_info=True) 410 suppl = [] 411 else: 412 tmpF = file(dat, 'r') 413 inL = tmpF.readline() 414 if len(inL.split(' ')) < 2: 415 nmCol = -1 416 else: 417 nmCol = 1 418 try: 419 suppl = Chem.SmilesMolSupplier(dat, nameColumn=nmCol) 420 except Exception: 421 logger.error('could not construct supplier from smiles file: %s' % dat, exc_info=True) 422 suppl = [] 423 suppl = [x for x in suppl] 424 chains = ConstructSidechains(suppl, sma=sma, replace=replaceIt, useAll=useAll) 425 if chains: 426 sidechains.append(chains) 427 count = 1 428 for chain in sidechains: 429 count *= len(chain) 430 count *= len(templates) 431 if not sidechains or not count: 432 print("No molecules to be generated.", file=sys.stderr) 433 sys.exit(0) 434 435 if not forceIt and count > tooLong: 436 print("This will generate %d molecules." % count, file=sys.stderr) 437 print("Continue anyway? [no] ", file=sys.stderr, end='') 438 sys.stderr.flush() 439 ans = sys.stdin.readline().strip() 440 if ans not in ('y', 'yes', 'Y', 'YES'): 441 sys.exit(0) 442 443 if outF and outF != "-": 444 try: 445 outF = file(outF, 'w+') 446 except IOError: 447 logger.error('could not open file %s for writing' % (outF), exc_info=True) 448 else: 449 outF = sys.stdout 450 451 for template in templates: 452 Explode(template, sidechains, outF, autoNames=autoNames, do3D=do3D, useTethers=useTethers) 453