Package rdkit :: Package Chem :: Module BRICS
[hide private]
[frames | no frames]

Source Code for Module rdkit.Chem.BRICS

  1  # $Id$ 
  2  # 
  3  #  Copyright (c) 2009, Novartis Institutes for BioMedical Research Inc. 
  4  #  All rights reserved. 
  5  #  
  6  # Redistribution and use in source and binary forms, with or without 
  7  # modification, are permitted provided that the following conditions are 
  8  # met:  
  9  # 
 10  #     * Redistributions of source code must retain the above copyright  
 11  #       notice, this list of conditions and the following disclaimer. 
 12  #     * Redistributions in binary form must reproduce the above 
 13  #       copyright notice, this list of conditions and the following  
 14  #       disclaimer in the documentation and/or other materials provided  
 15  #       with the distribution. 
 16  #     * Neither the name of Novartis Institutes for BioMedical Research Inc.  
 17  #       nor the names of its contributors may be used to endorse or promote  
 18  #       products derived from this software without specific prior written permission. 
 19  # 
 20  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 21  # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 22  # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 23  # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 24  # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 25  # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 26  # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 27  # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 28  # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 29  # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 30  # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 31  # 
 32  # Created by Greg Landrum, Nov 2008 
 33  """ Implementation of the BRICS algorithm from Degen et al. ChemMedChem *3* 1503-7 (2008) 
 34   
 35  """ 
 36  from __future__ import print_function 
 37  import sys, re, random 
 38  from rdkit import Chem 
 39  from rdkit.Chem import rdChemReactions as Reactions 
 40  from rdkit.six import iteritems, iterkeys, next 
 41  from rdkit.six.moves import range 
 42   
 43  # These are the definitions that will be applied to fragment molecules: 
 44  environs = { 
 45    'L1': '[C;D3]([#0,#6,#7,#8])(=O)', 
 46    # 
 47    # After some discussion, the L2 definitions ("N.pl3" in the original 
 48    # paper) have been removed and incorporated into a (almost) general 
 49    # purpose amine definition in L5 ("N.sp3" in the paper). 
 50    # 
 51    # The problem is one of consistency. 
 52    #    Based on the original definitions you should get the following 
 53    #    fragmentations: 
 54    #      C1CCCCC1NC(=O)C -> C1CCCCC1N[2*].[1*]C(=O)C 
 55    #      c1ccccc1NC(=O)C -> c1ccccc1[16*].[2*]N[2*].[1*]C(=O)C 
 56    #    This difference just didn't make sense to us. By switching to 
 57    #    the unified definition we end up with: 
 58    #      C1CCCCC1NC(=O)C -> C1CCCCC1[15*].[5*]N[5*].[1*]C(=O)C 
 59    #      c1ccccc1NC(=O)C -> c1ccccc1[16*].[5*]N[5*].[1*]C(=O)C 
 60    # 
 61    #'L2':'[N;!R;!D1;!$(N=*)]-;!@[#0,#6]', 
 62    # this one turned out to be too tricky to define above, so we set it off 
 63    # in its own definition: 
 64    #'L2a':'[N;D3;R;$(N(@[C;!$(C=*)])@[C;!$(C=*)])]', 
 65    'L3': '[O;D2]-;!@[#0,#6,#1]', 
 66    'L4': '[C;!D1;!$(C=*)]-;!@[#6]', 
 67    #'L5':'[N;!D1;!$(N*!-*);!$(N=*);!$(N-[!C;!#0])]-[#0,C]', 
 68    'L5': '[N;!D1;!$(N=*);!$(N-[!#6;!#16;!#0;!#1]);!$([N;R]@[C;R]=O)]', 
 69    'L6': '[C;D3;!R](=O)-;!@[#0,#6,#7,#8]', 
 70    'L7a': '[C;D2,D3]-[#6]', 
 71    'L7b': '[C;D2,D3]-[#6]', 
 72    '#L8': '[C;!R;!D1]-;!@[#6]', 
 73    'L8': '[C;!R;!D1;!$(C!-*)]', 
 74    'L9': '[n;+0;$(n(:[c,n,o,s]):[c,n,o,s])]', 
 75    'L10': '[N;R;$(N(@C(=O))@[C,N,O,S])]', 
 76    'L11': '[S;D2](-;!@[#0,#6])', 
 77    'L12': '[S;D4]([#6,#0])(=O)(=O)', 
 78    'L13': '[C;$(C(-;@[C,N,O,S])-;@[N,O,S])]', 
 79    'L14': '[c;$(c(:[c,n,o,s]):[n,o,s])]', 
 80    'L14b': '[c;$(c(:[c,n,o,s]):[n,o,s])]', 
 81    'L15': '[C;$(C(-;@C)-;@C)]', 
 82    'L16': '[c;$(c(:c):c)]', 
 83    'L16b': '[c;$(c(:c):c)]', 
 84  } 
 85  reactionDefs = ( 
 86    # L1 
 87    [ 
 88      ('1', '3', '-'), 
 89      ('1', '5', '-'), 
 90      ('1', '10', '-'), 
 91    ], 
 92   
 93    # L3  
 94    [ 
 95      ('3', '4', '-'), 
 96      ('3', '13', '-'), 
 97      ('3', '14', '-'), 
 98      ('3', '15', '-'), 
 99      ('3', '16', '-'), 
100    ], 
101   
102    # L4 
103    [ 
104      ('4', '5', '-'), 
105      ('4', '11', '-'), 
106    ], 
107   
108    # L5 
109    [ 
110      ('5', '12', '-'), 
111      ('5', '14', '-'), 
112      ('5', '16', '-'), 
113      ('5', '13', '-'), 
114      ('5', '15', '-'), 
115    ], 
116   
117    # L6 
118    [ 
119      ('6', '13', '-'), 
120      ('6', '14', '-'), 
121      ('6', '15', '-'), 
122      ('6', '16', '-'), 
123    ], 
124   
125    # L7 
126    [ 
127      ('7a', '7b', '='), 
128    ], 
129   
130    # L8 
131    [ 
132      ('8', '9', '-'), 
133      ('8', '10', '-'), 
134      ('8', '13', '-'), 
135      ('8', '14', '-'), 
136      ('8', '15', '-'), 
137      ('8', '16', '-'), 
138    ], 
139   
140    # L9 
141    [ 
142      ('9', '13', '-'),  # not in original paper 
143      ('9', '14', '-'),  # not in original paper 
144      ('9', '15', '-'), 
145      ('9', '16', '-'), 
146    ], 
147   
148    # L10 
149    [ 
150      ('10', '13', '-'), 
151      ('10', '14', '-'), 
152      ('10', '15', '-'), 
153      ('10', '16', '-'), 
154    ], 
155   
156    # L11 
157    [ 
158      ('11', '13', '-'), 
159      ('11', '14', '-'), 
160      ('11', '15', '-'), 
161      ('11', '16', '-'), 
162    ], 
163   
164    # L12 
165    # none left 
166   
167    # L13 
168    [ 
169      ('13', '14', '-'), 
170      ('13', '15', '-'), 
171      ('13', '16', '-'), 
172    ], 
173   
174    # L14 
175    [ 
176      ('14', '14', '-'),  # not in original paper 
177      ('14', '15', '-'), 
178      ('14', '16', '-'), 
179    ], 
180   
181    # L15 
182    [ 
183      ('15', '16', '-'), 
184    ], 
185   
186    # L16 
187    [ 
188      ('16', '16', '-'),  # not in original paper 
189    ], ) 
import copy

# smartsGps mirrors the nesting of reactionDefs, but holds one fragmentation
# reaction SMARTS per rule instead of the (label, label, bond) triple.
# The reaction cuts the acyclic bond between the two environments and caps
# each side with a dummy atom whose isotope is the numeric part of the label
# (so '7a' and '7b' both become isotope 7).
smartsGps = copy.deepcopy(reactionDefs)
for rxnGroup in smartsGps:
  rxnGroup[:] = [
    '[$(%s):1]%s;!@[$(%s):2]>>[%s*]-[*:1].[%s*]-[*:2]' %
    (environs['L' + lbl1], bondSym, environs['L' + lbl2], re.sub('[a-z,A-Z]', '', lbl1),
     re.sub('[a-z,A-Z]', '', lbl2)) for lbl1, lbl2, bondSym in rxnGroup
  ]

# Fail at import time, after showing the offending SMARTS, if any of the
# generated reactions cannot be parsed or initialized.
for rxnGroup in smartsGps:
  for rxnSmarts in rxnGroup:
    try:
      Reactions.ReactionFromSmarts(rxnSmarts).Initialize()
    except Exception:
      print(rxnSmarts)
      raise

# one precompiled query per environment, keyed like environs
environMatchers = {envName: Chem.MolFromSmarts(envSmarts) for envName, envSmarts in iteritems(environs)}

# for every rule: (label 1, label 2, bond symbol, query matching that bond)
bondMatchers = [[(lbl1, lbl2, bondSym,
                  Chem.MolFromSmarts('[$(%s)]%s;!@[$(%s)]' %
                                     (environs['L%s' % lbl1], bondSym, environs['L%s' % lbl2])))
                 for lbl1, lbl2, bondSym in ruleGroup]
                for ruleGroup in reactionDefs]

# the fragmentation reactions themselves, grouped like smartsGps
reactions = tuple([Reactions.ReactionFromSmarts(rxnSmarts) for rxnSmarts in rxnGroup]
                  for rxnGroup in smartsGps)

# The same reactions run backwards (fragments -> bonded product), as one flat
# list. Each carries ``_matchers``: the two labelled dummy atoms its
# reactants must contain, in reactant order.
reverseReactions = []
for rxnGroup in smartsGps:
  for rxnSmarts in rxnGroup:
    wholeSide, fragmentSide = rxnSmarts.split('>>')
    reverseRxn = Reactions.ReactionFromSmarts('%s>>%s' % (fragmentSide, wholeSide))
    reverseRxn._matchers = [
      Chem.MolFromSmiles('[%s*]' % isoLabel)
      for isoLabel in re.findall(r'\[([0-9]+?)\*\]', fragmentSide)
    ]
    reverseReactions.append(reverseRxn)
237   
def FindBRICSBonds(mol, randomizeOrder=False, silent=True):
  """ returns the bonds in a molecule that BRICS would cleave

  This is a generator. Every item it yields has the form
  ((atomIdx1, atomIdx2), (label1, label2)): the indices of the two atoms
  of a cleavable bond and the numeric environment labels (as strings, with
  any letter suffix such as the 'a' in '7a' removed) of those two atoms.
  A bond is reported only once, even if several rules match it.

  Arguments:
    - mol: the molecule to search
    - randomizeOrder: if true, the rule groups and the rules inside each
      group are tried in a random order (drawn from the python random
      module, so random.seed() makes it reproducible)
    - silent: accepted for symmetry with the other functions in this
      module; not used here

  >>> from rdkit import Chem
  >>> m = Chem.MolFromSmiles('CCCOCC')
  >>> res = list(FindBRICSBonds(m))
  >>> res
  [((3, 2), ('3', '4')), ((3, 4), ('3', '4'))]

  a more complicated case:
  >>> m = Chem.MolFromSmiles('CCCOCCC(=O)c1ccccc1')
  >>> res = list(FindBRICSBonds(m))
  >>> res
  [((3, 2), ('3', '4')), ((3, 4), ('3', '4')), ((6, 8), ('6', '16'))]

  we can also randomize the order of the results:
  >>> random.seed(23)
  >>> res = list(FindBRICSBonds(m,randomizeOrder=True))
  >>> sorted(res)
  [((3, 2), ('3', '4')), ((3, 4), ('3', '4')), ((6, 8), ('6', '16'))]

  Note that this is a generator function :
  >>> res = FindBRICSBonds(m)
  >>> res
  <generator object ...>
  >>> next(res)
  ((3, 2), ('3', '4'))

  >>> m = Chem.MolFromSmiles('CC=CC')
  >>> res = list(FindBRICSBonds(m))
  >>> sorted(res)
  [((1, 2), ('7', '7'))]

  make sure we don't match ring bonds:
  >>> m = Chem.MolFromSmiles('O=C1NCCC1')
  >>> list(FindBRICSBonds(m))
  []

  another nice one, make sure environment 8 doesn't match something connected
  to a ring atom:
  >>> m = Chem.MolFromSmiles('CC1(C)CCCCC1')
  >>> list(FindBRICSBonds(m))
  []

  """
  letter = re.compile('[a-z,A-Z]')
  indices = list(range(len(bondMatchers)))
  bondsDone = set()
  if randomizeOrder:
    # Do not pass ``random=``: that keyword of random.shuffle() was
    # deprecated in Python 3.9 and removed in 3.11. The default still draws
    # from the module-level generator, so random.seed() keeps working.
    random.shuffle(indices)

  # Pre-screen: record which environments occur anywhere in the molecule so
  # that rules needing an absent environment can be skipped without running
  # the bond query.
  envMatches = {}
  for env, patt in iteritems(environMatchers):
    envMatches[env] = mol.HasSubstructMatch(patt)
  for gpIdx in indices:
    if randomizeOrder:
      # shuffle a copy; bondMatchers itself must keep its order
      compats = bondMatchers[gpIdx][:]
      random.shuffle(compats)
    else:
      compats = bondMatchers[gpIdx]
    for i1, i2, bType, patt in compats:
      if not envMatches['L' + i1] or not envMatches['L' + i2]:
        continue
      matches = mol.GetSubstructMatches(patt)
      # reported labels are purely numeric: '7a' -> '7'
      i1 = letter.sub('', i1)
      i2 = letter.sub('', i2)
      for match in matches:
        # a bond already reported in either atom order is not reported again
        if match not in bondsDone and (match[1], match[0]) not in bondsDone:
          bondsDone.add(match)
          yield (((match[0], match[1]), (i1, i2)))
308 309
def BreakBRICSBonds(mol, bonds=None, sanitize=True, silent=True):
  """ breaks the BRICS bonds in a molecule and returns the results

  The result is a single new molecule; the input is not modified. Every
  broken bond is replaced by two dummy atoms, one on each former end, whose
  isotopes carry the BRICS labels. If the input has conformers, each dummy
  atom is placed where the atom it replaces was.

  Arguments:
    - mol: the molecule to fragment
    - bonds: the bonds to break, in the form produced by FindBRICSBonds():
      ((atomIdx1, atomIdx2), (label1, label2)) items. If this is empty or
      not provided, the work is delegated to Chem.FragmentOnBRICSBonds()
    - sanitize: if true, the result is sanitized before it is returned
    - silent: not used

  >>> from rdkit import Chem
  >>> m = Chem.MolFromSmiles('CCCOCC')
  >>> m2=BreakBRICSBonds(m)
  >>> Chem.MolToSmiles(m2,True)
  '[3*]O[3*].[4*]CC.[4*]CCC'

  a more complicated case:
  >>> m = Chem.MolFromSmiles('CCCOCCC(=O)c1ccccc1')
  >>> m2=BreakBRICSBonds(m)
  >>> Chem.MolToSmiles(m2,True)
  '[16*]c1ccccc1.[3*]O[3*].[4*]CCC.[4*]CCC([6*])=O'


  can also specify a limited set of bonds to work with:
  >>> m = Chem.MolFromSmiles('CCCOCC')
  >>> m2 = BreakBRICSBonds(m,[((3, 2), ('3', '4'))])
  >>> Chem.MolToSmiles(m2,True)
  '[3*]OCC.[4*]CCC'

  this can be used as an alternate approach for doing a BRICS decomposition by
  following BreakBRICSBonds with a call to Chem.GetMolFrags:
  >>> m = Chem.MolFromSmiles('CCCOCC')
  >>> m2=BreakBRICSBonds(m)
  >>> frags = Chem.GetMolFrags(m2,asMols=True)
  >>> [Chem.MolToSmiles(x,True) for x in frags]
  ['[4*]CCC', '[3*]O[3*]', '[4*]CC']

  """
  if not bonds:
    fragmented = Chem.FragmentOnBRICSBonds(mol)
    if sanitize:
      Chem.SanitizeMol(fragmented)
    return fragmented

  editable = Chem.EditableMol(mol)
  # new atoms are appended, so the next one gets the current atom count as index
  nextAtomIdx = mol.GetNumAtoms()

  # (index of a dummy atom, index of the original atom whose position it takes)
  dummySources = []
  for (beginIdx, endIdx), dummyLabels in bonds:
    brokenBondType = mol.GetBondBetweenAtoms(beginIdx, endIdx).GetBondType()
    editable.RemoveBond(beginIdx, endIdx)

    beginLabel, endLabel = dummyLabels
    # cap both ends: the dummy on one end stands in for the atom on the other
    for keptIdx, replacedIdx, label in ((beginIdx, endIdx, beginLabel),
                                        (endIdx, beginIdx, endLabel)):
      dummy = Chem.Atom(0)
      dummy.SetIsotope(int(label))
      dummy.SetNoImplicit(True)
      dummyIdx = nextAtomIdx
      nextAtomIdx += 1
      editable.AddAtom(dummy)
      editable.AddBond(keptIdx, dummyIdx, brokenBondType)
      if mol.GetNumConformers():
        dummySources.append((dummyIdx, replacedIdx))

  res = editable.GetMol()
  if sanitize:
    Chem.SanitizeMol(res)

  if mol.GetNumConformers():
    for srcConf in mol.GetConformers():
      dstConf = res.GetConformer(srcConf.GetId())
      for dummyIdx, replacedIdx in dummySources:
        dstConf.SetAtomPosition(dummyIdx, srcConf.GetAtomPosition(replacedIdx))
  return res
385 386
def BRICSDecompose(mol, allNodes=None, minFragmentSize=1, onlyUseReactions=None, silent=True,
                   keepNonLeafNodes=False, singlePass=False, returnMols=False):
  """ returns the BRICS decomposition for a molecule

  Arguments:
    - mol: the molecule to decompose
    - allNodes: optional set of fragment SMILES that have already been
      seen. It is updated in place with everything found here, so it can be
      shared between calls; if the SMILES of mol is already in it, an empty
      set is returned immediately.
    - minFragmentSize: a reaction's products are rejected if one of them
      has fewer than this many explicit atoms, not counting dummies
    - onlyUseReactions: optional container of (groupIndex, reactionIndex)
      tuples; if provided, only those reactions are applied
    - silent: if false, progress information is printed
    - keepNonLeafNodes: if true, fragments that can be decomposed further
      are part of the result as well
    - singlePass: if true, only the input molecule itself is decomposed,
      its fragments are not processed any further
    - returnMols: if true, molecules are returned instead of SMILES

  Returns: a set of SMILES, or (with returnMols) the values of a dictionary
  of molecules.

  >>> from rdkit import Chem
  >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1')
  >>> res = list(BRICSDecompose(m))
  >>> sorted(res)
  ['[14*]c1ccccn1', '[16*]c1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[4*]C[8*]']

  >>> res = list(BRICSDecompose(m,returnMols=True))
  >>> res[0]
  <rdkit.Chem.rdchem.Mol object ...>
  >>> smis = [Chem.MolToSmiles(x,True) for x in res]
  >>> sorted(smis)
  ['[14*]c1ccccn1', '[16*]c1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[4*]C[8*]']

  nexavar, an example from the paper (corrected):
  >>> m = Chem.MolFromSmiles('CNC(=O)C1=NC=CC(OC2=CC=C(NC(=O)NC3=CC(=C(Cl)C=C3)C(F)(F)F)C=C2)=C1')
  >>> res = list(BRICSDecompose(m))
  >>> sorted(res)
  ['[1*]C([1*])=O', '[1*]C([6*])=O', '[14*]c1cc([16*])ccn1', '[16*]c1ccc(Cl)c([16*])c1', '[16*]c1ccc([16*])cc1', '[3*]O[3*]', '[5*]NC', '[5*]N[5*]', '[8*]C(F)(F)F']

  it's also possible to keep pieces that haven't been fully decomposed:
  >>> m = Chem.MolFromSmiles('CCCOCC')
  >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True))
  >>> sorted(res)
  ['CCCOCC', '[3*]OCC', '[3*]OCCC', '[3*]O[3*]', '[4*]CC', '[4*]CCC']

  >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1')
  >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True))
  >>> sorted(res)
  ['CCCOCc1cccc(-c2ccccn2)c1', '[14*]c1ccccn1', '[16*]c1cccc(-c2ccccn2)c1', '[16*]c1cccc(COCCC)c1', '[16*]c1cccc([16*])c1', '[3*]OCCC', '[3*]OC[8*]', '[3*]OCc1cccc(-c2ccccn2)c1', '[3*]OCc1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[4*]C[8*]', '[4*]Cc1cccc(-c2ccccn2)c1', '[4*]Cc1cccc([16*])c1', '[8*]COCCC']

  or to only do a single pass of decomposition:
  >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1')
  >>> res = list(BRICSDecompose(m,singlePass=True))
  >>> sorted(res)
  ['CCCOCc1cccc(-c2ccccn2)c1', '[14*]c1ccccn1', '[16*]c1cccc(-c2ccccn2)c1', '[16*]c1cccc(COCCC)c1', '[3*]OCCC', '[3*]OCc1cccc(-c2ccccn2)c1', '[4*]CCC', '[4*]Cc1cccc(-c2ccccn2)c1', '[8*]COCCC']

  setting a minimum size for the fragments:
  >>> m = Chem.MolFromSmiles('CCCOCC')
  >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True,minFragmentSize=2))
  >>> sorted(res)
  ['CCCOCC', '[3*]OCC', '[3*]OCCC', '[4*]CC', '[4*]CCC']
  >>> m = Chem.MolFromSmiles('CCCOCC')
  >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True,minFragmentSize=3))
  >>> sorted(res)
  ['CCCOCC', '[3*]OCC', '[4*]CCC']
  >>> res = list(BRICSDecompose(m,minFragmentSize=2))
  >>> sorted(res)
  ['[3*]OCC', '[3*]OCCC', '[4*]CC', '[4*]CCC']


  """
  mSmi = Chem.MolToSmiles(mol, 1)

  if allNodes is None:
    allNodes = set()

  if mSmi in allNodes:
    return set()

  # activePool: SMILES -> molecule, the pieces still to be tried against the
  # current reaction group. foundMols: every piece seen during this call.
  activePool = {mSmi: mol}
  allNodes.add(mSmi)
  foundMols = {mSmi: mol}
  for gpIdx, reactionGp in enumerate(reactions):
    # pieces handed on to the next reaction group
    newPool = {}
    while activePool:
      matched = False
      nSmi = next(iterkeys(activePool))
      mol = activePool.pop(nSmi)
      for rxnIdx, reaction in enumerate(reactionGp):
        if onlyUseReactions and (gpIdx, rxnIdx) not in onlyUseReactions:
          continue
        if not silent:
          print('--------')
          print(smartsGps[gpIdx][rxnIdx])
        ps = reaction.RunReactants((mol, ))
        if ps:
          if not silent:
            print(nSmi, '->', len(ps), 'products')
          for prodSeq in ps:
            seqOk = True
            # we want to disqualify small fragments, so sort the product sequence by size
            tSeq = [(prod.GetNumAtoms(onlyExplicit=True), idx)
                    for idx, prod in enumerate(prodSeq)]
            tSeq.sort()
            for nats, idx in tSeq:
              prod = prodSeq[idx]
              try:
                Chem.SanitizeMol(prod)
              except Exception:
                # A product that cannot be sanitized never gets a SMILES.
                # Reject the whole product set here; merely skipping the
                # product would make the ``prod.pSmi`` lookup below fail
                # with an AttributeError.
                seqOk = False
                break
              pSmi = Chem.MolToSmiles(prod, 1)
              if minFragmentSize > 0:
                nDummies = pSmi.count('*')
                if nats - nDummies < minFragmentSize:
                  seqOk = False
                  break
              # cache the SMILES on the molecule for the second loop
              prod.pSmi = pSmi
            if seqOk:
              matched = True
              for nats, idx in tSeq:
                prod = prodSeq[idx]
                pSmi = prod.pSmi
                if pSmi not in allNodes:
                  if not singlePass:
                    # new piece: it gets its own turn with this reaction group
                    activePool[pSmi] = prod
                  allNodes.add(pSmi)
                  foundMols[pSmi] = prod
      # a piece that no reaction of this group could split (or any piece, if
      # intermediates are wanted) moves on to the next group
      if singlePass or keepNonLeafNodes or not matched:
        newPool[nSmi] = mol
    activePool = newPool
  if not (singlePass or keepNonLeafNodes):
    # only what survived every reaction group, i.e. the leaves
    if not returnMols:
      res = set(activePool.keys())
    else:
      res = activePool.values()
  else:
    if not returnMols:
      res = allNodes
    else:
      res = foundMols.values()
  return res
import random
# matches any dummy atom, i.e. any attachment point that is still open
dummyPattern = Chem.MolFromSmiles('[*]')


def BRICSBuild(fragments, onlyCompleteMols=True, seeds=None, uniquify=True, scrambleReagents=True,
               maxDepth=3):
  """ builds new molecules by recombining BRICS fragments

  This is a generator. Each seed is combined with every fragment using the
  reverse BRICS reactions; products that still have dummy atoms become the
  seeds of the next, recursive, round.

  Arguments:
    - fragments: the building blocks, molecules with isotope-labelled dummy
      atoms. This is iterated over repeatedly, so it has to be a sequence,
      not a one-shot iterator.
    - onlyCompleteMols: if true, only products without any remaining dummy
      atoms are yielded (incomplete products are still built upon)
    - seeds: the molecules to start from; defaults to the fragments
    - uniquify: if true, a product whose SMILES has already been produced
      is skipped
    - scrambleReagents: if true, the seeds and the reactions are tried in
      a random order (python random module). This applies to the recursive
      rounds as well.
    - maxDepth: maximum number of recursive rounds

  """
  seen = set()
  if not seeds:
    seeds = list(fragments)
  if scrambleReagents:
    # shuffle copies, never the caller's list or the module-level reactions.
    # NOTE: no ``random=`` keyword; random.shuffle() lost it in Python 3.11.
    seeds = list(seeds)
    random.shuffle(seeds)
    tempReactions = list(reverseReactions)
    random.shuffle(tempReactions)
  else:
    tempReactions = reverseReactions
  for seed in seeds:
    # NOTE: these flags are set once a reaction matches and are not reset
    # for the reactions that follow within the same seed.
    seedIsR1 = False
    seedIsR2 = False
    nextSteps = []
    for rxn in tempReactions:
      if seed.HasSubstructMatch(rxn._matchers[0]):
        seedIsR1 = True
      if seed.HasSubstructMatch(rxn._matchers[1]):
        seedIsR2 = True
      for fragment in fragments:
        ps = None
        if fragment.HasSubstructMatch(rxn._matchers[0]):
          if seedIsR2:
            ps = rxn.RunReactants((fragment, seed))
        # NOTE: if both branches apply, the result of this one replaces the
        # result of the one above.
        if fragment.HasSubstructMatch(rxn._matchers[1]):
          if seedIsR1:
            ps = rxn.RunReactants((seed, fragment))
        if ps:
          for p in ps:
            if uniquify:
              pSmi = Chem.MolToSmiles(p[0], True)
              if pSmi in seen:
                continue
              else:
                seen.add(pSmi)
            if p[0].HasSubstructMatch(dummyPattern):
              # still has open attachment points: keep building on it
              nextSteps.append(p[0])
              if not onlyCompleteMols:
                yield p[0]
            else:
              yield p[0]
    if nextSteps and maxDepth > 0:
      # pass scrambleReagents on, otherwise the recursive rounds would fall
      # back to the default and shuffle even when the caller asked not to
      for p in BRICSBuild(fragments, onlyCompleteMols=onlyCompleteMols, seeds=nextSteps,
                          uniquify=uniquify, scrambleReagents=scrambleReagents,
                          maxDepth=maxDepth - 1):
        if uniquify:
          pSmi = Chem.MolToSmiles(p, True)
          if pSmi in seen:
            continue
          else:
            seen.add(pSmi)
        yield p
574 575 # ------- ------- ------- ------- ------- ------- ------- ------- 576 # Begin testing code 577 578 579 #------------------------------------ 580 # 581 # doctest boilerplate 582 #
def _test():
  """ runs the doctests of the module being executed as a script

  Returns doctest's (failed, attempted) result.
  """
  import doctest
  import sys
  # ELLIPSIS lets "..." stand in for memory addresses in object reprs;
  # NORMALIZE_WHITESPACE allows long expected outputs to be wrapped
  flags = doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE
  mainModule = sys.modules["__main__"]
  return doctest.testmod(mainModule, optionflags=flags)
if __name__ == '__main__':
  import unittest

  class TestCase(unittest.TestCase):
    """ unit tests for the BRICS fragmentation and build functions """

    def _assertCoordsPreserved(self, molConf, fragConfs):
      """ checks the atom positions of the three fragments of the test11 molecule

      molConf is a conformer of the unfragmented molecule, fragConfs the
      matching conformers of its three fragments. Each entry of a table is
      (atom index in the molecule, atom index in the fragment); the two
      positions have to be identical.
      """
      expected = (
        # first ring, then its dummy sitting where the oxygen (atom 6) was
        (fragConfs[0], [(i, i) for i in range(6)] + [(6, 6)]),
        # the oxygen, then a dummy sitting where ring atom 5 was
        (fragConfs[1], [(6, 0), (5, 1)]),
        # second ring, then its dummy sitting where the oxygen was
        (fragConfs[2], [(i + 7, i) for i in range(6)] + [(6, 6)]),
      )
      for fragConf, idxPairs in expected:
        for molIdx, fragIdx in idxPairs:
          p1 = molConf.GetAtomPosition(molIdx)
          p2 = fragConf.GetAtomPosition(fragIdx)
          self.assertEqual((p1 - p2).Length(), 0.0)

    def test1(self):
      # (SMILES, expected number of fragments, fragment that must be present)
      cases = (
        ('CC(=O)OC', 2, None),
        ('CC(=O)N1CCC1=O', 2, None),
        ('c1ccccc1N(C)C', 2, None),
        ('c1cccnc1N(C)C', 2, None),
        ('o1ccnc1N(C)C', 2, None),
        ('c1ccccc1OC', 2, None),
        ('o1ccnc1OC', 2, None),
        ('O1CCNC1OC', 2, None),
        ('CCCSCC', 3, '[11*]S[11*]'),
        ('CCNC(=O)C1CC1', 4, '[5*]N[5*]'),
      )
      for smi, nFrags, required in cases:
        res = BRICSDecompose(Chem.MolFromSmiles(smi))
        self.assertTrue(res)
        self.assertEqual(len(res), nFrags, (smi, res))
        if required is not None:
          self.assertIn(required, res, res)

    def test2(self):
      # example from the paper, nexavar:
      m = Chem.MolFromSmiles('CNC(=O)C1=NC=CC(OC2=CC=C(NC(=O)NC3=CC(=C(Cl)C=C3)C(F)(F)F)C=C2)=C1')
      res = BRICSDecompose(m)
      self.assertTrue(res)
      self.assertEqual(len(res), 9, res)

    def test3(self):
      m = Chem.MolFromSmiles('FC(F)(F)C1=C(Cl)C=CC(NC(=O)NC2=CC=CC=C2)=C1')
      res = BRICSDecompose(m)
      self.assertTrue(res)
      self.assertEqual(len(res), 5, res)
      self.assertIn('[5*]N[5*]', res, res)
      self.assertIn('[16*]c1ccccc1', res, res)
      self.assertIn('[8*]C(F)(F)F', res, res)

    def test4(self):
      # a shared allNodes set accumulates over calls and suppresses repeats
      allNodes = set()
      m = Chem.MolFromSmiles('c1ccccc1OCCC')
      res = BRICSDecompose(m, allNodes=allNodes)
      self.assertTrue(res)
      leaves = res
      self.assertEqual(len(leaves), 3, leaves)
      self.assertEqual(len(allNodes), 6, allNodes)
      res = BRICSDecompose(m, allNodes=allNodes)
      self.assertFalse(res)
      self.assertEqual(len(allNodes), 6, allNodes)

      m = Chem.MolFromSmiles('c1ccccc1OCCCC')
      res = BRICSDecompose(m, allNodes=allNodes)
      self.assertTrue(res)
      leaves.update(res)
      self.assertEqual(len(allNodes), 9, allNodes)
      self.assertEqual(len(leaves), 4, leaves)

      m = Chem.MolFromSmiles('c1cc(C(=O)NCC)ccc1OCCC')
      res = BRICSDecompose(m, allNodes=allNodes)
      self.assertTrue(res)
      leaves.update(res)
      self.assertEqual(len(leaves), 8, leaves)
      self.assertEqual(len(allNodes), 18, allNodes)

    def test5(self):
      frags = [
        '[14*]c1ncncn1',
        '[16*]c1ccccc1',
        '[14*]c1ncccc1',
      ]
      frags = [Chem.MolFromSmiles(x) for x in frags]
      res = BRICSBuild(frags)
      self.assertTrue(res)
      res = list(res)
      self.assertEqual(len(res), 6)
      smis = [Chem.MolToSmiles(x, True) for x in res]
      self.assertIn('c1ccc(-c2ccccc2)cc1', smis)
      self.assertIn('c1ccc(-c2ccccn2)cc1', smis)

    def test5a(self):
      frags = [
        '[3*]O[3*]',
        '[16*]c1ccccc1',
      ]
      frags = [Chem.MolFromSmiles(x) for x in frags]
      res = BRICSBuild(frags)
      self.assertTrue(res)
      res = list(res)
      smis = [Chem.MolToSmiles(x, True) for x in res]
      self.assertEqual(len(smis), 2, smis)
      self.assertIn('c1ccc(Oc2ccccc2)cc1', smis)
      self.assertIn('c1ccc(-c2ccccc2)cc1', smis)

    def test6(self):
      frags = [
        '[16*]c1ccccc1',
        '[3*]OC',
        '[9*]n1cccc1',
      ]
      frags = [Chem.MolFromSmiles(x) for x in frags]
      res = BRICSBuild(frags)
      self.assertTrue(res)
      res = list(res)
      self.assertEqual(len(res), 3)
      smis = [Chem.MolToSmiles(x, True) for x in res]
      self.assertIn('c1ccc(-c2ccccc2)cc1', smis)
      self.assertIn('COc1ccccc1', smis)
      self.assertIn('c1ccc(-n2cccc2)cc1', smis, smis)

    def test7(self):
      frags = [
        '[16*]c1ccccc1',
        '[3*]OC',
        '[3*]OCC(=O)[6*]',
      ]
      frags = [Chem.MolFromSmiles(x) for x in frags]
      res = BRICSBuild(frags)
      self.assertTrue(res)
      res = list(res)
      smis = [Chem.MolToSmiles(x, True) for x in res]
      self.assertEqual(len(res), 3)
      self.assertIn('c1ccc(-c2ccccc2)cc1', smis)
      self.assertIn('COc1ccccc1', smis)
      self.assertIn('O=C(COc1ccccc1)c1ccccc1', smis)

    def test8(self):
      random.seed(23)
      base = Chem.MolFromSmiles("n1cncnc1OCC(C1CC1)OC1CNC1")
      catalog = BRICSDecompose(base)
      self.assertEqual(len(catalog), 5, catalog)
      catalog = [Chem.MolFromSmiles(x) for x in catalog]
      ms = list(BRICSBuild(catalog, maxDepth=4))
      for m in ms:
        Chem.SanitizeMol(m)
      ms = [Chem.MolToSmiles(x) for x in ms]
      self.assertEqual(len(ms), 36)

      ts = ['n1cnc(C2CNC2)nc1', 'n1cnc(-c2ncncn2)nc1', 'C(OC1CNC1)C(C1CC1)OC1CNC1',
            'n1cnc(OC(COC2CNC2)C2CC2)nc1', 'n1cnc(OCC(OC2CNC2)C2CNC2)nc1']
      ts = [Chem.MolToSmiles(Chem.MolFromSmiles(x), True) for x in ts]
      for t in ts:
        self.assertIn(t, ms, (t, ms))

    def test9(self):
      m = Chem.MolFromSmiles('CCOc1ccccc1c1ncc(c2nc(NCCCC)ncn2)cc1')
      res = BRICSDecompose(m)
      self.assertEqual(len(res), 7)
      self.assertIn('[3*]O[3*]', res)
      self.assertNotIn('[14*]c1ncnc(NCCCC)n1', res)
      res = BRICSDecompose(m, singlePass=True)
      self.assertEqual(len(res), 13)
      self.assertIn('[3*]OCC', res)
      self.assertIn('[14*]c1ncnc(NCCCC)n1', res)

    def test10(self):
      m = Chem.MolFromSmiles('C1CCCCN1c1ccccc1')
      res = BRICSDecompose(m)
      self.assertEqual(len(res), 2, res)

    def test11(self):
      # test coordinate preservation:
      # (mol blocks are column-sensitive, so their lines cannot be indented)
      molblock = """
     RDKit          3D

 13 14  0  0  0  0  0  0  0  0999 V2000
   -1.2004    0.5900    0.6110 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.2328    1.3173    0.0343 C   0  0  0  0  0  0  0  0  0  0  0  0
   -3.4299    0.6533   -0.1500 C   0  0  0  0  0  0  0  0  0  0  0  0
   -3.3633   -0.7217   -0.3299 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.1552   -1.3791   -0.2207 C   0  0  0  0  0  0  0  0  0  0  0  0
   -1.1425   -0.7969    0.5335 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.1458   -1.4244    0.4108 O   0  0  0  0  0  0  0  0  0  0  0  0
    1.2976   -0.7398   -0.1026 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.4889   -0.7939    0.5501 N   0  0  0  0  0  0  0  0  0  0  0  0
    3.4615    0.1460    0.3535 C   0  0  0  0  0  0  0  0  0  0  0  0
    3.0116    1.4034   -0.0296 C   0  0  0  0  0  0  0  0  0  0  0  0
    1.9786    1.4264   -0.9435 C   0  0  0  0  0  0  0  0  0  0  0  0
    1.1399    0.3193   -0.9885 C   0  0  0  0  0  0  0  0  0  0  0  0
  1  2  2  0
  2  3  1  0
  3  4  2  0
  4  5  1  0
  5  6  2  0
  6  7  1  0
  7  8  1  0
  8  9  2  0
  9 10  1  0
 10 11  2  0
 11 12  1  0
 12 13  2  0
  6  1  1  0
 13  8  1  0
M  END
"""
      m = Chem.MolFromMolBlock(molblock)
      pieces = BreakBRICSBonds(m)

      frags = Chem.GetMolFrags(pieces, asMols=True)
      self.assertEqual(len(frags), 3)
      self.assertEqual(frags[0].GetNumAtoms(), 7)
      self.assertEqual(frags[1].GetNumAtoms(), 3)
      self.assertEqual(frags[2].GetNumAtoms(), 7)

      self._assertCoordsPreserved(m.GetConformer(), [frag.GetConformer() for frag in frags])

      # make sure multiple conformations (include 2D) also work:
      molblock = """
     RDKit          2D

 13 14  0  0  0  0  0  0  0  0999 V2000
   -1.2990   -0.8654    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.5981   -1.6154    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -3.8971   -0.8654    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -3.8971    0.6346    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.5981    1.3846    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -1.2990    0.6346    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -0.0000    1.3846    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
    1.2990    0.6346    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    1.2990   -0.8654    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
    2.5981   -1.6154    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    3.8971   -0.8654    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    3.8971    0.6346    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.5981    1.3846    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
  1  2  2  0
  2  3  1  0
  3  4  2  0
  4  5  1  0
  5  6  2  0
  6  7  1  0
  7  8  1  0
  8  9  2  0
  9 10  1  0
 10 11  2  0
 11 12  1  0
 12 13  2  0
  6  1  1  0
 13  8  1  0
M  END
"""
      m2 = Chem.MolFromMolBlock(molblock)
      m.AddConformer(m2.GetConformer(), assignId=True)
      self.assertEqual(m.GetNumConformers(), 2)

      pieces = BreakBRICSBonds(m)
      frags = Chem.GetMolFrags(pieces, asMols=True)
      self.assertEqual(len(frags), 3)
      self.assertEqual(frags[0].GetNumAtoms(), 7)
      self.assertEqual(frags[1].GetNumAtoms(), 3)
      self.assertEqual(frags[2].GetNumAtoms(), 7)
      self.assertEqual(frags[0].GetNumConformers(), 2)
      self.assertEqual(frags[1].GetNumConformers(), 2)
      self.assertEqual(frags[2].GetNumConformers(), 2)

      for confId in (0, 1):
        self._assertCoordsPreserved(
          m.GetConformer(confId), [frag.GetConformer(confId) for frag in frags])

    def test12(self):
      m = Chem.MolFromSmiles('CCS(=O)(=O)NCC')
      res = list(FindBRICSBonds(m))
      self.assertEqual(len(res), 2, res)
      atIds = [x[0] for x in res]
      atIds.sort()
      self.assertEqual(atIds, [(5, 2), (6, 5)])

  # run the doctests first and stop with their failure count if any failed
  failed, tried = _test()
  if failed:
    sys.exit(failed)

  unittest.main()