1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33 """ Implementation of the BRICS algorithm from Degen et al. ChemMedChem *3* 1503-7 (2008)
34
35 """
36 from __future__ import print_function
37 import sys, re, random
38 from rdkit import Chem
39 from rdkit.Chem import rdChemReactions as Reactions
40 from rdkit.six import iteritems, iterkeys, next
41 from rdkit.six.moves import range
42
43
# Atom-environment definitions.  Each key is 'L' followed by an environment
# label as used in reactionDefs; each value is the SMARTS pattern for that
# environment.  The patterns are spliced into larger SMARTS strings further
# down in this module, so they must stay valid as recursive-SMARTS bodies.
environs = {
    'L1': '[C;D3]([#0,#6,#7,#8])(=O)',
    # NOTE(review): there is no 'L2' entry, and no rule in reactionDefs refers
    # to an environment 2.
    'L3': '[O;D2]-;!@[#0,#6,#1]',
    'L4': '[C;!D1;!$(C=*)]-;!@[#6]',

    'L5': '[N;!D1;!$(N=*);!$(N-[!#6;!#16;!#0;!#1]);!$([N;R]@[C;R]=O)]',
    'L6': '[C;D3;!R](=O)-;!@[#0,#6,#7,#8]',
    # L7a and L7b hold the same pattern; they are the two ends of the
    # ('7a', '7b', '=') rule in reactionDefs.
    'L7a': '[C;D2,D3]-[#6]',
    'L7b': '[C;D2,D3]-[#6]',
    # The '#L8' key can never be produced by an 'L' + label lookup, so this
    # variant is effectively disabled in favour of the 'L8' entry that follows.
    # It is still a regular dictionary entry for anything iterating environs.
    '#L8': '[C;!R;!D1]-;!@[#6]',
    'L8': '[C;!R;!D1;!$(C!-*)]',
    'L9': '[n;+0;$(n(:[c,n,o,s]):[c,n,o,s])]',
    'L10': '[N;R;$(N(@C(=O))@[C,N,O,S])]',
    'L11': '[S;D2](-;!@[#0,#6])',
    'L12': '[S;D4]([#6,#0])(=O)(=O)',
    'L13': '[C;$(C(-;@[C,N,O,S])-;@[N,O,S])]',
    'L14': '[c;$(c(:[c,n,o,s]):[n,o,s])]',
    # L14b duplicates L14 and is not referenced by any rule in reactionDefs.
    'L14b': '[c;$(c(:[c,n,o,s]):[n,o,s])]',
    'L15': '[C;$(C(-;@C)-;@C)]',
    'L16': '[c;$(c(:c):c)]',
    # L16b duplicates L16 and is not referenced by any rule in reactionDefs.
    'L16b': '[c;$(c(:c):c)]',
}
# Fragmentation rules, organised as a tuple of groups (lists).
#
# Every rule is a (label1, label2, bond) triple:
#   * label1 / label2 select the environments environs['L' + label]
#   * bond is the SMARTS bond symbol placed between the two environments
#
# The grouping and the position of each rule inside its group are significant:
# smartsGps, bondMatchers and reactions are all built with the same layout, and
# BRICSDecompose walks the groups in this order and identifies a rule by its
# (group index, rule index) pair.
reactionDefs = (
    # rules starting from environment 1
    [
        ('1', '3', '-'),
        ('1', '5', '-'),
        ('1', '10', '-'),
    ],

    # rules starting from environment 3
    [
        ('3', '4', '-'),
        ('3', '13', '-'),
        ('3', '14', '-'),
        ('3', '15', '-'),
        ('3', '16', '-'),
    ],

    # rules starting from environment 4
    [
        ('4', '5', '-'),
        ('4', '11', '-'),
    ],

    # rules starting from environment 5
    [
        ('5', '12', '-'),
        ('5', '14', '-'),
        ('5', '16', '-'),
        ('5', '13', '-'),
        ('5', '15', '-'),
    ],

    # rules starting from environment 6
    [
        ('6', '13', '-'),
        ('6', '14', '-'),
        ('6', '15', '-'),
        ('6', '16', '-'),
    ],

    # environment 7: the only rule that uses the '=' bond symbol
    [
        ('7a', '7b', '='),
    ],

    # rules starting from environment 8
    [
        ('8', '9', '-'),
        ('8', '10', '-'),
        ('8', '13', '-'),
        ('8', '14', '-'),
        ('8', '15', '-'),
        ('8', '16', '-'),
    ],

    # rules starting from environment 9
    [
        ('9', '13', '-'),
        ('9', '14', '-'),
        ('9', '15', '-'),
        ('9', '16', '-'),
    ],

    # rules starting from environment 10
    [
        ('10', '13', '-'),
        ('10', '14', '-'),
        ('10', '15', '-'),
        ('10', '16', '-'),
    ],

    # rules starting from environment 11
    [
        ('11', '13', '-'),
        ('11', '14', '-'),
        ('11', '15', '-'),
        ('11', '16', '-'),
    ],

    # no group starts from environment 12; it only appears above as the
    # partner of environment 5

    # rules starting from environment 13
    [
        ('13', '14', '-'),
        ('13', '15', '-'),
        ('13', '16', '-'),
    ],

    # rules starting from environment 14
    [
        ('14', '14', '-'),
        ('14', '15', '-'),
        ('14', '16', '-'),
    ],

    # rules starting from environment 15
    [
        ('15', '16', '-'),
    ],

    # rules starting from environment 16
    [
        ('16', '16', '-'),
    ], )
import copy

# Expand every (label1, label2, bond) rule of reactionDefs into the SMARTS of
# the corresponding fragmentation reaction.  The copy keeps the group layout of
# reactionDefs while leaving reactionDefs itself untouched.
smartsGps = copy.deepcopy(reactionDefs)
for ruleGroup in smartsGps:
    for pos, (label1, label2, bondSymbol) in enumerate(ruleGroup):
        # the dummy atoms of the products are labelled with the numeric part of
        # the environment label only ('7a' and '7b' both become '7')
        isotope1 = re.sub('[a-z,A-Z]', '', label1)
        isotope2 = re.sub('[a-z,A-Z]', '', label2)
        ruleGroup[pos] = '[$(%s):1]%s;!@[$(%s):2]>>[%s*]-[*:1].[%s*]-[*:2]' % (
            environs['L' + label1], bondSymbol, environs['L' + label2], isotope1, isotope2)

# Import-time sanity check: every generated SMARTS must parse and initialize as
# a reaction.  The offending definition is printed before the error propagates.
for reactionSmarts in (sma for ruleGroup in smartsGps for sma in ruleGroup):
    try:
        Reactions.ReactionFromSmarts(reactionSmarts).Initialize()
    except Exception:
        print(reactionSmarts)
        raise
210
# One compiled SMARTS query per entry of environs, keyed like environs.
environMatchers = {label: Chem.MolFromSmarts(smarts) for label, smarts in iteritems(environs)}

# For every rule of reactionDefs: (label1, label2, bond symbol, compiled query
# matching the non-ring bond between the two environments).  The nesting
# mirrors the group layout of reactionDefs.
bondMatchers = [[(label1, label2, bondSymbol,
                  Chem.MolFromSmarts('[$(%s)]%s;!@[$(%s)]' %
                                     (environs['L%s' % label1], bondSymbol,
                                      environs['L%s' % label2])))
                 for label1, label2, bondSymbol in ruleGroup]
                for ruleGroup in reactionDefs]
225
# Fragmenting reactions, grouped exactly like smartsGps.
reactions = tuple([Reactions.ReactionFromSmarts(sma) for sma in ruleGroup]
                  for ruleGroup in smartsGps)

# Flat list of the same reactions run backwards (fragments -> joined molecule).
reverseReactions = []
for forwardSmarts in (sma for ruleGroup in smartsGps for sma in ruleGroup):
    reactantSide, productSide = forwardSmarts.split('>>')
    joinRxn = Reactions.ReactionFromSmarts('%s>>%s' % (productSide, reactantSide))
    # one labelled-dummy query per reactant template of the reversed reaction,
    # in template order; BRICSBuild uses them to pre-screen candidate reagents
    isotopeLabels = re.findall(r'\[([0-9]+?)\*\]', productSide)
    joinRxn._matchers = [Chem.MolFromSmiles('[%s*]' % lbl) for lbl in isotopeLabels]
    reverseReactions.append(joinRxn)
236
237
239 """ returns the bonds in a molecule that BRICS would cleave
240
241 >>> from rdkit import Chem
242 >>> m = Chem.MolFromSmiles('CCCOCC')
243 >>> res = list(FindBRICSBonds(m))
244 >>> res
245 [((3, 2), ('3', '4')), ((3, 4), ('3', '4'))]
246
247 a more complicated case:
248 >>> m = Chem.MolFromSmiles('CCCOCCC(=O)c1ccccc1')
249 >>> res = list(FindBRICSBonds(m))
250 >>> res
251 [((3, 2), ('3', '4')), ((3, 4), ('3', '4')), ((6, 8), ('6', '16'))]
252
253 we can also randomize the order of the results:
254 >>> random.seed(23)
255 >>> res = list(FindBRICSBonds(m,randomizeOrder=True))
256 >>> sorted(res)
257 [((3, 2), ('3', '4')), ((3, 4), ('3', '4')), ((6, 8), ('6', '16'))]
258
259 Note that this is a generator function :
260 >>> res = FindBRICSBonds(m)
261 >>> res
262 <generator object ...>
263 >>> next(res)
264 ((3, 2), ('3', '4'))
265
266 >>> m = Chem.MolFromSmiles('CC=CC')
267 >>> res = list(FindBRICSBonds(m))
268 >>> sorted(res)
269 [((1, 2), ('7', '7'))]
270
271 make sure we don't match ring bonds:
272 >>> m = Chem.MolFromSmiles('O=C1NCCC1')
273 >>> list(FindBRICSBonds(m))
274 []
275
276 another nice one, make sure environment 8 doesn't match something connected
277 to a ring atom:
278 >>> m = Chem.MolFromSmiles('CC1(C)CCCCC1')
279 >>> list(FindBRICSBonds(m))
280 []
281
282 """
283 letter = re.compile('[a-z,A-Z]')
284 indices = list(range(len(bondMatchers)))
285 bondsDone = set()
286 if randomizeOrder:
287 random.shuffle(indices, random=random.random)
288
289 envMatches = {}
290 for env, patt in iteritems(environMatchers):
291 envMatches[env] = mol.HasSubstructMatch(patt)
292 for gpIdx in indices:
293 if randomizeOrder:
294 compats = bondMatchers[gpIdx][:]
295 random.shuffle(compats, random=random.random)
296 else:
297 compats = bondMatchers[gpIdx]
298 for i1, i2, bType, patt in compats:
299 if not envMatches['L' + i1] or not envMatches['L' + i2]:
300 continue
301 matches = mol.GetSubstructMatches(patt)
302 i1 = letter.sub('', i1)
303 i2 = letter.sub('', i2)
304 for match in matches:
305 if match not in bondsDone and (match[1], match[0]) not in bondsDone:
306 bondsDone.add(match)
307 yield (((match[0], match[1]), (i1, i2)))
308
309
311 """ breaks the BRICS bonds in a molecule and returns the results
312
313 >>> from rdkit import Chem
314 >>> m = Chem.MolFromSmiles('CCCOCC')
315 >>> m2=BreakBRICSBonds(m)
316 >>> Chem.MolToSmiles(m2,True)
317 '[3*]O[3*].[4*]CC.[4*]CCC'
318
319 a more complicated case:
320 >>> m = Chem.MolFromSmiles('CCCOCCC(=O)c1ccccc1')
321 >>> m2=BreakBRICSBonds(m)
322 >>> Chem.MolToSmiles(m2,True)
323 '[16*]c1ccccc1.[3*]O[3*].[4*]CCC.[4*]CCC([6*])=O'
324
325
326 can also specify a limited set of bonds to work with:
327 >>> m = Chem.MolFromSmiles('CCCOCC')
328 >>> m2 = BreakBRICSBonds(m,[((3, 2), ('3', '4'))])
329 >>> Chem.MolToSmiles(m2,True)
330 '[3*]OCC.[4*]CCC'
331
332 this can be used as an alternate approach for doing a BRICS decomposition by
333 following BreakBRICSBonds with a call to Chem.GetMolFrags:
334 >>> m = Chem.MolFromSmiles('CCCOCC')
335 >>> m2=BreakBRICSBonds(m)
336 >>> frags = Chem.GetMolFrags(m2,asMols=True)
337 >>> [Chem.MolToSmiles(x,True) for x in frags]
338 ['[4*]CCC', '[3*]O[3*]', '[4*]CC']
339
340 """
341 if not bonds:
342
343 res = Chem.FragmentOnBRICSBonds(mol)
344 if sanitize:
345 Chem.SanitizeMol(res)
346 return res
347 eMol = Chem.EditableMol(mol)
348 nAts = mol.GetNumAtoms()
349
350 dummyPositions = []
351 for indices, dummyTypes in bonds:
352 ia, ib = indices
353 obond = mol.GetBondBetweenAtoms(ia, ib)
354 bondType = obond.GetBondType()
355 eMol.RemoveBond(ia, ib)
356
357 da, db = dummyTypes
358 atoma = Chem.Atom(0)
359 atoma.SetIsotope(int(da))
360 atoma.SetNoImplicit(True)
361 idxa = nAts
362 nAts += 1
363 eMol.AddAtom(atoma)
364 eMol.AddBond(ia, idxa, bondType)
365
366 atomb = Chem.Atom(0)
367 atomb.SetIsotope(int(db))
368 atomb.SetNoImplicit(True)
369 idxb = nAts
370 nAts += 1
371 eMol.AddAtom(atomb)
372 eMol.AddBond(ib, idxb, bondType)
373 if mol.GetNumConformers():
374 dummyPositions.append((idxa, ib))
375 dummyPositions.append((idxb, ia))
376 res = eMol.GetMol()
377 if sanitize:
378 Chem.SanitizeMol(res)
379 if mol.GetNumConformers():
380 for conf in mol.GetConformers():
381 resConf = res.GetConformer(conf.GetId())
382 for ia, pa in dummyPositions:
383 resConf.SetAtomPosition(ia, conf.GetAtomPosition(pa))
384 return res
385
386
def BRICSDecompose(mol, allNodes=None, minFragmentSize=1, onlyUseReactions=None, silent=True,
                   keepNonLeafNodes=False, singlePass=False, returnMols=False):
    """ returns the BRICS decomposition for a molecule

    Arguments:
      - mol: the molecule to decompose
      - allNodes: optional set of SMILES that have already been processed.
        It is updated in place with every node generated here; if the SMILES
        of mol is already in it an empty set is returned immediately.
      - minFragmentSize: a split is rejected if one of its products has fewer
        than this many explicit non-dummy atoms
      - onlyUseReactions: optional collection of (group index, reaction index)
        pairs; when provided, all other reactions are skipped
      - silent: if False, progress information is printed
      - keepNonLeafNodes: also return pieces that can be decomposed further
      - singlePass: do not decompose the products any further
      - returnMols: return molecules instead of SMILES strings

    Returns a set of SMILES (or, with returnMols, a collection of molecules).

    >>> from rdkit import Chem
    >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1')
    >>> res = list(BRICSDecompose(m))
    >>> sorted(res)
    ['[14*]c1ccccn1', '[16*]c1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[4*]C[8*]']

    >>> res = list(BRICSDecompose(m,returnMols=True))
    >>> res[0]
    <rdkit.Chem.rdchem.Mol object ...>
    >>> smis = [Chem.MolToSmiles(x,True) for x in res]
    >>> sorted(smis)
    ['[14*]c1ccccn1', '[16*]c1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[4*]C[8*]']

    nexavar, an example from the paper (corrected):
    >>> m = Chem.MolFromSmiles('CNC(=O)C1=NC=CC(OC2=CC=C(NC(=O)NC3=CC(=C(Cl)C=C3)C(F)(F)F)C=C2)=C1')
    >>> res = list(BRICSDecompose(m))
    >>> sorted(res)
    ['[1*]C([1*])=O', '[1*]C([6*])=O', '[14*]c1cc([16*])ccn1', '[16*]c1ccc(Cl)c([16*])c1', '[16*]c1ccc([16*])cc1', '[3*]O[3*]', '[5*]NC', '[5*]N[5*]', '[8*]C(F)(F)F']

    it's also possible to keep pieces that haven't been fully decomposed:
    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True))
    >>> sorted(res)
    ['CCCOCC', '[3*]OCC', '[3*]OCCC', '[3*]O[3*]', '[4*]CC', '[4*]CCC']

    >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1')
    >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True))
    >>> sorted(res)
    ['CCCOCc1cccc(-c2ccccn2)c1', '[14*]c1ccccn1', '[16*]c1cccc(-c2ccccn2)c1', '[16*]c1cccc(COCCC)c1', '[16*]c1cccc([16*])c1', '[3*]OCCC', '[3*]OC[8*]', '[3*]OCc1cccc(-c2ccccn2)c1', '[3*]OCc1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[4*]C[8*]', '[4*]Cc1cccc(-c2ccccn2)c1', '[4*]Cc1cccc([16*])c1', '[8*]COCCC']

    or to only do a single pass of decomposition:
    >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1')
    >>> res = list(BRICSDecompose(m,singlePass=True))
    >>> sorted(res)
    ['CCCOCc1cccc(-c2ccccn2)c1', '[14*]c1ccccn1', '[16*]c1cccc(-c2ccccn2)c1', '[16*]c1cccc(COCCC)c1', '[3*]OCCC', '[3*]OCc1cccc(-c2ccccn2)c1', '[4*]CCC', '[4*]Cc1cccc(-c2ccccn2)c1', '[8*]COCCC']

    setting a minimum size for the fragments:
    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True,minFragmentSize=2))
    >>> sorted(res)
    ['CCCOCC', '[3*]OCC', '[3*]OCCC', '[4*]CC', '[4*]CCC']
    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True,minFragmentSize=3))
    >>> sorted(res)
    ['CCCOCC', '[3*]OCC', '[4*]CCC']
    >>> res = list(BRICSDecompose(m,minFragmentSize=2))
    >>> sorted(res)
    ['[3*]OCC', '[3*]OCCC', '[4*]CC', '[4*]CCC']


    """
    mSmi = Chem.MolToSmiles(mol, True)

    if allNodes is None:
        allNodes = set()

    if mSmi in allNodes:
        # this molecule was handled by an earlier call sharing allNodes
        return set()

    activePool = {mSmi: mol}
    allNodes.add(mSmi)
    foundMols = {mSmi: mol}
    for gpIdx, reactionGp in enumerate(reactions):
        # nodes that survive this reaction group are carried over to the next
        newPool = {}
        while activePool:
            matched = False
            nSmi = next(iterkeys(activePool))
            nodeMol = activePool.pop(nSmi)
            for rxnIdx, reaction in enumerate(reactionGp):
                if onlyUseReactions and (gpIdx, rxnIdx) not in onlyUseReactions:
                    continue
                if not silent:
                    print('--------')
                    print(smartsGps[gpIdx][rxnIdx])
                ps = reaction.RunReactants((nodeMol, ))
                if not ps:
                    continue
                if not silent:
                    print(nSmi, '->', len(ps), 'products')
                for prodSeq in ps:
                    seqOk = True
                    # we want to disqualify small fragments, so look at the
                    # products of this split in order of increasing size
                    tSeq = [(prod.GetNumAtoms(onlyExplicit=True), idx)
                            for idx, prod in enumerate(prodSeq)]
                    tSeq.sort()
                    usable = []
                    for nats, idx in tSeq:
                        prod = prodSeq[idx]
                        try:
                            Chem.SanitizeMol(prod)
                        except Exception:
                            # best effort: a product that cannot be sanitized is
                            # left out; it must not reach the bookkeeping below
                            # because it has no SMILES
                            continue
                        pSmi = Chem.MolToSmiles(prod, True)
                        if minFragmentSize > 0:
                            nDummies = pSmi.count('*')
                            if nats - nDummies < minFragmentSize:
                                seqOk = False
                                break
                        # kept for callers that read the cached SMILES from the
                        # returned molecules
                        prod.pSmi = pSmi
                        usable.append((pSmi, prod))
                    if seqOk and usable:
                        matched = True
                        for pSmi, prod in usable:
                            if pSmi not in allNodes:
                                if not singlePass:
                                    # new pieces are decomposed further by the
                                    # current reaction group
                                    activePool[pSmi] = prod
                                allNodes.add(pSmi)
                                foundMols[pSmi] = prod
            if singlePass or keepNonLeafNodes or not matched:
                newPool[nSmi] = nodeMol
        activePool = newPool
    if not (singlePass or keepNonLeafNodes):
        # only the leaves: whatever is still in the pool after the last group
        if not returnMols:
            res = set(activePool.keys())
        else:
            res = activePool.values()
    else:
        if not returnMols:
            res = allNodes
        else:
            res = foundMols.values()
    return res
514
515
import random
# Molecule consisting of a single dummy atom.  BRICSBuild uses it as a
# substructure query to tell whether a product still contains dummy atoms.
dummyPattern = Chem.MolFromSmiles('[*]')
518
519
def _shuffleInPlace(items):
    """ shuffles a list in place using the module-level random generator

    random.shuffle() lost its ``random`` keyword in Python 3.11.  The keyword
    is still passed where the interpreter accepts it, so that older versions
    keep producing the same order for a given seed.
    """
    try:
        random.shuffle(items, random=random.random)
    except TypeError:
        random.shuffle(items)


def BRICSBuild(fragments, onlyCompleteMols=True, seeds=None, uniquify=True, scrambleReagents=True,
               maxDepth=3):
    """ generator that builds new molecules by joining BRICS fragments

    Arguments:
      - fragments: the molecules (with labelled dummy atoms) to combine; any
        iterable is accepted
      - onlyCompleteMols: if True, only products without remaining dummy atoms
        are yielded; otherwise intermediate products are yielded as well
      - seeds: molecules to start from; defaults to the fragments themselves
      - uniquify: skip products whose SMILES has already been generated
      - scrambleReagents: process seeds and reactions in random order
        (controlled by the random module, see random.seed())
      - maxDepth: number of additional growth rounds applied to products that
        still contain dummy atoms

    Yields the product molecules one at a time.
    """
    # the fragments are walked once per (seed, reaction) pair, so a one-shot
    # iterable has to be materialized first
    fragments = list(fragments)
    seen = set()
    if not seeds:
        seeds = list(fragments)
    if scrambleReagents:
        seeds = list(seeds)
        _shuffleInPlace(seeds)
        tempReactions = list(reverseReactions)
        _shuffleInPlace(tempReactions)
    else:
        tempReactions = reverseReactions
    for seed in seeds:
        # NOTE(review): these flags are reset per seed, not per reaction, so
        # once a seed has matched a reactant slot of one reaction it is also
        # tried in that slot for every later reaction.  Confirm whether this
        # is intended before changing it.
        seedIsR1 = False
        seedIsR2 = False
        nextSteps = []
        for rxn in tempReactions:
            if seed.HasSubstructMatch(rxn._matchers[0]):
                seedIsR1 = True
            if seed.HasSubstructMatch(rxn._matchers[1]):
                seedIsR2 = True
            for fragment in fragments:
                ps = None
                if fragment.HasSubstructMatch(rxn._matchers[0]):
                    if seedIsR2:
                        ps = rxn.RunReactants((fragment, seed))
                if fragment.HasSubstructMatch(rxn._matchers[1]):
                    if seedIsR1:
                        ps = rxn.RunReactants((seed, fragment))
                if not ps:
                    continue
                for p in ps:
                    product = p[0]
                    if uniquify:
                        pSmi = Chem.MolToSmiles(product, True)
                        if pSmi in seen:
                            continue
                        seen.add(pSmi)
                    if product.HasSubstructMatch(dummyPattern):
                        # still has open attachment points: grow it further
                        nextSteps.append(product)
                        if not onlyCompleteMols:
                            yield product
                    else:
                        yield product
        if nextSteps and maxDepth > 0:
            # scrambleReagents is forwarded so that scrambleReagents=False gives
            # a deterministic order at every depth, not just at the top level
            for p in BRICSBuild(fragments, onlyCompleteMols=onlyCompleteMols, seeds=nextSteps,
                                uniquify=uniquify, scrambleReagents=scrambleReagents,
                                maxDepth=maxDepth - 1):
                if uniquify:
                    pSmi = Chem.MolToSmiles(p, True)
                    if pSmi in seen:
                        continue
                    seen.add(pSmi)
                yield p
574
575
576
577
578
579
580
581
582
584 import doctest, sys
585 return doctest.testmod(sys.modules["__main__"],
586 optionflags=doctest.ELLIPSIS + doctest.NORMALIZE_WHITESPACE)
587
588
589 if __name__ == '__main__':
590 import unittest
591
593
595 m = Chem.MolFromSmiles('CC(=O)OC')
596 res = BRICSDecompose(m)
597 self.assertTrue(res)
598 self.assertTrue(len(res) == 2)
599
600 m = Chem.MolFromSmiles('CC(=O)N1CCC1=O')
601 res = BRICSDecompose(m)
602 self.assertTrue(res)
603 self.assertTrue(len(res) == 2, res)
604
605 m = Chem.MolFromSmiles('c1ccccc1N(C)C')
606 res = BRICSDecompose(m)
607 self.assertTrue(res)
608 self.assertTrue(len(res) == 2, res)
609
610 m = Chem.MolFromSmiles('c1cccnc1N(C)C')
611 res = BRICSDecompose(m)
612 self.assertTrue(res)
613 self.assertTrue(len(res) == 2, res)
614
615 m = Chem.MolFromSmiles('o1ccnc1N(C)C')
616 res = BRICSDecompose(m)
617 self.assertTrue(res)
618 self.assertTrue(len(res) == 2)
619
620 m = Chem.MolFromSmiles('c1ccccc1OC')
621 res = BRICSDecompose(m)
622 self.assertTrue(res)
623 self.assertTrue(len(res) == 2)
624
625 m = Chem.MolFromSmiles('o1ccnc1OC')
626 res = BRICSDecompose(m)
627 self.assertTrue(res)
628 self.assertTrue(len(res) == 2)
629
630 m = Chem.MolFromSmiles('O1CCNC1OC')
631 res = BRICSDecompose(m)
632 self.assertTrue(res)
633 self.assertTrue(len(res) == 2)
634
635 m = Chem.MolFromSmiles('CCCSCC')
636 res = BRICSDecompose(m)
637 self.assertTrue(res)
638 self.assertTrue(len(res) == 3, res)
639 self.assertTrue('[11*]S[11*]' in res, res)
640
641 m = Chem.MolFromSmiles('CCNC(=O)C1CC1')
642 res = BRICSDecompose(m)
643 self.assertTrue(res)
644 self.assertTrue(len(res) == 4, res)
645 self.assertTrue('[5*]N[5*]' in res, res)
646
648
649 m = Chem.MolFromSmiles('CNC(=O)C1=NC=CC(OC2=CC=C(NC(=O)NC3=CC(=C(Cl)C=C3)C(F)(F)F)C=C2)=C1')
650 res = BRICSDecompose(m)
651 self.assertTrue(res)
652 self.assertTrue(len(res) == 9, res)
653
655 m = Chem.MolFromSmiles('FC(F)(F)C1=C(Cl)C=CC(NC(=O)NC2=CC=CC=C2)=C1')
656 res = BRICSDecompose(m)
657 self.assertTrue(res)
658 self.assertTrue(len(res) == 5, res)
659 self.assertTrue('[5*]N[5*]' in res, res)
660 self.assertTrue('[16*]c1ccccc1' in res, res)
661 self.assertTrue('[8*]C(F)(F)F' in res, res)
662
664 allNodes = set()
665 m = Chem.MolFromSmiles('c1ccccc1OCCC')
666 res = BRICSDecompose(m, allNodes=allNodes)
667 self.assertTrue(res)
668 leaves = res
669 self.assertTrue(len(leaves) == 3, leaves)
670 self.assertTrue(len(allNodes) == 6, allNodes)
671 res = BRICSDecompose(m, allNodes=allNodes)
672 self.assertFalse(res)
673 self.assertTrue(len(allNodes) == 6, allNodes)
674
675 m = Chem.MolFromSmiles('c1ccccc1OCCCC')
676 res = BRICSDecompose(m, allNodes=allNodes)
677 self.assertTrue(res)
678 leaves.update(res)
679 self.assertTrue(len(allNodes) == 9, allNodes)
680 self.assertTrue(len(leaves) == 4, leaves)
681
682 m = Chem.MolFromSmiles('c1cc(C(=O)NCC)ccc1OCCC')
683 res = BRICSDecompose(m, allNodes=allNodes)
684 self.assertTrue(res)
685 leaves.update(res)
686 self.assertTrue(len(leaves) == 8, leaves)
687 self.assertTrue(len(allNodes) == 18, allNodes)
688
690 allNodes = set()
691 frags = [
692 '[14*]c1ncncn1',
693 '[16*]c1ccccc1',
694 '[14*]c1ncccc1',
695 ]
696 frags = [Chem.MolFromSmiles(x) for x in frags]
697 res = BRICSBuild(frags)
698 self.assertTrue(res)
699 res = list(res)
700 self.assertTrue(len(res) == 6)
701 smis = [Chem.MolToSmiles(x, True) for x in res]
702 self.assertTrue('c1ccc(-c2ccccc2)cc1' in smis)
703 self.assertTrue('c1ccc(-c2ccccn2)cc1' in smis)
704
706 allNodes = set()
707 frags = [
708 '[3*]O[3*]',
709 '[16*]c1ccccc1',
710 ]
711 frags = [Chem.MolFromSmiles(x) for x in frags]
712 res = BRICSBuild(frags)
713 self.assertTrue(res)
714 res = list(res)
715 smis = [Chem.MolToSmiles(x, True) for x in res]
716 self.assertTrue(len(smis) == 2, smis)
717 self.assertTrue('c1ccc(Oc2ccccc2)cc1' in smis)
718 self.assertTrue('c1ccc(-c2ccccc2)cc1' in smis)
719
721 allNodes = set()
722 frags = [
723 '[16*]c1ccccc1',
724 '[3*]OC',
725 '[9*]n1cccc1',
726 ]
727 frags = [Chem.MolFromSmiles(x) for x in frags]
728 res = BRICSBuild(frags)
729 self.assertTrue(res)
730 res = list(res)
731 self.assertTrue(len(res) == 3)
732 smis = [Chem.MolToSmiles(x, True) for x in res]
733 self.assertTrue('c1ccc(-c2ccccc2)cc1' in smis)
734 self.assertTrue('COc1ccccc1' in smis)
735 self.assertTrue('c1ccc(-n2cccc2)cc1' in smis, smis)
736
738 allNodes = set()
739 frags = [
740 '[16*]c1ccccc1',
741 '[3*]OC',
742 '[3*]OCC(=O)[6*]',
743 ]
744 frags = [Chem.MolFromSmiles(x) for x in frags]
745 res = BRICSBuild(frags)
746 self.assertTrue(res)
747 res = list(res)
748 smis = [Chem.MolToSmiles(x, True) for x in res]
749 self.assertTrue(len(res) == 3)
750 self.assertTrue('c1ccc(-c2ccccc2)cc1' in smis)
751 self.assertTrue('COc1ccccc1' in smis)
752 self.assertTrue('O=C(COc1ccccc1)c1ccccc1' in smis)
753
755 random.seed(23)
756 base = Chem.MolFromSmiles("n1cncnc1OCC(C1CC1)OC1CNC1")
757 catalog = BRICSDecompose(base)
758 self.assertTrue(len(catalog) == 5, catalog)
759 catalog = [Chem.MolFromSmiles(x) for x in catalog]
760 ms = list(BRICSBuild(catalog, maxDepth=4))
761 for m in ms:
762 Chem.SanitizeMol(m)
763 ms = [Chem.MolToSmiles(x) for x in ms]
764 self.assertEqual(len(ms), 36)
765
766 ts = ['n1cnc(C2CNC2)nc1', 'n1cnc(-c2ncncn2)nc1', 'C(OC1CNC1)C(C1CC1)OC1CNC1',
767 'n1cnc(OC(COC2CNC2)C2CC2)nc1', 'n1cnc(OCC(OC2CNC2)C2CNC2)nc1']
768 ts = [Chem.MolToSmiles(Chem.MolFromSmiles(x), True) for x in ts]
769 for t in ts:
770 self.assertTrue(t in ms, (t, ms))
771
773 m = Chem.MolFromSmiles('CCOc1ccccc1c1ncc(c2nc(NCCCC)ncn2)cc1')
774 res = BRICSDecompose(m)
775 self.assertEqual(len(res), 7)
776 self.assertTrue('[3*]O[3*]' in res)
777 self.assertFalse('[14*]c1ncnc(NCCCC)n1' in res)
778 res = BRICSDecompose(m, singlePass=True)
779 self.assertEqual(len(res), 13)
780 self.assertTrue('[3*]OCC' in res)
781 self.assertTrue('[14*]c1ncnc(NCCCC)n1' in res)
782
784 m = Chem.MolFromSmiles('C1CCCCN1c1ccccc1')
785 res = BRICSDecompose(m)
786 self.assertEqual(len(res), 2, res)
787
789
790 molblock = """
791 RDKit 3D
792
793 13 14 0 0 0 0 0 0 0 0999 V2000
794 -1.2004 0.5900 0.6110 C 0 0 0 0 0 0 0 0 0 0 0 0
795 -2.2328 1.3173 0.0343 C 0 0 0 0 0 0 0 0 0 0 0 0
796 -3.4299 0.6533 -0.1500 C 0 0 0 0 0 0 0 0 0 0 0 0
797 -3.3633 -0.7217 -0.3299 C 0 0 0 0 0 0 0 0 0 0 0 0
798 -2.1552 -1.3791 -0.2207 C 0 0 0 0 0 0 0 0 0 0 0 0
799 -1.1425 -0.7969 0.5335 C 0 0 0 0 0 0 0 0 0 0 0 0
800 0.1458 -1.4244 0.4108 O 0 0 0 0 0 0 0 0 0 0 0 0
801 1.2976 -0.7398 -0.1026 C 0 0 0 0 0 0 0 0 0 0 0 0
802 2.4889 -0.7939 0.5501 N 0 0 0 0 0 0 0 0 0 0 0 0
803 3.4615 0.1460 0.3535 C 0 0 0 0 0 0 0 0 0 0 0 0
804 3.0116 1.4034 -0.0296 C 0 0 0 0 0 0 0 0 0 0 0 0
805 1.9786 1.4264 -0.9435 C 0 0 0 0 0 0 0 0 0 0 0 0
806 1.1399 0.3193 -0.9885 C 0 0 0 0 0 0 0 0 0 0 0 0
807 1 2 2 0
808 2 3 1 0
809 3 4 2 0
810 4 5 1 0
811 5 6 2 0
812 6 7 1 0
813 7 8 1 0
814 8 9 2 0
815 9 10 1 0
816 10 11 2 0
817 11 12 1 0
818 12 13 2 0
819 6 1 1 0
820 13 8 1 0
821 M END
822 """
823 m = Chem.MolFromMolBlock(molblock)
824 pieces = BreakBRICSBonds(m)
825
826 frags = Chem.GetMolFrags(pieces, asMols=True)
827 self.assertEqual(len(frags), 3)
828 self.assertEqual(frags[0].GetNumAtoms(), 7)
829 self.assertEqual(frags[1].GetNumAtoms(), 3)
830 self.assertEqual(frags[2].GetNumAtoms(), 7)
831
832 c1 = m.GetConformer()
833 c2 = frags[0].GetConformer()
834 for i in range(6):
835 p1 = c1.GetAtomPosition(i)
836 p2 = c2.GetAtomPosition(i)
837 self.assertEqual((p1 - p2).Length(), 0.0)
838 p1 = c1.GetAtomPosition(6)
839 p2 = c2.GetAtomPosition(6)
840 self.assertEqual((p1 - p2).Length(), 0.0)
841
842 c2 = frags[2].GetConformer()
843 for i in range(6):
844 p1 = c1.GetAtomPosition(i + 7)
845 p2 = c2.GetAtomPosition(i)
846 self.assertEqual((p1 - p2).Length(), 0.0)
847 p1 = c1.GetAtomPosition(6)
848 p2 = c2.GetAtomPosition(6)
849 self.assertEqual((p1 - p2).Length(), 0.0)
850
851 c2 = frags[1].GetConformer()
852 for i in range(1):
853 p1 = c1.GetAtomPosition(i + 6)
854 p2 = c2.GetAtomPosition(i)
855 self.assertEqual((p1 - p2).Length(), 0.0)
856 p1 = c1.GetAtomPosition(5)
857 p2 = c2.GetAtomPosition(1)
858 self.assertEqual((p1 - p2).Length(), 0.0)
859 p1 = c1.GetAtomPosition(6)
860 p2 = c2.GetAtomPosition(0)
861 self.assertEqual((p1 - p2).Length(), 0.0)
862
863
864 molblock = """
865 RDKit 2D
866
867 13 14 0 0 0 0 0 0 0 0999 V2000
868 -1.2990 -0.8654 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
869 -2.5981 -1.6154 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
870 -3.8971 -0.8654 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
871 -3.8971 0.6346 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
872 -2.5981 1.3846 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
873 -1.2990 0.6346 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
874 -0.0000 1.3846 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
875 1.2990 0.6346 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
876 1.2990 -0.8654 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
877 2.5981 -1.6154 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
878 3.8971 -0.8654 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
879 3.8971 0.6346 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
880 2.5981 1.3846 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
881 1 2 2 0
882 2 3 1 0
883 3 4 2 0
884 4 5 1 0
885 5 6 2 0
886 6 7 1 0
887 7 8 1 0
888 8 9 2 0
889 9 10 1 0
890 10 11 2 0
891 11 12 1 0
892 12 13 2 0
893 6 1 1 0
894 13 8 1 0
895 M END
896 """
897 m2 = Chem.MolFromMolBlock(molblock)
898 m.AddConformer(m2.GetConformer(), assignId=True)
899 self.assertEqual(m.GetNumConformers(), 2)
900
901 pieces = BreakBRICSBonds(m)
902 frags = Chem.GetMolFrags(pieces, asMols=True)
903 self.assertEqual(len(frags), 3)
904 self.assertEqual(frags[0].GetNumAtoms(), 7)
905 self.assertEqual(frags[1].GetNumAtoms(), 3)
906 self.assertEqual(frags[2].GetNumAtoms(), 7)
907 self.assertEqual(frags[0].GetNumConformers(), 2)
908 self.assertEqual(frags[1].GetNumConformers(), 2)
909 self.assertEqual(frags[2].GetNumConformers(), 2)
910
911 c1 = m.GetConformer(0)
912 c2 = frags[0].GetConformer(0)
913 for i in range(6):
914 p1 = c1.GetAtomPosition(i)
915 p2 = c2.GetAtomPosition(i)
916 self.assertEqual((p1 - p2).Length(), 0.0)
917 p1 = c1.GetAtomPosition(6)
918 p2 = c2.GetAtomPosition(6)
919 self.assertEqual((p1 - p2).Length(), 0.0)
920
921 c2 = frags[2].GetConformer(0)
922 for i in range(6):
923 p1 = c1.GetAtomPosition(i + 7)
924 p2 = c2.GetAtomPosition(i)
925 self.assertEqual((p1 - p2).Length(), 0.0)
926 p1 = c1.GetAtomPosition(6)
927 p2 = c2.GetAtomPosition(6)
928 self.assertEqual((p1 - p2).Length(), 0.0)
929
930 c2 = frags[1].GetConformer(0)
931 for i in range(1):
932 p1 = c1.GetAtomPosition(i + 6)
933 p2 = c2.GetAtomPosition(i)
934 self.assertEqual((p1 - p2).Length(), 0.0)
935 p1 = c1.GetAtomPosition(5)
936 p2 = c2.GetAtomPosition(1)
937 self.assertEqual((p1 - p2).Length(), 0.0)
938 p1 = c1.GetAtomPosition(6)
939 p2 = c2.GetAtomPosition(0)
940 self.assertEqual((p1 - p2).Length(), 0.0)
941
942 c1 = m.GetConformer(1)
943 c2 = frags[0].GetConformer(1)
944 for i in range(6):
945 p1 = c1.GetAtomPosition(i)
946 p2 = c2.GetAtomPosition(i)
947 self.assertEqual((p1 - p2).Length(), 0.0)
948 p1 = c1.GetAtomPosition(6)
949 p2 = c2.GetAtomPosition(6)
950 self.assertEqual((p1 - p2).Length(), 0.0)
951
952 c2 = frags[2].GetConformer(1)
953 for i in range(6):
954 p1 = c1.GetAtomPosition(i + 7)
955 p2 = c2.GetAtomPosition(i)
956 self.assertEqual((p1 - p2).Length(), 0.0)
957 p1 = c1.GetAtomPosition(6)
958 p2 = c2.GetAtomPosition(6)
959 self.assertEqual((p1 - p2).Length(), 0.0)
960
961 c2 = frags[1].GetConformer(1)
962 for i in range(1):
963 p1 = c1.GetAtomPosition(i + 6)
964 p2 = c2.GetAtomPosition(i)
965 self.assertEqual((p1 - p2).Length(), 0.0)
966 p1 = c1.GetAtomPosition(5)
967 p2 = c2.GetAtomPosition(1)
968 self.assertEqual((p1 - p2).Length(), 0.0)
969 p1 = c1.GetAtomPosition(6)
970 p2 = c2.GetAtomPosition(0)
971 self.assertEqual((p1 - p2).Length(), 0.0)
972
974 m = Chem.MolFromSmiles('CCS(=O)(=O)NCC')
975 res = list(FindBRICSBonds(m))
976 self.assertEqual(len(res), 2, res)
977 atIds = [x[0] for x in res]
978 atIds.sort()
979 self.assertEqual(atIds, [(5, 2), (6, 5)])
980
981 failed, tried = _test()
982 if failed:
983 sys.exit(failed)
984
985 unittest.main()
986