1
2
3
4
5
6 from rdkit import Chem
7 from rdkit.VLib.Transform import TransformNode
8
9
""" Transforms molecules by removing atoms that match SMARTS patterns
12
13 Assumptions:
14
15 - inputs are molecules
16
17
18 Sample Usage:
19 >>> smis = ['C1CCC1.C=O','C1CCC1C=O','CCC=O.C=O','NCC=O.C=O.CN']
20 >>> mols = [Chem.MolFromSmiles(x) for x in smis]
21 >>> from rdkit.VLib.Supply import SupplyNode
22 >>> suppl = SupplyNode(contents=mols)
23 >>> ms = [x for x in suppl]
24 >>> len(ms)
25 4
26
27 We can pass in SMARTS strings:
28 >>> smas = ['C=O','CN']
29 >>> tform = SmartsRemover(patterns=smas)
30 >>> tform.AddParent(suppl)
31 >>> ms = [x for x in tform]
32 >>> len(ms)
33 4
34 >>> Chem.MolToSmiles(ms[0])
35 'C1CCC1'
36 >>> Chem.MolToSmiles(ms[1])
37 'O=CC1CCC1'
38 >>> Chem.MolToSmiles(ms[2])
39 'CCC=O'
40 >>> Chem.MolToSmiles(ms[3])
41 'NCC=O'
42
43 We can also remove pieces of the molecule that are not complete
44 fragments:
45 >>> tform.Destroy()
46 >>> smas = ['C=O','CN']
47 >>> smas = [Chem.MolFromSmarts(x) for x in smas]
48 >>> tform = SmartsRemover(patterns=smas,wholeFragments=0)
49 >>> tform.AddParent(suppl)
50 >>> ms = [x for x in tform]
51 >>> len(ms)
52 4
53 >>> Chem.MolToSmiles(ms[0])
54 'C1CCC1'
55 >>> Chem.MolToSmiles(ms[1])
56 'C1CCC1'
57 >>> Chem.MolToSmiles(ms[3])
58 ''
59
We can also pass in pre-built pattern molecules instead of SMARTS strings:
61 >>> tform.Destroy()
62 >>> smas = ['C=O','CN']
63 >>> smas = [Chem.MolFromSmarts(x) for x in smas]
64 >>> tform = SmartsRemover(patterns=smas)
65 >>> tform.AddParent(suppl)
66 >>> ms = [x for x in tform]
67 >>> len(ms)
68 4
69 >>> Chem.MolToSmiles(ms[0])
70 'C1CCC1'
71 >>> Chem.MolToSmiles(ms[3])
72 'NCC=O'
73
74
75 """
76
def __init__(self, patterns=[], wholeFragments=1, **kwargs):
    """ Store the removal patterns as a tuple of query molecules.

    Arguments:
      - patterns: iterable of SMARTS strings (str or bytes) and/or
        already-built query molecules. Strings are compiled with
        Chem.MolFromSmarts; anything else is stored unchanged.
        The default list is never mutated here.
      - wholeFragments, kwargs: accepted by the signature.
        NOTE(review): the code visible in this block does not use either
        of them, nor does it initialise a base class -- confirm that this
        handling has not been lost from the file.

    Raises:
      ValueError: if a SMARTS string cannot be parsed.
    """
    targets = []
    for p in patterns:
        # isinstance (not an exact type() match) so that str/bytes
        # subclasses are compiled too instead of being kept as raw text
        if isinstance(p, (str, bytes)):
            m = Chem.MolFromSmarts(p)
            if not m:
                raise ValueError('bad smarts: %s' % (p, ))
            p = m
        targets.append(p)
    # a tuple, so the stored pattern set cannot be modified in place
    self._patterns = tuple(targets)
94
102
103
104 biggerTest = """
105 >>> smis = ['CCOC','CCO.Cl','CC(=O)[O-].[Na+]','OCC','C[N+](C)(C)C.[Cl-]']
106 >>> mols = [Chem.MolFromSmiles(x) for x in smis]
107 >>> from rdkit.VLib.Supply import SupplyNode
108 >>> suppl = SupplyNode(contents=mols)
109 >>> ms = [x for x in suppl]
110 >>> len(ms)
111 5
112
113 #>>> salts = ['[Cl;H1&X1,-]','[Na+]','[O;H2,H1&-,X0&-2]']
114
115 >>> salts = ['[Cl;H1&X1,-]','[Na+]','[O;H2,H1&-,X0&-2]']
116 >>> m = mols[2]
117 >>> m.GetNumAtoms()
118 5
119 >>> patts = [Chem.MolFromSmarts(x) for x in salts]
120 >>> m2 = Chem.DeleteSubstructs(m,patts[0],1)
121 >>> m2.GetNumAtoms()
122 5
123 >>> m2 = Chem.DeleteSubstructs(m2,patts[1],1)
124 >>> m2.GetNumAtoms()
125 4
126 >>> m2 = Chem.DeleteSubstructs(m2,patts[2],1)
127 >>> m2.GetNumAtoms()
128 4
129
130 >>> tform = SmartsRemover(patterns=salts)
131 >>> tform.AddParent(suppl)
132 >>> ms = [x for x in tform]
133 >>> len(ms)
134 5
135
136 """
137
138
139
140
141
# doctest.testmod() also collects examples from the strings stored in the
# module-level ``__test__`` dict, so this makes ``biggerTest`` part of the run.
__test__ = {'bigger': biggerTest}
143
144
145
146
147
148
def _runDoctests(verbose=None):
    """ Run this module's doctests and exit with the number of failures.

    Arguments:
      - verbose: forwarded unchanged to doctest.testmod().

    Never returns: the process exits with the failure count as its status,
    so 0 means every example passed.
    """
    # imported locally: only needed when the module is run as a script
    import sys
    import doctest
    # testmod() returns (failure_count, test_count); only failures matter here
    failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose)
    sys.exit(failed)


if __name__ == '__main__':
    _runDoctests()
158