1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32 from __future__ import print_function
33
34 import os
35
36 from rdkit import Chem
37 from rdkit import RDConfig
38 from rdkit.Chem import AllChem
39 from rdkit.Chem import rdChemReactions
40
41
43 """
44 >>> from rdkit.Chem import AllChem
45 >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','boronic1.rxn')
46 >>> rxn = AllChem.ReactionFromRxnFile(testFile)
47 >>> rxn.Initialize()
48 >>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
49 >>> nWarn
50 0
51 >>> nError
52 0
53 >>> nReacts
54 2
55 >>> nProds
56 1
57 >>> reactantLabels
58 (((0, 'halogen.bromine.aromatic'),), ((1, 'boronicacid'),))
59
60 If there are functional group labels in the input reaction (via atoms with molFileValue
61 properties), the corresponding atoms will have queries added to them so that they only
62 match such things. We can see this here:
63 >>> rxn = AllChem.ReactionFromRxnFile(testFile)
64 >>> rxn.Initialize()
65 >>> r1 = rxn.GetReactantTemplate(0)
66 >>> m1 = Chem.MolFromSmiles('CCBr')
67 >>> m2 = Chem.MolFromSmiles('c1ccccc1Br')
68
69 These both match because the reaction file itself just has R1-Br:
70 >>> m1.HasSubstructMatch(r1)
71 True
72 >>> m2.HasSubstructMatch(r1)
73 True
74
75 After preprocessing, we only match the aromatic Br:
76 >>> d = PreprocessReaction(rxn)
77 >>> m1.HasSubstructMatch(r1)
78 False
79 >>> m2.HasSubstructMatch(r1)
80 True
81
82 We also support OR queries in the values field (alternatives separated by commas):
83 >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','azide_reaction.rxn')
84 >>> rxn = AllChem.ReactionFromRxnFile(testFile)
85 >>> rxn.Initialize()
86 >>> reactantLabels = PreprocessReaction(rxn)[-1]
87 >>> reactantLabels
88 (((1, 'azide'),), ((1, 'carboxylicacid,acidchloride'),))
89 >>> m1 = Chem.MolFromSmiles('CC(=O)O')
90 >>> m2 = Chem.MolFromSmiles('CC(=O)Cl')
91 >>> m3 = Chem.MolFromSmiles('CC(=O)N')
92 >>> r2 = rxn.GetReactantTemplate(1)
93 >>> m1.HasSubstructMatch(r2)
94 True
95 >>> m2.HasSubstructMatch(r2)
96 True
97 >>> m3.HasSubstructMatch(r2)
98 False
99
100 Unrecognized functional group types cause an exception to be raised:
101 >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','bad_value1.rxn')
102 >>> rxn = AllChem.ReactionFromRxnFile(testFile)
103 >>> rxn.Initialize()
104 >>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
105 Traceback (most recent call last):
106 File "/usr/prog/python/2.6.6_gnu/lib/python2.6/doctest.py", line 1253, in __run
107 compileflags, 1) in test.globs
108 File "<doctest __main__.PreprocessReaction[36]>", line 1, in <module>
109 nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
110 File "Enumerator.py", line 105, in PreprocessReaction
111 reactantLabels = reaction.AddRecursiveQueriesToReaction(queryDict, propName='molFileValue', getLabels=True)
112 RuntimeError: KeyErrorException
113
114 One unrecognized group type in a comma-separated list makes the whole thing fail:
115 >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','bad_value2.rxn')
116 >>> rxn = AllChem.ReactionFromRxnFile(testFile)
117 >>> rxn.Initialize()
118 >>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
119 Traceback (most recent call last):
120 File "/usr/prog/python/2.6.6_gnu/lib/python2.6/doctest.py", line 1253, in __run
121 compileflags, 1) in test.globs
122 File "<doctest __main__.PreprocessReaction[36]>", line 1, in <module>
123 nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
124 File "Enumerator.py", line 105, in PreprocessReaction
125 reactantLabels = reaction.AddRecursiveQueriesToReaction(queryDict, propName='molFileValue', getLabels=True)
126 RuntimeError: KeyErrorException
127 >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','bad_value3.rxn')
128 >>> rxn = AllChem.ReactionFromRxnFile(testFile)
129 >>> rxn.Initialize()
130 >>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
131 Traceback (most recent call last):
132 File "/usr/prog/python/2.6.6_gnu/lib/python2.6/doctest.py", line 1253, in __run
133 compileflags, 1) in test.globs
134 File "<doctest __main__.PreprocessReaction[36]>", line 1, in <module>
135 nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
136 File "Enumerator.py", line 105, in PreprocessReaction
137 reactantLabels = reaction.AddRecursiveQueriesToReaction(queryDict, propName='molFileValue', getLabels=True)
138 RuntimeError: KeyErrorException
139 >>> rxn = rdChemReactions.ChemicalReaction()
140 >>> rxn.Initialize()
141 >>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
142 >>> reactantLabels
143 ()
144 >>> reactantLabels == ()
145 True
146 """
147
148 if funcGroupFilename:
149 try:
150 queryDict = Chem.ParseMolQueryDefFile(funcGroupFilename)
151 except Exception:
152 raise IOError('cannot open', funcGroupFilename)
153
154 return rdChemReactions.PreprocessReaction(reaction, queryDict, propName)
155 return rdChemReactions.PreprocessReaction(reaction, propName=propName)
156
157
def EnumerateReaction(
        reaction, bbLists, uniqueProductsOnly=False,
        funcGroupFilename=os.path.join(RDConfig.RDDataDir, 'Functional_Group_Hierarchy.txt'),
        propName='molFileValue'):
    """ Enumerates the products obtained by applying a reaction to building blocks.

    The reaction is first passed through PreprocessReaction(); the products are
    then generated with AllChem.EnumerateLibraryFromReaction().

    Arguments:
      - reaction: the reaction to apply; it is preprocessed by this call
      - bbLists: a sequence with one collection of building-block molecules
        per reactant of the reaction
      - uniqueProductsOnly: if true, an entry is only yielded the first time
        the sorted, '.'-joined SMILES of its products is seen
      - funcGroupFilename: functional group definition file, forwarded to
        PreprocessReaction()
      - propName: atom property name, forwarded to PreprocessReaction()

    Returns:
      an iterable of product entries, each of which is a sequence of product
      molecules. With uniqueProductsOnly a generator is returned.

    Raises:
      ValueError if preprocessing reports errors or if the number of
      building-block lists does not match the number of reactants.

    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','boronic1.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> rxn.Initialize()
    >>> reacts1=['Brc1ccccc1','Brc1ncccc1','Brc1cnccc1']
    >>> reacts1=[Chem.MolFromSmiles(x) for x in reacts1]
    >>> reacts2=['CCB(O)O','CCCB(O)O']
    >>> reacts2=[Chem.MolFromSmiles(x) for x in reacts2]

    >>> prods = EnumerateReaction(rxn,(reacts1,reacts2))
    >>> prods = list(prods)

    This is a bit nasty because of the symmetry of the boronic acid:
    >>> len(prods)
    12

    >>> smis = list(set([Chem.MolToSmiles(x[0]) for x in prods]))
    >>> smis.sort()
    >>> len(smis)
    6
    >>> print(smis)
    ['CCCc1ccccc1', 'CCCc1ccccn1', 'CCCc1cccnc1', 'CCc1ccccc1', 'CCc1ccccn1', 'CCc1cccnc1']

    The nastiness can be avoided at the cost of some memory by asking for only unique products:
    >>> prods = EnumerateReaction(rxn,(reacts1,reacts2),uniqueProductsOnly=True)
    >>> prods = list(prods)
    >>> len(prods)
    6
    >>> print(sorted([Chem.MolToSmiles(x[0]) for x in prods]))
    ['CCCc1ccccc1', 'CCCc1ccccn1', 'CCCc1cccnc1', 'CCc1ccccc1', 'CCc1ccccn1', 'CCc1cccnc1']


    """
    # Forward the caller's functional-group file and property name; they were
    # previously accepted by this function but never used.
    _, nError, nReacts, _, _ = PreprocessReaction(
        reaction, funcGroupFilename=funcGroupFilename, propName=propName)
    if nError:
        raise ValueError('bad reaction')
    if len(bbLists) != nReacts:
        raise ValueError('%d reactants in reaction, %d bb lists supplied' % (nReacts, len(bbLists)))

    def _uniqueOnly(lst):
        # A set keeps the "already seen" test O(1) per entry.
        seen = set()
        for entry in lst:
            # Empty entries are dropped.
            if entry:
                # Sorting makes the key independent of the order of the products.
                smi = '.'.join(sorted([Chem.MolToSmiles(x, True) for x in entry]))
                if smi not in seen:
                    seen.add(smi)
                    yield entry

    ps = AllChem.EnumerateLibraryFromReaction(reaction, bbLists)
    if not uniqueProductsOnly:
        return ps
    return _uniqueOnly(ps)
215
216
217
218
219
220
222 import sys
223 import doctest
224 failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose)
225 sys.exit(failed)
226
227
# Script entry point: when this file is executed directly (not imported),
# hand control to _runDoctests().
if __name__ == '__main__':
    _runDoctests()
230