Package rdkit ::
Package Chem ::
Module EnumerateStereoisomers
|
|
1 import six
2 import random
3 from rdkit import Chem
4 from rdkit.Chem.rdDistGeom import EmbedMolecule
5
7 """
8 - tryEmbedding: if set the process attempts to generate a standard RDKit distance geometry
9 conformation for the stereisomer. If this fails, we assume that the stereoisomer is
10 non-physical and don't return it. NOTE that this is computationally expensive and is
11 just a heuristic that could result in stereoisomers being lost.
12
13 - onlyUnassigned: if set (the default), stereocenters which have specified stereochemistry
14 will not be perturbed
15
16 - maxIsomers: the maximum number of isomers to yield, if the
17 number of possible isomers is greater than maxIsomers, a
18 random subset will be yielded. If 0, all isomers are
19 yielded. Since every additional stereo center doubles the
20 number of results (and execution time) it's important to
21 keep an eye on this.
22 """
23 __slots__ = ('tryEmbedding', 'onlyUnassigned', 'maxIsomers', 'rand')
24 - def __init__(self, tryEmbedding = False, onlyUnassigned = True,
25 maxIsomers = 1024, rand = None):
26 self.tryEmbedding = tryEmbedding
27 self.onlyUnassigned = onlyUnassigned
28 self.maxIsomers = maxIsomers
29 self.rand = rand
30
34
35 - def flip(self, flag):
36 if flag:
37 self.bond.SetStereo(Chem.BondStereo.STEREOCIS)
38 else:
39 self.bond.SetStereo(Chem.BondStereo.STEREOTRANS)
40
44
45 - def flip(self, flag):
46 if flag:
47 self.atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CW)
48 else:
49 self.atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CCW)
50
52 Chem.FindPotentialStereoBonds(mol)
53
54 flippers = []
55 for atom in mol.GetAtoms():
56 if atom.HasProp("_ChiralityPossible"):
57 if (not options.onlyUnassigned or
58 atom.GetChiralTag() == Chem.ChiralType.CHI_UNSPECIFIED):
59 flippers.append(_AtomFlipper(atom))
60
61 for bond in mol.GetBonds():
62 bstereo = bond.GetStereo()
63 if bstereo != Chem.BondStereo.STEREONONE:
64 if (not options.onlyUnassigned or
65 bstereo == Chem.BondStereo.STEREOANY):
66 flippers.append(_BondFlipper(bond))
67
68 return flippers
69
72 self.nCenters = nCenters
73
75 for val in six.moves.range(2**self.nCenters):
76 yield val
77
79 - def __init__(self, nCenters, maxIsomers, rand):
80 self.nCenters = nCenters
81 self.maxIsomers = maxIsomers
82 self.rand = rand
83 self.already_seen = set()
84
86
87
88
89 while len(self.already_seen) < 2**self.nCenters:
90 bits = self.rand.getrandbits(self.nCenters)
91 if bits in self.already_seen:
92 continue
93
94 self.already_seen.add(bits)
95 yield bits
96
98 """ returns a generator that yields possible stereoisomers for a molecule
99
100 Arguments:
101 - m: the molecule to work with
102 - verbose: toggles how verbose the output is
103
104 A small example with 3 chiral atoms and 1 chiral bond (16 theoretical stereoisomers):
105 >>> from rdkit import Chem
106 >>> from rdkit.Chem.EnumerateStereoisomers import EnumerateStereoisomers, StereoEnumerationOptions
107 >>> m = Chem.MolFromSmiles('BrC=CC1OC(C2)(F)C2(Cl)C1')
108 >>> isomers = tuple(EnumerateStereoisomers(m))
109 >>> len(isomers)
110 16
111 >>> for smi in sorted(Chem.MolToSmiles(x, isomericSmiles=True) for x in isomers):
112 ... print(smi)
113 ...
114 F[C@@]12C[C@@]1(Cl)C[C@@H](/C=C/Br)O2
115 F[C@@]12C[C@@]1(Cl)C[C@@H](/C=C\Br)O2
116 F[C@@]12C[C@@]1(Cl)C[C@H](/C=C/Br)O2
117 F[C@@]12C[C@@]1(Cl)C[C@H](/C=C\Br)O2
118 F[C@@]12C[C@]1(Cl)C[C@@H](/C=C/Br)O2
119 F[C@@]12C[C@]1(Cl)C[C@@H](/C=C\Br)O2
120 F[C@@]12C[C@]1(Cl)C[C@H](/C=C/Br)O2
121 F[C@@]12C[C@]1(Cl)C[C@H](/C=C\Br)O2
122 F[C@]12C[C@@]1(Cl)C[C@@H](/C=C/Br)O2
123 F[C@]12C[C@@]1(Cl)C[C@@H](/C=C\Br)O2
124 F[C@]12C[C@@]1(Cl)C[C@H](/C=C/Br)O2
125 F[C@]12C[C@@]1(Cl)C[C@H](/C=C\Br)O2
126 F[C@]12C[C@]1(Cl)C[C@@H](/C=C/Br)O2
127 F[C@]12C[C@]1(Cl)C[C@@H](/C=C\Br)O2
128 F[C@]12C[C@]1(Cl)C[C@H](/C=C/Br)O2
129 F[C@]12C[C@]1(Cl)C[C@H](/C=C\Br)O2
130
131 Because the molecule is constrained, not all of those isomers can
132 actually exist. We can check that:
133 >>> opts = StereoEnumerationOptions(tryEmbedding=True)
134 >>> isomers = tuple(EnumerateStereoisomers(m, options=opts))
135 >>> len(isomers)
136 8
137 >>> for smi in sorted(Chem.MolToSmiles(x,isomericSmiles=True) for x in isomers):
138 ... print(smi)
139 ...
140 F[C@@]12C[C@]1(Cl)C[C@@H](/C=C/Br)O2
141 F[C@@]12C[C@]1(Cl)C[C@@H](/C=C\Br)O2
142 F[C@@]12C[C@]1(Cl)C[C@H](/C=C/Br)O2
143 F[C@@]12C[C@]1(Cl)C[C@H](/C=C\Br)O2
144 F[C@]12C[C@@]1(Cl)C[C@@H](/C=C/Br)O2
145 F[C@]12C[C@@]1(Cl)C[C@@H](/C=C\Br)O2
146 F[C@]12C[C@@]1(Cl)C[C@H](/C=C/Br)O2
147 F[C@]12C[C@@]1(Cl)C[C@H](/C=C\Br)O2
148
149 By default the code only expands unspecified stereocenters:
150 >>> m = Chem.MolFromSmiles('BrC=C[C@H]1OC(C2)(F)C2(Cl)C1')
151 >>> isomers = tuple(EnumerateStereoisomers(m))
152 >>> len(isomers)
153 8
154 >>> for smi in sorted(Chem.MolToSmiles(x,isomericSmiles=True) for x in isomers):
155 ... print(smi)
156 ...
157 F[C@@]12C[C@@]1(Cl)C[C@@H](/C=C/Br)O2
158 F[C@@]12C[C@@]1(Cl)C[C@@H](/C=C\Br)O2
159 F[C@@]12C[C@]1(Cl)C[C@@H](/C=C/Br)O2
160 F[C@@]12C[C@]1(Cl)C[C@@H](/C=C\Br)O2
161 F[C@]12C[C@@]1(Cl)C[C@@H](/C=C/Br)O2
162 F[C@]12C[C@@]1(Cl)C[C@@H](/C=C\Br)O2
163 F[C@]12C[C@]1(Cl)C[C@@H](/C=C/Br)O2
164 F[C@]12C[C@]1(Cl)C[C@@H](/C=C\Br)O2
165
166 But we can change that behavior:
167 >>> opts = StereoEnumerationOptions(onlyUnassigned=False)
168 >>> isomers = tuple(EnumerateStereoisomers(m, options=opts))
169 >>> len(isomers)
170 16
171
172 Since the result is a generator, we can allow exploring at least parts of very
173 large result sets:
174 >>> m = Chem.MolFromSmiles('Br' + '[CH](Cl)' * 20 + 'F')
175 >>> opts = StereoEnumerationOptions(maxIsomers=0)
176 >>> isomers = EnumerateStereoisomers(m, options=opts)
177 >>> for x in range(5):
178 ... print(Chem.MolToSmiles(next(isomers),isomericSmiles=True))
179 F[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)Br
180 F[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)Br
181 F[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)Br
182 F[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@H](Cl)Br
183 F[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)Br
184
185 Or randomly sample a small subset:
186 >>> m = Chem.MolFromSmiles('Br' + '[CH](Cl)' * 20 + 'F')
187 >>> opts = StereoEnumerationOptions(maxIsomers=3)
188 >>> isomers = EnumerateStereoisomers(m, options=opts)
189 >>> for smi in sorted(Chem.MolToSmiles(x, isomericSmiles=True) for x in isomers):
190 ... print(smi)
191 F[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@H](Cl)[C@H](Cl)[C@H](Cl)[C@H](Cl)[C@@H](Cl)Br
192 F[C@@H](Cl)[C@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)[C@H](Cl)[C@H](Cl)[C@H](Cl)[C@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)Br
193 F[C@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@H](Cl)[C@H](Cl)[C@H](Cl)[C@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)Br
194 """
195
196 tm = Chem.Mol(m)
197 flippers = _getFlippers(tm, options)
198 nCenters = len(flippers)
199 if not nCenters:
200 yield tm
201 return
202
203 if (options.maxIsomers == 0 or
204 2**nCenters <= options.maxIsomers):
205 bitsource = _RangeBitsGenerator(nCenters)
206 else:
207 if options.rand is None:
208
209 seed = hash(tuple(sorted([(a.GetDegree(), a.GetAtomicNum()) for a in tm.GetAtoms()])))
210 rand = random.Random(seed)
211 elif isinstance(options.rand, random.Random):
212
213
214 rand = options.rand
215 else:
216 rand = random.Random(options.rand)
217
218 bitsource = _UniqueRandomBitsGenerator(nCenters, options.maxIsomers, rand)
219
220 numIsomers = 0
221 for bitflag in bitsource:
222 for i in range(nCenters):
223 flag = bool(bitflag & (1 << i))
224 flippers[i].flip(flag)
225
226 isomer = Chem.Mol(tm)
227 if options.tryEmbedding:
228 ntm = Chem.AddHs(isomer)
229 cid = EmbedMolecule(ntm, randomSeed=bitflag)
230 if cid >= 0:
231 conf = Chem.Conformer(isomer.GetNumAtoms())
232 for aid in range(isomer.GetNumAtoms()):
233 conf.SetAtomPosition(aid, ntm.GetConformer().GetAtomPosition(aid))
234 isomer.AddConformer(conf)
235 else:
236 cid = 1
237 if cid >= 0:
238 yield isomer
239 numIsomers += 1
240 if options.maxIsomers != 0 and numIsomers >= options.maxIsomers:
241 break
242 elif verbose:
243 print("%s failed to embed" % (Chem.MolToSmiles(isomer, isomericSmiles=True)))
244