Package rdkit :: Package Chem :: Module EnumerateStereoisomers
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.EnumerateStereoisomers

  1  import six 
  2  import random 
  3  from rdkit import Chem 
  4  from rdkit.Chem.rdDistGeom import EmbedMolecule 
  5   
6 -class StereoEnumerationOptions(object):
7 """ 8 - tryEmbedding: if set the process attempts to generate a standard RDKit distance geometry 9 conformation for the stereisomer. If this fails, we assume that the stereoisomer is 10 non-physical and don't return it. NOTE that this is computationally expensive and is 11 just a heuristic that could result in stereoisomers being lost. 12 13 - onlyUnassigned: if set (the default), stereocenters which have specified stereochemistry 14 will not be perturbed 15 16 - maxIsomers: the maximum number of isomers to yield, if the 17 number of possible isomers is greater than maxIsomers, a 18 random subset will be yielded. If 0, all isomers are 19 yielded. Since every additional stereo center doubles the 20 number of results (and execution time) it's important to 21 keep an eye on this. 22 """ 23 __slots__ = ('tryEmbedding', 'onlyUnassigned', 'maxIsomers', 'rand')
24 - def __init__(self, tryEmbedding = False, onlyUnassigned = True, 25 maxIsomers = 1024, rand = None):
26 self.tryEmbedding = tryEmbedding 27 self.onlyUnassigned = onlyUnassigned 28 self.maxIsomers = maxIsomers 29 self.rand = rand
30
31 -class _BondFlipper(object):
32 - def __init__(self, bond):
33 self.bond = bond
34
35 - def flip(self, flag):
36 if flag: 37 self.bond.SetStereo(Chem.BondStereo.STEREOCIS) 38 else: 39 self.bond.SetStereo(Chem.BondStereo.STEREOTRANS)
40
41 -class _AtomFlipper(object):
42 - def __init__(self, atom):
43 self.atom = atom
44
45 - def flip(self, flag):
46 if flag: 47 self.atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CW) 48 else: 49 self.atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CCW)
50
51 -def _getFlippers(mol, options):
52 Chem.FindPotentialStereoBonds(mol) 53 54 flippers = [] 55 for atom in mol.GetAtoms(): 56 if atom.HasProp("_ChiralityPossible"): 57 if (not options.onlyUnassigned or 58 atom.GetChiralTag() == Chem.ChiralType.CHI_UNSPECIFIED): 59 flippers.append(_AtomFlipper(atom)) 60 61 for bond in mol.GetBonds(): 62 bstereo = bond.GetStereo() 63 if bstereo != Chem.BondStereo.STEREONONE: 64 if (not options.onlyUnassigned or 65 bstereo == Chem.BondStereo.STEREOANY): 66 flippers.append(_BondFlipper(bond)) 67 68 return flippers
69
70 -class _RangeBitsGenerator(object):
71 - def __init__(self, nCenters):
72 self.nCenters = nCenters
73
74 - def __iter__(self):
75 for val in six.moves.range(2**self.nCenters): 76 yield val
77
78 -class _UniqueRandomBitsGenerator(object):
79 - def __init__(self, nCenters, maxIsomers, rand):
80 self.nCenters = nCenters 81 self.maxIsomers = maxIsomers 82 self.rand = rand 83 self.already_seen = set()
84
85 - def __iter__(self):
86 # note: important that this is not 'while True' otherwise it 87 # would be possible to have an infinite loop caused by all 88 # isomers failing the embedding process 89 while len(self.already_seen) < 2**self.nCenters: 90 bits = self.rand.getrandbits(self.nCenters) 91 if bits in self.already_seen: 92 continue 93 94 self.already_seen.add(bits) 95 yield bits
96
97 -def EnumerateStereoisomers(m, options=StereoEnumerationOptions(), verbose=False):
98 """ returns a generator that yields possible stereoisomers for a molecule 99 100 Arguments: 101 - m: the molecule to work with 102 - verbose: toggles how verbose the output is 103 104 A small example with 3 chiral atoms and 1 chiral bond (16 theoretical stereoisomers): 105 >>> from rdkit import Chem 106 >>> from rdkit.Chem.EnumerateStereoisomers import EnumerateStereoisomers, StereoEnumerationOptions 107 >>> m = Chem.MolFromSmiles('BrC=CC1OC(C2)(F)C2(Cl)C1') 108 >>> isomers = tuple(EnumerateStereoisomers(m)) 109 >>> len(isomers) 110 16 111 >>> for smi in sorted(Chem.MolToSmiles(x, isomericSmiles=True) for x in isomers): 112 ... print(smi) 113 ... 114 F[C@@]12C[C@@]1(Cl)C[C@@H](/C=C/Br)O2 115 F[C@@]12C[C@@]1(Cl)C[C@@H](/C=C\Br)O2 116 F[C@@]12C[C@@]1(Cl)C[C@H](/C=C/Br)O2 117 F[C@@]12C[C@@]1(Cl)C[C@H](/C=C\Br)O2 118 F[C@@]12C[C@]1(Cl)C[C@@H](/C=C/Br)O2 119 F[C@@]12C[C@]1(Cl)C[C@@H](/C=C\Br)O2 120 F[C@@]12C[C@]1(Cl)C[C@H](/C=C/Br)O2 121 F[C@@]12C[C@]1(Cl)C[C@H](/C=C\Br)O2 122 F[C@]12C[C@@]1(Cl)C[C@@H](/C=C/Br)O2 123 F[C@]12C[C@@]1(Cl)C[C@@H](/C=C\Br)O2 124 F[C@]12C[C@@]1(Cl)C[C@H](/C=C/Br)O2 125 F[C@]12C[C@@]1(Cl)C[C@H](/C=C\Br)O2 126 F[C@]12C[C@]1(Cl)C[C@@H](/C=C/Br)O2 127 F[C@]12C[C@]1(Cl)C[C@@H](/C=C\Br)O2 128 F[C@]12C[C@]1(Cl)C[C@H](/C=C/Br)O2 129 F[C@]12C[C@]1(Cl)C[C@H](/C=C\Br)O2 130 131 Because the molecule is constrained, not all of those isomers can 132 actually exist. We can check that: 133 >>> opts = StereoEnumerationOptions(tryEmbedding=True) 134 >>> isomers = tuple(EnumerateStereoisomers(m, options=opts)) 135 >>> len(isomers) 136 8 137 >>> for smi in sorted(Chem.MolToSmiles(x,isomericSmiles=True) for x in isomers): 138 ... print(smi) 139 ... 140 F[C@@]12C[C@]1(Cl)C[C@@H](/C=C/Br)O2 141 F[C@@]12C[C@]1(Cl)C[C@@H](/C=C\Br)O2 142 F[C@@]12C[C@]1(Cl)C[C@H](/C=C/Br)O2 143 F[C@@]12C[C@]1(Cl)C[C@H](/C=C\Br)O2 144 F[C@]12C[C@@]1(Cl)C[C@@H](/C=C/Br)O2 145 F[C@]12C[C@@]1(Cl)C[C@@H](/C=C\Br)O2 146 F[C@]12C[C@@]1(Cl)C[C@H](/C=C/Br)O2 147 F[C@]12C[C@@]1(Cl)C[C@H](/C=C\Br)O2 148 149 By default the code only expands unspecified stereocenters: 150 >>> m = Chem.MolFromSmiles('BrC=C[C@H]1OC(C2)(F)C2(Cl)C1') 151 >>> isomers = tuple(EnumerateStereoisomers(m)) 152 >>> len(isomers) 153 8 154 >>> for smi in sorted(Chem.MolToSmiles(x,isomericSmiles=True) for x in isomers): 155 ... print(smi) 156 ... 157 F[C@@]12C[C@@]1(Cl)C[C@@H](/C=C/Br)O2 158 F[C@@]12C[C@@]1(Cl)C[C@@H](/C=C\Br)O2 159 F[C@@]12C[C@]1(Cl)C[C@@H](/C=C/Br)O2 160 F[C@@]12C[C@]1(Cl)C[C@@H](/C=C\Br)O2 161 F[C@]12C[C@@]1(Cl)C[C@@H](/C=C/Br)O2 162 F[C@]12C[C@@]1(Cl)C[C@@H](/C=C\Br)O2 163 F[C@]12C[C@]1(Cl)C[C@@H](/C=C/Br)O2 164 F[C@]12C[C@]1(Cl)C[C@@H](/C=C\Br)O2 165 166 But we can change that behavior: 167 >>> opts = StereoEnumerationOptions(onlyUnassigned=False) 168 >>> isomers = tuple(EnumerateStereoisomers(m, options=opts)) 169 >>> len(isomers) 170 16 171 172 Since the result is a generator, we can allow exploring at least parts of very 173 large result sets: 174 >>> m = Chem.MolFromSmiles('Br' + '[CH](Cl)' * 20 + 'F') 175 >>> opts = StereoEnumerationOptions(maxIsomers=0) 176 >>> isomers = EnumerateStereoisomers(m, options=opts) 177 >>> for x in range(5): 178 ... print(Chem.MolToSmiles(next(isomers),isomericSmiles=True)) 179 F[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)Br 180 F[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)Br 181 F[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)Br 182 F[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@H](Cl)Br 183 F[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)Br 184 185 Or randomly sample a small subset: 186 >>> m = Chem.MolFromSmiles('Br' + '[CH](Cl)' * 20 + 'F') 187 >>> opts = StereoEnumerationOptions(maxIsomers=3) 188 >>> isomers = EnumerateStereoisomers(m, options=opts) 189 >>> for smi in sorted(Chem.MolToSmiles(x, isomericSmiles=True) for x in isomers): 190 ... print(smi) 191 F[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@H](Cl)[C@H](Cl)[C@H](Cl)[C@H](Cl)[C@@H](Cl)Br 192 F[C@@H](Cl)[C@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)[C@H](Cl)[C@H](Cl)[C@H](Cl)[C@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)Br 193 F[C@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@H](Cl)[C@H](Cl)[C@H](Cl)[C@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)Br 194 """ 195 196 tm = Chem.Mol(m) 197 flippers = _getFlippers(tm, options) 198 nCenters = len(flippers) 199 if not nCenters: 200 yield tm 201 return 202 203 if (options.maxIsomers == 0 or 204 2**nCenters <= options.maxIsomers): 205 bitsource = _RangeBitsGenerator(nCenters) 206 else: 207 if options.rand is None: 208 # deterministic random seed invariant to input atom order 209 seed = hash(tuple(sorted([(a.GetDegree(), a.GetAtomicNum()) for a in tm.GetAtoms()]))) 210 rand = random.Random(seed) 211 elif isinstance(options.rand, random.Random): 212 # other implementations of Python random number generators 213 # can inherit from this class to pick up utility methods 214 rand = options.rand 215 else: 216 rand = random.Random(options.rand) 217 218 bitsource = _UniqueRandomBitsGenerator(nCenters, options.maxIsomers, rand) 219 220 numIsomers = 0 221 for bitflag in bitsource: 222 for i in range(nCenters): 223 flag = bool(bitflag & (1 << i)) 224 flippers[i].flip(flag) 225 226 isomer = Chem.Mol(tm) 227 if options.tryEmbedding: 228 ntm = Chem.AddHs(isomer) 229 cid = EmbedMolecule(ntm, randomSeed=bitflag) 230 if cid >= 0: 231 conf = Chem.Conformer(isomer.GetNumAtoms()) 232 for aid in range(isomer.GetNumAtoms()): 233 conf.SetAtomPosition(aid, ntm.GetConformer().GetAtomPosition(aid)) 234 isomer.AddConformer(conf) 235 else: 236 cid = 1 237 if cid >= 0: 238 yield isomer 239 numIsomers += 1 240 if options.maxIsomers != 0 and numIsomers >= options.maxIsomers: 241 break 242 elif verbose: 243 print("%s failed to embed" % (Chem.MolToSmiles(isomer, isomericSmiles=True)))
244