1
2
3
4
5
6
7
8 """ Torsion Fingerprints (Deviation) (TFD)
9 According to a paper from Schulz-Gasch et al., JCIM, 52, 1499-1512 (2012).
10
11 """
12 from rdkit import rdBase
13 from rdkit import RDConfig
14 from rdkit import Geometry
15 from rdkit import Chem
16 from rdkit.Chem import rdchem
17 from rdkit.Chem import rdMolDescriptors
18 import math, os
19
20
22 """ Helper function to check if all atoms in the list are the same
23
24 Arguments:
25 - inv: atom invariants (used to define equivalence of atoms)
26 - atoms: list of atoms to check
27
28 Return: boolean
29 """
30 match = True
31 for i in range(len(atoms) - 1):
32 for j in range(i + 1, len(atoms)):
33 if (inv[atoms[i].GetIdx()] != inv[atoms[j].GetIdx()]):
34 match = False
35 return match
36 return match
37
38
40 """ Helper function to check if all atoms in the list are NOT the same
41
42 Arguments:
43 - inv: atom invariants (used to define equivalence of atoms)
44 - atoms: list of atoms to check
45
46 Return: boolean
47 """
48 match = True
49 for i in range(len(atoms) - 1):
50 for j in range(i + 1, len(atoms)):
51 if (inv[atoms[i].GetIdx()] == inv[atoms[j].GetIdx()]):
52 match = False
53 return match
54 return match
55
56
58 """ Helper function to check if two atoms in the list are the same,
59 and one not
60 Note: Works only for three atoms
61
62 Arguments:
63 - inv: atom invariants (used to define equivalence of atoms)
64 - atoms: list of atoms to check
65
66 Return: atom that is different
67 """
68 if len(atoms) != 3:
69 raise ValueError("Number of atoms must be three")
70 a1 = atoms[0].GetIdx()
71 a2 = atoms[1].GetIdx()
72 a3 = atoms[2].GetIdx()
73 if (inv[a1] == inv[a2] and inv[a1] != inv[a3] and inv[a2] != inv[a3]):
74 return atoms[2]
75 elif (inv[a1] != inv[a2] and inv[a1] == inv[a3] and inv[a2] != inv[a3]):
76 return atoms[1]
77 elif (inv[a1] != inv[a2] and inv[a1] != inv[a3] and inv[a2] == inv[a3]):
78 return atoms[0]
79 return None
80
81
83 """ Helper function to calculate the atom invariants for each atom
84 with a given radius
85
86 Arguments:
87 - mol: the molecule of interest
88 - radius: the radius for the Morgan fingerprint
89
90 Return: list of atom invariants
91 """
92 inv = []
93 for i in range(mol.GetNumAtoms()):
94 info = {}
95 fp = rdMolDescriptors.GetMorganFingerprint(mol, radius, fromAtoms=[i], bitInfo=info)
96 for k in info.keys():
97 if info[k][0][1] == radius:
98 inv.append(k)
99 return inv
100
101
103 """ Helper function to calculate the number of heavy atom neighbors.
104
105 Arguments:
106 - atom1: the atom of interest
107 - aid2: atom index that should be excluded from neighbors (default: none)
108
109 Return: a list of heavy atom neighbors of the given atom
110 """
111 if aid2 < 0:
112 return [n for n in atom1.GetNeighbors() if n.GetSymbol() != 'H']
113 else:
114 return [n for n in atom1.GetNeighbors() if (n.GetSymbol() != 'H' and n.GetIdx() != aid2)]
115
116
118 """ Helper function to calculate the index of the reference atom for
119 a given atom
120
121 Arguments:
122 - neighbors: list of the neighbors of the atom
123 - inv: atom invariants
124
125 Return: list of atom indices as reference for torsion
126 """
127 if len(neighbors) == 1:
128 return [neighbors[0]]
129 elif _doMatch(inv, neighbors):
130 return neighbors
131 elif _doNotMatch(inv, neighbors):
132
133 neighbors = sorted(neighbors, key=lambda x: inv[x.GetIdx()])
134 return [neighbors[0]]
135 at = _doMatchExcept1(inv, neighbors)
136 if at is None:
137 raise ValueError("Atom neighbors are either all the same or all different")
138 return [at]
139
140
142 """ Determine the bonds (or pair of atoms treated like a bond) for which
143 torsions should be calculated.
144
145 Arguments:
146 - refmol: the molecule of interest
147 - ignoreColinearBonds: if True (default), single bonds adjacent to
148 triple bonds are ignored
149 if False, alternative not-covalently bound
150 atoms are used to define the torsion
151 """
152
153
154 patts = [Chem.MolFromSmarts(x) for x in ['*#*', '[$([C](=*)=*)]']]
155 atomFlags = [0] * mol.GetNumAtoms()
156 for p in patts:
157 if mol.HasSubstructMatch(p):
158 matches = mol.GetSubstructMatches(p)
159 for match in matches:
160 for a in match:
161 atomFlags[a] = 1
162
163 bonds = []
164 doneBonds = [0] * mol.GetNumBonds()
165 for b in mol.GetBonds():
166 if b.IsInRing():
167 continue
168 a1 = b.GetBeginAtomIdx()
169 a2 = b.GetEndAtomIdx()
170 nb1 = _getHeavyAtomNeighbors(b.GetBeginAtom(), a2)
171 nb2 = _getHeavyAtomNeighbors(b.GetEndAtom(), a1)
172 if not doneBonds[b.GetIdx()] and (nb1 and nb2):
173 doneBonds[b.GetIdx()] = 1
174
175 if atomFlags[a1] or atomFlags[a2]:
176 if not ignoreColinearBonds:
177 while len(nb1) == 1 and atomFlags[a1]:
178 a1old = a1
179 a1 = nb1[0].GetIdx()
180 b = mol.GetBondBetweenAtoms(a1old, a1)
181 if b.GetEndAtom().GetIdx() == a1old:
182 nb1 = _getHeavyAtomNeighbors(b.GetBeginAtom(), a1old)
183 else:
184 nb1 = _getHeavyAtomNeighbors(b.GetEndAtom(), a1old)
185 doneBonds[b.GetIdx()] = 1
186 while len(nb2) == 1 and atomFlags[a2]:
187 doneBonds[b.GetIdx()] = 1
188 a2old = a2
189 a2 = nb2[0].GetIdx()
190 b = mol.GetBondBetweenAtoms(a2old, a2)
191 if b.GetBeginAtom().GetIdx() == a2old:
192 nb2 = _getHeavyAtomNeighbors(b.GetEndAtom(), a2old)
193 else:
194 nb2 = _getHeavyAtomNeighbors(b.GetBeginAtom(), a2old)
195 doneBonds[b.GetIdx()] = 1
196 if nb1 and nb2:
197 bonds.append((a1, a2, nb1, nb2))
198
199 else:
200 bonds.append((a1, a2, nb1, nb2))
201 return bonds
202
203
205 """ Calculate a list of torsions for a given molecule. For each torsion
206 the four atom indices are determined and stored in a set.
207
208 Arguments:
209 - mol: the molecule of interest
210 - maxDev: maximal deviation used for normalization
211 'equal': all torsions are normalized using 180.0 (default)
212 'spec': each torsion is normalized using its specific
213 maximal deviation as given in the paper
214 - symmRadius: radius used for calculating the atom invariants
215 (default: 2)
216 - ignoreColinearBonds: if True (default), single bonds adjacent to
217 triple bonds are ignored
218 if False, alternative not-covalently bound
219 atoms are used to define the torsion
220
221 Return: two lists of torsions: non-ring and ring torsions
222 """
223 if maxDev not in ['equal', 'spec']:
224 raise ValueError("maxDev must be either equal or spec")
225
226 bonds = _getBondsForTorsions(mol, ignoreColinearBonds)
227
228 if symmRadius > 0:
229 inv = _getAtomInvariantsWithRadius(mol, symmRadius)
230 else:
231 inv = rdMolDescriptors.GetConnectivityInvariants(mol)
232
233 tors_list = []
234 for a1, a2, nb1, nb2 in bonds:
235 d1 = _getIndexforTorsion(nb1, inv)
236 d2 = _getIndexforTorsion(nb2, inv)
237 if len(d1) == 1 and len(d2) == 1:
238 tors_list.append(([(d1[0].GetIdx(), a1, a2, d2[0].GetIdx())], 180.0))
239 elif len(d1) == 1:
240 if len(nb2) == 2:
241 tors_list.append(([(d1[0].GetIdx(), a1, a2, nb.GetIdx()) for nb in d2], 90.0))
242 else:
243 tors_list.append(([(d1[0].GetIdx(), a1, a2, nb.GetIdx()) for nb in d2], 60.0))
244 elif len(d2) == 1:
245 if len(nb1) == 2:
246 tors_list.append(([(nb.GetIdx(), a1, a2, d2[0].GetIdx()) for nb in d1], 90.0))
247 else:
248 tors_list.append(([(nb.GetIdx(), a1, a2, d2[0].GetIdx()) for nb in d1], 60.0))
249 else:
250 tmp = []
251 for n1 in d1:
252 for n2 in d2:
253 tmp.append((n1.GetIdx(), a1, a2, n2.GetIdx()))
254 if len(nb1) == 2 and len(nb2) == 2:
255 tors_list.append((tmp, 90.0))
256 elif len(nb1) == 3 and len(nb2) == 3:
257 tors_list.append((tmp, 60.0))
258 else:
259 tors_list.append((tmp, 30.0))
260
261 if maxDev == 'equal':
262 tors_list = [(t, 180.0) for t, d in tors_list]
263
264 rings = Chem.GetSymmSSSR(mol)
265 tors_list_rings = []
266 for r in rings:
267
268 tmp = []
269 num = len(r)
270 maxdev = 180.0 * math.exp(-0.025 * (num - 14) * (num - 14))
271 for i in range(len(r)):
272 tmp.append((r[i], r[(i + 1) % num], r[(i + 2) % num], r[(i + 3) % num]))
273 tors_list_rings.append((tmp, maxdev))
274 return tors_list, tors_list_rings
275
276
278 """ Helper function to retrieve the coordinates of the four atoms
279 in a torsion
280
281 Arguments:
282 - atoms: list with the four atoms
283 - conf: conformation of the molecule
284
285 Return: Point3D objects of the four atoms
286 """
287 if len(atoms) != 4:
288 raise ValueError("List must contain exactly four atoms")
289 p1 = conf.GetAtomPosition(atoms[0])
290 p2 = conf.GetAtomPosition(atoms[1])
291 p3 = conf.GetAtomPosition(atoms[2])
292 p4 = conf.GetAtomPosition(atoms[3])
293 return p1, p2, p3, p4
294
295
297 """ Calculate the torsion angles for a list of non-ring and
298 a list of ring torsions.
299
300 Arguments:
301 - mol: the molecule of interest
302 - tors_list: list of non-ring torsions
303 - tors_list_rings: list of ring torsions
304 - confId: index of the conformation (default: first conformer)
305
306 Return: list of torsion angles
307 """
308 torsions = []
309 conf = mol.GetConformer(confId)
310 for quartets, maxdev in tors_list:
311 tors = []
312
313 for atoms in quartets:
314 p1, p2, p3, p4 = _getTorsionAtomPositions(atoms, conf)
315 tmpTors = (Geometry.ComputeSignedDihedralAngle(p1, p2, p3, p4) / math.pi) * 180.0
316 if tmpTors < 0:
317 tmpTors += 360.0
318 tors.append(tmpTors)
319 torsions.append((tors, maxdev))
320
321 for quartets, maxdev in tors_list_rings:
322 num = len(quartets)
323
324 tors = 0
325 for atoms in quartets:
326 p1, p2, p3, p4 = _getTorsionAtomPositions(atoms, conf)
327 tmpTors = abs((Geometry.ComputeSignedDihedralAngle(p1, p2, p3, p4) / math.pi) * 180.0)
328 tors += tmpTors
329 tors /= num
330 torsions.append(([tors], maxdev))
331 return torsions
332
333
335 """ Helper function to identify the atoms of the most central bond.
336
337 Arguments:
338 - mol: the molecule of interest
339 - distmat: distance matrix of the molecule
340
341 Return: atom indices of the two most central atoms (in order)
342 """
343 from numpy import std
344
345 stds = []
346 for i in range(mol.GetNumAtoms()):
347
348 if len(_getHeavyAtomNeighbors(mol.GetAtomWithIdx(i))) < 2:
349 continue
350 tmp = [d for d in distmat[i]]
351 tmp.pop(i)
352 stds.append((std(tmp), i))
353 stds.sort()
354 aid1 = stds[0][1]
355
356 i = 1
357 while 1:
358 if mol.GetBondBetweenAtoms(aid1, stds[i][1]) is None:
359 i += 1
360 else:
361 aid2 = stds[i][1]
362 break
363 return aid1, aid2
364
365
367 """ Helper function to calculate the beta for torsion weights
368 according to the formula in the paper.
369 w(dmax/2) = 0.1
370
371 Arguments:
372 - mol: the molecule of interest
373 - distmat: distance matrix of the molecule
374 - aid1: atom index of the most central atom
375
376 Return: value of beta (float)
377 """
378
379 bonds = []
380 for b in mol.GetBonds():
381 nb1 = _getHeavyAtomNeighbors(b.GetBeginAtom())
382 nb2 = _getHeavyAtomNeighbors(b.GetEndAtom())
383 if len(nb2) > 1 and len(nb2) > 1:
384 bonds.append(b)
385
386 dmax = 0
387 for b in bonds:
388 bid1 = b.GetBeginAtom().GetIdx()
389 bid2 = b.GetEndAtom().GetIdx()
390 d = max([distmat[aid1][bid1], distmat[aid1][bid2]])
391 if (d > dmax):
392 dmax = d
393 dmax2 = dmax / 2.0
394 beta = -math.log(0.1) / (dmax2 * dmax2)
395 return beta
396
397
399 """ Calculate the weights for the torsions in a molecule.
400 By default, the highest weight is given to the bond
401 connecting the two most central atoms.
402 If desired, two alternate atoms can be specified (must
403 be connected by a bond).
404
405 Arguments:
406 - mol: the molecule of interest
407 - aid1: index of the first atom (default: most central)
408 - aid2: index of the second atom (default: second most central)
409 - ignoreColinearBonds: if True (default), single bonds adjacent to
410 triple bonds are ignored
411 if False, alternative not-covalently bound
412 atoms are used to define the torsion
413
414 Return: list of torsion weights (both non-ring and ring)
415 """
416
417 distmat = Chem.GetDistanceMatrix(mol)
418 if aid1 < 0 and aid2 < 0:
419 aid1, aid2 = _findCentralBond(mol, distmat)
420 else:
421 b = mol.GetBondBetweenAtoms(aid1, aid2)
422 if b is None:
423 raise ValueError("Specified atoms must be connected by a bond.")
424
425 beta = _calculateBeta(mol, distmat, aid1)
426
427 bonds = _getBondsForTorsions(mol, ignoreColinearBonds)
428
429 weights = []
430 for bid1, bid2, nb1, nb2 in bonds:
431 if ((bid1, bid2) == (aid1, aid2) or
432 (bid2, bid1) == (aid1, aid2)):
433 d = 0
434 else:
435
436 d = min(distmat[aid1][bid1], distmat[aid1][bid2], distmat[aid2][bid1],
437 distmat[aid2][bid2]) + 1
438 w = math.exp(-beta * (d * d))
439 weights.append(w)
440
441
442 rings = mol.GetRingInfo()
443 for r in rings.BondRings():
444
445 tmp = []
446 num = len(r)
447 for bidx in r:
448 b = mol.GetBondWithIdx(bidx)
449 bid1 = b.GetBeginAtomIdx()
450 bid2 = b.GetEndAtomIdx()
451
452 d = min(distmat[aid1][bid1], distmat[aid1][bid2], distmat[aid2][bid1],
453 distmat[aid2][bid2]) + 1
454 tmp.append(d)
455
456
457
458
459 w = sum(tmp) / float(num)
460 w = math.exp(-beta * (w * w))
461 weights.append(w * (num / 2.0))
462 return weights
463
464
466 """ Calculate the torsion deviation fingerprint (TFD) given two lists of
467 torsion angles.
468
469 Arguments:
470 - torsions1: torsion angles of conformation 1
471 - torsions2: torsion angles of conformation 2
472 - weights: list of torsion weights (default: None)
473
474 Return: TFD value (float)
475 """
476 if len(torsions1) != len(torsions2):
477 raise ValueError("List of torsions angles must have the same size.")
478
479 deviations = []
480 for tors1, tors2 in zip(torsions1, torsions2):
481 mindiff = 180.0
482 for t1 in tors1[0]:
483 for t2 in tors2[0]:
484 diff = abs(t1 - t2)
485 if (360.0 - diff) < diff:
486 diff = 360.0 - diff
487
488 if diff < mindiff:
489 mindiff = diff
490 deviations.append(mindiff / tors1[1])
491
492 if weights is not None:
493 if len(weights) != len(torsions1):
494 raise ValueError("List of torsions angles and weights must have the same size.")
495 deviations = [d * w for d, w in zip(deviations, weights)]
496 sum_weights = sum(weights)
497 else:
498 sum_weights = len(deviations)
499 tfd = sum(deviations)
500 if sum_weights != 0:
501 tfd /= sum_weights
502 return tfd
503
504
506 """ Generate a new molecule with the atom order of mol1 and coordinates
507 from mol2.
508
509 Arguments:
510 - mol1: first instance of the molecule of interest
511 - mol2: second instance the molecule of interest
512
513 Return: RDKit molecule
514 """
515 match = mol2.GetSubstructMatch(mol1)
516 atomNums = tuple(range(mol1.GetNumAtoms()))
517 if match != atomNums:
518
519 mol3 = Chem.Mol(mol1)
520 mol3.RemoveAllConformers()
521 for conf2 in mol2.GetConformers():
522 confId = conf2.GetId()
523 conf = rdchem.Conformer(mol1.GetNumAtoms())
524 conf.SetId(confId)
525 for i in range(mol1.GetNumAtoms()):
526 conf.SetAtomPosition(i, mol2.GetConformer(confId).GetAtomPosition(match[i]))
527 cid = mol3.AddConformer(conf)
528 return mol3
529 else:
530 return Chem.Mol(mol2)
531
532
533
572
573
574 -def GetTFDBetweenMolecules(mol1, mol2, confId1=-1, confId2=-1, useWeights=True, maxDev='equal',
575 symmRadius=2, ignoreColinearBonds=True):
576 """ Wrapper to calculate the TFD between two molecules.
577 Important: The two molecules must be instances of the same molecule
578
579 Arguments:
580 - mol1: first instance of the molecule of interest
581 - mol2: second instance the molecule of interest
582 - confId1: conformer index for mol1 (default: first conformer)
583 - confId2: conformer index for mol2 (default: first conformer)
584 - useWeights: flag for using torsion weights in the TFD calculation
585 - maxDev: maximal deviation used for normalization
586 'equal': all torsions are normalized using 180.0 (default)
587 'spec': each torsion is normalized using its specific
588 maximal deviation as given in the paper
589 - symmRadius: radius used for calculating the atom invariants
590 (default: 2)
591 - ignoreColinearBonds: if True (default), single bonds adjacent to
592 triple bonds are ignored
593 if False, alternative not-covalently bound
594 atoms are used to define the torsion
595
596 Return: TFD value
597 """
598 if (Chem.MolToSmiles(mol1) != Chem.MolToSmiles(mol2)):
599 raise ValueError("The two molecules must be instances of the same molecule!")
600 mol2 = _getSameAtomOrder(mol1, mol2)
601 tl, tlr = CalculateTorsionLists(mol1, maxDev=maxDev, symmRadius=symmRadius,
602 ignoreColinearBonds=ignoreColinearBonds)
603
604 torsion1 = CalculateTorsionAngles(mol1, tl, tlr, confId=confId1)
605
606 torsion2 = CalculateTorsionAngles(mol2, tl, tlr, confId=confId2)
607 if useWeights:
608 weights = CalculateTorsionWeights(mol1, ignoreColinearBonds=ignoreColinearBonds)
609 tfd = CalculateTFD(torsion1, torsion2, weights=weights)
610 else:
611 tfd = CalculateTFD(torsion1, torsion2)
612 return tfd
613
614
615 -def GetTFDMatrix(mol, useWeights=True, maxDev='equal', symmRadius=2, ignoreColinearBonds=True):
616 """ Wrapper to calculate the matrix of TFD values for the
617 conformers of a molecule.
618
619 Arguments:
620 - mol: the molecule of interest
621 - useWeights: flag for using torsion weights in the TFD calculation
622 - maxDev: maximal deviation used for normalization
623 'equal': all torsions are normalized using 180.0 (default)
624 'spec': each torsion is normalized using its specific
625 maximal deviation as given in the paper
626 - symmRadius: radius used for calculating the atom invariants
627 (default: 2)
628 - ignoreColinearBonds: if True (default), single bonds adjacent to
629 triple bonds are ignored
630 if False, alternative not-covalently bound
631 atoms are used to define the torsion
632
633 Return: matrix of TFD values
634 Note that the returned matrix is symmetrical, i.e. it is the
635 lower half of the matrix, e.g. for 5 conformers:
636 matrix = [ a,
637 b, c,
638 d, e, f,
639 g, h, i, j]
640 """
641 tl, tlr = CalculateTorsionLists(mol, maxDev=maxDev, symmRadius=symmRadius,
642 ignoreColinearBonds=ignoreColinearBonds)
643 numconf = mol.GetNumConformers()
644 torsions = [CalculateTorsionAngles(mol, tl, tlr, confId=conf.GetId())
645 for conf in mol.GetConformers()]
646 tfdmat = []
647 if useWeights:
648 weights = CalculateTorsionWeights(mol, ignoreColinearBonds=ignoreColinearBonds)
649 for i in range(0, numconf):
650 for j in range(0, i):
651 tfdmat.append(CalculateTFD(torsions[i], torsions[j], weights=weights))
652 else:
653 for i in range(0, numconf):
654 for j in range(0, i):
655 tfdmat.append(CalculateTFD(torsions[i], torsions[j]))
656 return tfdmat
657