1
2
3
4
5
6
7
8
9
10
11 """ Calculation of topological/topochemical descriptors.
12
13
14
15 """
16 from __future__ import print_function
17 from rdkit import Chem
18 from rdkit.Chem import Graphs
19 from rdkit.Chem import rdchem
20 from rdkit.Chem import rdMolDescriptors
21
22 from rdkit.Chem import pyPeriodicTable as PeriodicTable
23 import numpy
24 import math
25 from rdkit.ML.InfoTheory import entropy
26
27 periodicTable = rdchem.GetPeriodicTable()
28
29 _log2val = math.log(2)
30
31
33 """ *Internal Use Only*
34
35 this is just a row sum of the matrix... simple, neh?
36
37 """
38 if not onlyOnes:
39 res = sum(mat)
40 else:
41 res = sum(numpy.equal(mat, 1))
42 return res
43
44
46 """ *Internal Use Only*
47
48 """
49 res = mol.GetNumBonds()
50 return res
51
52
54 """ *Internal Use Only*
55
56 """
57 res = {}
58 for v in arr:
59 res[v] = res.get(v, 0) + 1
60 return res
61
62
64 """ calculate the Hall-Kier alpha value for a molecule
65
66 From equations (58) of Rev. Comp. Chem. vol 2, 367-422, (1991)
67
68 """
69 alphaSum = 0.0
70 rC = PeriodicTable.nameTable['C'][5]
71 for atom in m.GetAtoms():
72 atNum = atom.GetAtomicNum()
73 if not atNum:
74 continue
75 symb = atom.GetSymbol()
76 alphaV = PeriodicTable.hallKierAlphas.get(symb, None)
77 if alphaV is not None:
78 hyb = atom.GetHybridization() - 2
79 if (hyb < len(alphaV)):
80 alpha = alphaV[hyb]
81 if alpha is None:
82 alpha = alphaV[-1]
83 else:
84 alpha = alphaV[-1]
85 else:
86 rA = PeriodicTable.nameTable[symb][5]
87 alpha = rA / rC - 1
88
89 alphaSum += alpha
90 return alphaSum
91
92
93
94 -def Ipc(mol, avg=0, dMat=None, forceDMat=0):
95 """This returns the information content of the coefficients of the characteristic
96 polynomial of the adjacency matrix of a hydrogen-suppressed graph of a molecule.
97
98 'avg = 1' returns the information content divided by the total population.
99
100 From D. Bonchev & N. Trinajstic, J. Chem. Phys. vol 67, 4517-4533 (1977)
101
102 """
103 if forceDMat or dMat is None:
104 if forceDMat:
105 dMat = Chem.GetDistanceMatrix(mol, 0)
106 mol._adjMat = dMat
107 else:
108 try:
109 dMat = mol._adjMat
110 except AttributeError:
111 dMat = Chem.GetDistanceMatrix(mol, 0)
112 mol._adjMat = dMat
113
114 adjMat = numpy.equal(dMat, 1)
115 cPoly = abs(Graphs.CharacteristicPolynomial(mol, adjMat))
116 if avg:
117 return entropy.InfoEntropy(cPoly)
118 else:
119 return sum(cPoly) * entropy.InfoEntropy(cPoly)
120
121
122 Ipc.version = "1.0.0"
123
124
126 """ Hall-Kier Kappa1 value
127
128 From equations (58) and (59) of Rev. Comp. Chem. vol 2, 367-422, (1991)
129
130 """
131 P1 = mol.GetNumBonds(1)
132 A = mol.GetNumHeavyAtoms()
133 alpha = HallKierAlpha(mol)
134 denom = P1 + alpha
135 if denom:
136 kappa = (A + alpha) * (A + alpha - 1)**2 / denom**2
137 else:
138 kappa = 0.0
139 return kappa
140
141
142
144 """ Hall-Kier Kappa2 value
145
146 From equations (58) and (60) of Rev. Comp. Chem. vol 2, 367-422, (1991)
147
148 """
149 P2 = len(Chem.FindAllPathsOfLengthN(mol, 2))
150 A = mol.GetNumHeavyAtoms()
151 alpha = HallKierAlpha(mol)
152 denom = (P2 + alpha)**2
153 if denom:
154 kappa = (A + alpha - 1) * (A + alpha - 2)**2 / denom
155 else:
156 kappa = 0
157 return kappa
158
159
160
162 """ Hall-Kier Kappa3 value
163
164 From equations (58), (61) and (62) of Rev. Comp. Chem. vol 2, 367-422, (1991)
165
166 """
167 P3 = len(Chem.FindAllPathsOfLengthN(mol, 3))
168 A = mol.GetNumHeavyAtoms()
169 alpha = HallKierAlpha(mol)
170 denom = (P3 + alpha)**2
171 if denom:
172 if A % 2 == 1:
173 kappa = (A + alpha - 1) * (A + alpha - 3)**2 / denom
174 else:
175 kappa = (A + alpha - 2) * (A + alpha - 3)**2 / denom
176 else:
177 kappa = 0
178 return kappa
179
180
181 HallKierAlpha = lambda x: rdMolDescriptors.CalcHallKierAlpha(x)
182 HallKierAlpha.version = rdMolDescriptors._CalcHallKierAlpha_version
183 Kappa1 = lambda x: rdMolDescriptors.CalcKappa1(x)
184 Kappa1.version = rdMolDescriptors._CalcKappa1_version
185 Kappa2 = lambda x: rdMolDescriptors.CalcKappa2(x)
186 Kappa2.version = rdMolDescriptors._CalcKappa2_version
187 Kappa3 = lambda x: rdMolDescriptors.CalcKappa3(x)
188 Kappa3.version = rdMolDescriptors._CalcKappa3_version
189
190
192 """ From equations (1),(9) and (10) of Rev. Comp. Chem. vol 2, 367-422, (1991)
193
194 """
195 deltas = [x.GetDegree() for x in mol.GetAtoms()]
196 while 0 in deltas:
197 deltas.remove(0)
198 deltas = numpy.array(deltas, 'd')
199 res = sum(numpy.sqrt(1. / deltas))
200 return res
201
202
203 Chi0.version = "1.0.0"
204
205
207 """ From equations (1),(11) and (12) of Rev. Comp. Chem. vol 2, 367-422, (1991)
208
209 """
210 c1s = [x.GetBeginAtom().GetDegree() * x.GetEndAtom().GetDegree() for x in mol.GetBonds()]
211 while 0 in c1s:
212 c1s.remove(0)
213 c1s = numpy.array(c1s, 'd')
214 res = sum(numpy.sqrt(1. / c1s))
215 return res
216
217
218 Chi1.version = "1.0.0"
219
220
222 return periodicTable.GetNOuterElecs(atom.GetAtomicNum()) - atom.GetTotalNumHs()
223
224
226 global periodicTable
227 res = []
228 if hasattr(mol, '_hkDeltas') and mol._hkDeltas is not None:
229 return mol._hkDeltas
230 for atom in mol.GetAtoms():
231 n = atom.GetAtomicNum()
232 if n > 1:
233 nV = periodicTable.GetNOuterElecs(n)
234 nHs = atom.GetTotalNumHs()
235 if n <= 10:
236
237 res.append(float(nV - nHs))
238 else:
239
240 res.append(float(nV - nHs) / float(n - nV - 1))
241 elif n == 1:
242 if not skipHs:
243 res.append(0.0)
244 else:
245 res.append(0.0)
246 mol._hkDeltas = res
247 return res
248
249
251 """ From equations (5),(9) and (10) of Rev. Comp. Chem. vol 2, 367-422, (1991)
252
253 """
254 deltas = _hkDeltas(mol)
255 while 0 in deltas:
256 deltas.remove(0)
257 mol._hkDeltas = None
258 res = sum(numpy.sqrt(1. / numpy.array(deltas)))
259 return res
260
261
263 """ From equations (5),(11) and (12) of Rev. Comp. Chem. vol 2, 367-422, (1991)
264
265 """
266 deltas = numpy.array(_hkDeltas(mol, skipHs=0))
267 res = 0.0
268 for bond in mol.GetBonds():
269 v = deltas[bond.GetBeginAtomIdx()] * deltas[bond.GetEndAtomIdx()]
270 if v != 0.0:
271 res += numpy.sqrt(1. / v)
272 return res
273
274
276 """ From equations (5),(15) and (16) of Rev. Comp. Chem. vol 2, 367-422, (1991)
277
278 **NOTE**: because the current path finding code does, by design,
279 detect rings as paths (e.g. in C1CC1 there is *1* atom path of
280 length 3), values of ChiNv with N >= 3 may give results that differ
281 from those provided by the old code in molecules that have rings of
282 size 3.
283
284 """
285 deltas = numpy.array(
286 [(1. / numpy.sqrt(hkd) if hkd != 0.0 else 0.0) for hkd in _hkDeltas(mol, skipHs=0)])
287 accum = 0.0
288 for path in Chem.FindAllPathsOfLengthN(mol, order + 1, useBonds=0):
289 accum += numpy.prod(deltas[numpy.array(path)])
290 return accum
291
292
294 """ From equations (5),(15) and (16) of Rev. Comp. Chem. vol 2, 367-422, (1991)
295
296 """
297 return _pyChiNv_(mol, 2)
298
299
301 """ From equations (5),(15) and (16) of Rev. Comp. Chem. vol 2, 367-422, (1991)
302
303 """
304 return _pyChiNv_(mol, 3)
305
306
308 """ From equations (5),(15) and (16) of Rev. Comp. Chem. vol 2, 367-422, (1991)
309
310 **NOTE**: because the current path finding code does, by design,
311 detect rings as paths (e.g. in C1CC1 there is *1* atom path of
312 length 3), values of Chi4v may give results that differ from those
313 provided by the old code in molecules that have 3 rings.
314
315 """
316 return _pyChiNv_(mol, 4)
317
318
320 """ Similar to Hall Kier Chi0v, but uses nVal instead of valence
321 This makes a big difference after we get out of the first row.
322
323 """
324 deltas = [_nVal(x) for x in mol.GetAtoms()]
325 while deltas.count(0):
326 deltas.remove(0)
327 deltas = numpy.array(deltas, 'd')
328 res = sum(numpy.sqrt(1. / deltas))
329 return res
330
331
333 """ Similar to Hall Kier Chi1v, but uses nVal instead of valence
334
335 """
336 delts = numpy.array([_nVal(x) for x in mol.GetAtoms()], 'd')
337 res = 0.0
338 for bond in mol.GetBonds():
339 v = delts[bond.GetBeginAtomIdx()] * delts[bond.GetEndAtomIdx()]
340 if v != 0.0:
341 res += numpy.sqrt(1. / v)
342 return res
343
344
346 """ Similar to Hall Kier ChiNv, but uses nVal instead of valence
347 This makes a big difference after we get out of the first row.
348
349 **NOTE**: because the current path finding code does, by design,
350 detect rings as paths (e.g. in C1CC1 there is *1* atom path of
351 length 3), values of ChiNn with N >= 3 may give results that differ
352 from those provided by the old code in molecules that have rings of
353 size 3.
354
355 """
356 nval = [_nVal(x) for x in mol.GetAtoms()]
357 deltas = numpy.array([(1. / numpy.sqrt(x) if x else 0.0) for x in nval])
358 accum = 0.0
359 for path in Chem.FindAllPathsOfLengthN(mol, order + 1, useBonds=0):
360 accum += numpy.prod(deltas[numpy.array(path)])
361 return accum
362
363
365 """ Similar to Hall Kier Chi2v, but uses nVal instead of valence
366 This makes a big difference after we get out of the first row.
367
368 """
369 return _pyChiNn_(mol, 2)
370
371
373 """ Similar to Hall Kier Chi3v, but uses nVal instead of valence
374 This makes a big difference after we get out of the first row.
375
376 """
377 return _pyChiNn_(mol, 3)
378
379
381 """ Similar to Hall Kier Chi4v, but uses nVal instead of valence
382 This makes a big difference after we get out of the first row.
383
384
385 **NOTE**: because the current path finding code does, by design,
386 detect rings as paths (e.g. in C1CC1 there is *1* atom path of
387 length 3), values of Chi4n may give results that differ from those
388 provided by the old code in molecules that have 3 rings.
389
390 """
391 return _pyChiNn_(mol, 4)
392
393
394 Chi0v = lambda x: rdMolDescriptors.CalcChi0v(x)
395 Chi0v.version = rdMolDescriptors._CalcChi0v_version
396 Chi1v = lambda x: rdMolDescriptors.CalcChi1v(x)
397 Chi1v.version = rdMolDescriptors._CalcChi1v_version
398 Chi2v = lambda x: rdMolDescriptors.CalcChi2v(x)
399 Chi2v.version = rdMolDescriptors._CalcChi2v_version
400 Chi3v = lambda x: rdMolDescriptors.CalcChi3v(x)
401 Chi3v.version = rdMolDescriptors._CalcChi3v_version
402 Chi4v = lambda x: rdMolDescriptors.CalcChi4v(x)
403 Chi4v.version = rdMolDescriptors._CalcChi4v_version
404 ChiNv_ = lambda x, y: rdMolDescriptors.CalcChiNv(x, y)
405 ChiNv_.version = rdMolDescriptors._CalcChiNv_version
406
407 Chi0n = lambda x: rdMolDescriptors.CalcChi0n(x)
408 Chi0n.version = rdMolDescriptors._CalcChi0n_version
409 Chi1n = lambda x: rdMolDescriptors.CalcChi1n(x)
410 Chi1n.version = rdMolDescriptors._CalcChi1n_version
411 Chi2n = lambda x: rdMolDescriptors.CalcChi2n(x)
412 Chi2n.version = rdMolDescriptors._CalcChi2n_version
413 Chi3n = lambda x: rdMolDescriptors.CalcChi3n(x)
414 Chi3n.version = rdMolDescriptors._CalcChi3n_version
415 Chi4n = lambda x: rdMolDescriptors.CalcChi4n(x)
416 Chi4n.version = rdMolDescriptors._CalcChi4n_version
417 ChiNn_ = lambda x, y: rdMolDescriptors.CalcChiNn(x, y)
418 ChiNn_.version = rdMolDescriptors._CalcChiNn_version
419
420
421 -def BalabanJ(mol, dMat=None, forceDMat=0):
422 """ Calculate Balaban's J value for a molecule
423
424 **Arguments**
425
426 - mol: a molecule
427
428 - dMat: (optional) a distance/adjacency matrix for the molecule, if this
429 is not provide, one will be calculated
430
431 - forceDMat: (optional) if this is set, the distance/adjacency matrix
432 will be recalculated regardless of whether or not _dMat_ is provided
433 or the molecule already has one
434
435 **Returns**
436
437 - a float containing the J value
438
439 We follow the notation of Balaban's paper:
440 Chem. Phys. Lett. vol 89, 399-404, (1982)
441
442 """
443
444 if forceDMat or dMat is None:
445 if forceDMat:
446
447 dMat = Chem.GetDistanceMatrix(mol, useBO=1, useAtomWts=0, force=1)
448 mol._balabanMat = dMat
449 adjMat = Chem.GetAdjacencyMatrix(mol, useBO=0, emptyVal=0, force=0, prefix="NoBO")
450 mol._adjMat = adjMat
451 else:
452 try:
453
454 dMat = mol._balabanMat
455 except AttributeError:
456
457 dMat = Chem.GetDistanceMatrix(mol, useBO=1, useAtomWts=0, force=0, prefix="Balaban")
458
459 mol._balabanMat = dMat
460 try:
461 adjMat = mol._adjMat
462 except AttributeError:
463 adjMat = Chem.GetAdjacencyMatrix(mol, useBO=0, emptyVal=0, force=0, prefix="NoBO")
464 mol._adjMat = adjMat
465 else:
466 adjMat = Chem.GetAdjacencyMatrix(mol, useBO=0, emptyVal=0, force=0, prefix="NoBO")
467
468 s = _VertexDegrees(dMat)
469 q = _NumAdjacencies(mol, dMat)
470 n = mol.GetNumAtoms()
471 mu = q - n + 1
472
473 sum_ = 0.
474 nS = len(s)
475 for i in range(nS):
476 si = s[i]
477 for j in range(i, nS):
478 if adjMat[i, j] == 1:
479 sum_ += 1. / numpy.sqrt(si * s[j])
480
481 if mu + 1 != 0:
482 J = float(q) / float(mu + 1) * sum_
483 else:
484 J = 0
485
486 return J
487
488
489 BalabanJ.version = "1.0.0"
490
491
492
493
494
495
496
498 """
499 Used by BertzCT
500
501 vdList: the number of neighbors each atom has
502 bdMat: "balaban" distance matrix
503
504 """
505 if forceBDMat:
506 bdMat = Chem.GetDistanceMatrix(mol, useBO=1, useAtomWts=0, force=1, prefix="Balaban")
507 mol._balabanMat = bdMat
508
509 keysSeen = []
510 symList = [0] * numAtoms
511 for i in range(numAtoms):
512 tmpList = bdMat[i].tolist()
513 tmpList.sort()
514 theKey = tuple(['%.4f' % x for x in tmpList[:cutoff]])
515 try:
516 idx = keysSeen.index(theKey)
517 except ValueError:
518 idx = len(keysSeen)
519 keysSeen.append(theKey)
520 symList[i] = idx + 1
521 return tuple(symList)
522
523
525 """
526 Used by BertzCT
527 """
528 if atom1Id < atom2Id:
529 theKey = (atom1Id, atom2Id)
530 else:
531 theKey = (atom2Id, atom1Id)
532 tmp = bondDic[theKey]
533 if tmp == Chem.BondType.AROMATIC:
534 tmp = 1.5
535 else:
536 tmp = float(tmp)
537
538 return tmp
539
540
542 """
543 Used by BertzCT
544 """
545 connectionList = list(connectionDict.values())
546 totConnections = sum(connectionList)
547 connectionIE = totConnections * (
548 entropy.InfoEntropy(numpy.array(connectionList)) + math.log(totConnections) / _log2val)
549 atomTypeList = list(atomTypeDict.values())
550 atomTypeIE = numAtoms * entropy.InfoEntropy(numpy.array(atomTypeList))
551 return atomTypeIE + connectionIE
552
553
555 """ _Internal Use Only_
556 Used by BertzCT
557
558 """
559 bondDict = {}
560 nList = [None] * numAtoms
561 vdList = [0] * numAtoms
562 for aBond in mol.GetBonds():
563 atom1 = aBond.GetBeginAtomIdx()
564 atom2 = aBond.GetEndAtomIdx()
565 if atom1 > atom2:
566 atom2, atom1 = atom1, atom2
567 if not aBond.GetIsAromatic():
568 bondDict[(atom1, atom2)] = aBond.GetBondType()
569 else:
570
571 bondDict[(atom1, atom2)] = Chem.BondType.AROMATIC
572 if nList[atom1] is None:
573 nList[atom1] = [atom2]
574 elif atom2 not in nList[atom1]:
575 nList[atom1].append(atom2)
576 if nList[atom2] is None:
577 nList[atom2] = [atom1]
578 elif atom1 not in nList[atom2]:
579 nList[atom2].append(atom1)
580
581 for i, element in enumerate(nList):
582 try:
583 element.sort()
584 vdList[i] = len(element)
585 except Exception:
586 vdList[i] = 0
587 return bondDict, nList, vdList
588
589
590 -def BertzCT(mol, cutoff=100, dMat=None, forceDMat=1):
591 """ A topological index meant to quantify "complexity" of molecules.
592
593 Consists of a sum of two terms, one representing the complexity
594 of the bonding, the other representing the complexity of the
595 distribution of heteroatoms.
596
597 From S. H. Bertz, J. Am. Chem. Soc., vol 103, 3599-3601 (1981)
598
599 "cutoff" is an integer value used to limit the computational
600 expense. A cutoff value tells the program to consider vertices
601 topologically identical if their distance vectors (sets of
602 distances to all other vertices) are equal out to the "cutoff"th
603 nearest-neighbor.
604
605 **NOTE** The original implementation had the following comment:
606 > this implementation treats aromatic rings as the
607 > corresponding Kekule structure with alternating bonds,
608 > for purposes of counting "connections".
609 Upon further thought, this is the WRONG thing to do. It
610 results in the possibility of a molecule giving two different
611 CT values depending on the kekulization. For example, in the
612 old implementation, these two SMILES:
613 CC2=CN=C1C3=C(C(C)=C(C=N3)C)C=CC1=C2C
614 CC3=CN=C2C1=NC=C(C)C(C)=C1C=CC2=C3C
615 which correspond to differentk kekule forms, yield different
616 values.
617 The new implementation uses consistent (aromatic) bond orders
618 for aromatic bonds.
619
620 THIS MEANS THAT THIS IMPLEMENTATION IS NOT BACKWARDS COMPATIBLE.
621
622 Any molecule containing aromatic rings will yield different
623 values with this implementation. The new behavior is the correct
624 one, so we're going to live with the breakage.
625
626 **NOTE** this barfs if the molecule contains a second (or
627 nth) fragment that is one atom.
628
629 """
630 atomTypeDict = {}
631 connectionDict = {}
632 numAtoms = mol.GetNumAtoms()
633 if forceDMat or dMat is None:
634 if forceDMat:
635
636 dMat = Chem.GetDistanceMatrix(mol, useBO=0, useAtomWts=0, force=1)
637 mol._adjMat = dMat
638 else:
639 try:
640 dMat = mol._adjMat
641 except AttributeError:
642 dMat = Chem.GetDistanceMatrix(mol, useBO=0, useAtomWts=0, force=1)
643 mol._adjMat = dMat
644
645 if numAtoms < 2:
646 return 0
647
648 bondDict, neighborList, vdList = _CreateBondDictEtc(mol, numAtoms)
649 symmetryClasses = _AssignSymmetryClasses(mol, vdList, dMat, forceDMat, numAtoms, cutoff)
650
651 for atomIdx in range(numAtoms):
652 hingeAtomNumber = mol.GetAtomWithIdx(atomIdx).GetAtomicNum()
653 atomTypeDict[hingeAtomNumber] = atomTypeDict.get(hingeAtomNumber, 0) + 1
654
655 hingeAtomClass = symmetryClasses[atomIdx]
656 numNeighbors = vdList[atomIdx]
657 for i in range(numNeighbors):
658 neighbor_iIdx = neighborList[atomIdx][i]
659 NiClass = symmetryClasses[neighbor_iIdx]
660 bond_i_order = _LookUpBondOrder(atomIdx, neighbor_iIdx, bondDict)
661
662 if (bond_i_order > 1) and (neighbor_iIdx > atomIdx):
663 numConnections = bond_i_order * (bond_i_order - 1) / 2
664 connectionKey = (min(hingeAtomClass, NiClass), max(hingeAtomClass, NiClass))
665 connectionDict[connectionKey] = connectionDict.get(connectionKey, 0) + numConnections
666
667 for j in range(i + 1, numNeighbors):
668 neighbor_jIdx = neighborList[atomIdx][j]
669 NjClass = symmetryClasses[neighbor_jIdx]
670 bond_j_order = _LookUpBondOrder(atomIdx, neighbor_jIdx, bondDict)
671 numConnections = bond_i_order * bond_j_order
672 connectionKey = (min(NiClass, NjClass), hingeAtomClass, max(NiClass, NjClass))
673 connectionDict[connectionKey] = connectionDict.get(connectionKey, 0) + numConnections
674
675 if not connectionDict:
676 connectionDict = {'a': 1}
677
678 return _CalculateEntropies(connectionDict, atomTypeDict, numAtoms)
679
680
681 BertzCT.version = "2.0.0"
682
683
684
685
686
687
688
689
690
691