1
2
3
4
5
6
7
8
9
10 """ Contains an implementation of Atom-pair fingerprints, as
11 described in:
12
13 R.E. Carhart, D.H. Smith, R. Venkataraghavan;
14 "Atom Pairs as Molecular Features in Structure-Activity Studies:
15 Definition and Applications" JCICS 25, 64-73 (1985).
16
17 The fingerprints can be accessed through the following functions:
18 - GetAtomPairFingerprint
19 - GetHashedAtomPairFingerprint (identical to GetAtomPairFingerprint)
20 - GetAtomPairFingerprintAsIntVect
21 - GetAtomPairFingerprintAsBitVect
22
23 """
24 from rdkit import DataStructs
25 from rdkit.Chem import rdMolDescriptors
26 from rdkit.Chem.AtomPairs import Utils
27
28 from rdkit.Chem.rdMolDescriptors import GetAtomPairFingerprint, GetHashedAtomPairFingerprint
29 GetAtomPairFingerprintAsIntVect = rdMolDescriptors.GetAtomPairFingerprint
30
31 numPathBits = rdMolDescriptors.AtomPairsParameters.numPathBits
32 _maxPathLen = (1 << numPathBits) - 1
33 numFpBits = numPathBits + 2 * rdMolDescriptors.AtomPairsParameters.codeSize
34 fpLen = 1 << numFpBits
35
36
37 -def pyScorePair(at1, at2, dist, atomCodes=None, includeChirality=False):
38 """ Returns a score for an individual atom pair.
39
40 >>> from rdkit import Chem
41 >>> m = Chem.MolFromSmiles('CCCCC')
42 >>> c1 = Utils.GetAtomCode(m.GetAtomWithIdx(0))
43 >>> c2 = Utils.GetAtomCode(m.GetAtomWithIdx(1))
44 >>> c3 = Utils.GetAtomCode(m.GetAtomWithIdx(2))
45 >>> t = 1 | min(c1,c2)<<numPathBits | max(c1,c2)<<(rdMolDescriptors.AtomPairsParameters.codeSize+numPathBits)
46 >>> pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(1),1)==t
47 1
48 >>> pyScorePair(m.GetAtomWithIdx(1),m.GetAtomWithIdx(0),1)==t
49 1
50 >>> t = 2 | min(c1,c3)<<numPathBits | max(c1,c3)<<(rdMolDescriptors.AtomPairsParameters.codeSize+numPathBits)
51 >>> pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(2),2)==t
52 1
53 >>> pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(2),2,
54 ... atomCodes=(Utils.GetAtomCode(m.GetAtomWithIdx(0)),Utils.GetAtomCode(m.GetAtomWithIdx(2))))==t
55 1
56
57 """
58 if not atomCodes:
59 code1 = Utils.GetAtomCode(at1,includeChirality = includeChirality)
60 code2 = Utils.GetAtomCode(at2,includeChirality = includeChirality)
61 else:
62 code1, code2 = atomCodes
63
64 codeSize = rdMolDescriptors.AtomPairsParameters.codeSize
65 if includeChirality:
66 codeSize += rdMolDescriptors.AtomPairsParameters.numChiralBits
67
68 accum = int(dist) % _maxPathLen
69 accum |= min(code1, code2) << numPathBits
70 accum |= max(code1, code2) << (codeSize + numPathBits)
71 return accum
72
73
75 """
76 >>> from rdkit import Chem
77 >>> m = Chem.MolFromSmiles('C=CC')
78 >>> score = pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(1),1)
79 >>> ExplainPairScore(score)
80 (('C', 1, 1), 1, ('C', 2, 1))
81 >>> score = pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(2),2)
82 >>> ExplainPairScore(score)
83 (('C', 1, 0), 2, ('C', 1, 1))
84 >>> score = pyScorePair(m.GetAtomWithIdx(1),m.GetAtomWithIdx(2),1)
85 >>> ExplainPairScore(score)
86 (('C', 1, 0), 1, ('C', 2, 1))
87 >>> score = pyScorePair(m.GetAtomWithIdx(2),m.GetAtomWithIdx(1),1)
88 >>> ExplainPairScore(score)
89 (('C', 1, 0), 1, ('C', 2, 1))
90
91 We can optionally deal with chirality too
92 >>> m = Chem.MolFromSmiles('C[C@H](F)Cl')
93 >>> score = pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(1),1)
94 >>> ExplainPairScore(score)
95 (('C', 1, 0), 1, ('C', 3, 0))
96 >>> score = pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(1),1,includeChirality=True)
97 >>> ExplainPairScore(score,includeChirality=True)
98 (('C', 1, 0, ''), 1, ('C', 3, 0, 'R'))
99 >>> m = Chem.MolFromSmiles('F[C@@H](Cl)[C@H](F)Cl')
100 >>> score = pyScorePair(m.GetAtomWithIdx(1),m.GetAtomWithIdx(3),1,includeChirality=True)
101 >>> ExplainPairScore(score,includeChirality=True)
102 (('C', 3, 0, 'R'), 1, ('C', 3, 0, 'S'))
103
104 """
105 codeSize = rdMolDescriptors.AtomPairsParameters.codeSize
106 if includeChirality:
107 codeSize += rdMolDescriptors.AtomPairsParameters.numChiralBits
108
109 codeMask = (1 << codeSize) - 1
110
111 pathMask = (1 << numPathBits) - 1
112 dist = score & pathMask
113
114 score = score >> numPathBits
115 code1 = score & codeMask
116 score = score >> codeSize
117 code2 = score & codeMask
118
119 res = (Utils.ExplainAtomCode(code1,includeChirality=includeChirality),
120 dist,
121 Utils.ExplainAtomCode(code2,includeChirality=includeChirality))
122 return res
123
124
126 """ Returns the Atom-pair fingerprint for a molecule as
127 a SparseBitVect. Note that this doesn't match the standard
128 definition of atom pairs, which uses counts of the
129 pairs, not just their presence.
130
131 **Arguments**:
132
133 - mol: a molecule
134
135 **Returns**: a SparseBitVect
136
137 >>> from rdkit import Chem
138 >>> m = Chem.MolFromSmiles('CCC')
139 >>> v = [ pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(1),1),
140 ... pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(2),2),
141 ... ]
142 >>> v.sort()
143 >>> fp = GetAtomPairFingerprintAsBitVect(m)
144 >>> list(fp.GetOnBits())==v
145 True
146
147 """
148 res = DataStructs.SparseBitVect(fpLen)
149 fp = rdMolDescriptors.GetAtomPairFingerprint(mol)
150 for val in fp.GetNonzeroElements():
151 res.SetBit(val)
152 return res
153
154
155
156
157
158
160 import sys
161 import doctest
162 failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose)
163 sys.exit(failed)
164
165
166 if __name__ == '__main__':
167 _runDoctests()
168