1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33 import copy
34 import math
35
36 from matplotlib import cm
37 from matplotlib.colors import LinearSegmentedColormap
38 import numpy
39
40 from rdkit import Chem
41 from rdkit import DataStructs
42 from rdkit.Chem import Draw
43 from rdkit.Chem import rdMolDescriptors as rdMD
44 from rdkit.six import iteritems
45
46
48 """
49 Calculates the atomic weights for the probe molecule
50 based on a fingerprint function and a metric.
51
52 Parameters:
53 refMol -- the reference molecule
54 probeMol -- the probe molecule
55 fpFunction -- the fingerprint function
56 metric -- the similarity metric
57
58 Note:
59 If fpFunction needs additional parameters, use a lambda construct
60 """
61 if hasattr(probeMol, '_fpInfo'):
62 delattr(probeMol, '_fpInfo')
63 if hasattr(refMol, '_fpInfo'):
64 delattr(refMol, '_fpInfo')
65 refFP = fpFunction(refMol, -1)
66 probeFP = fpFunction(probeMol, -1)
67 baseSimilarity = metric(refFP, probeFP)
68
69 weights = []
70 for atomId in range(probeMol.GetNumAtoms()):
71 newFP = fpFunction(probeMol, atomId)
72 newSimilarity = metric(refFP, newFP)
73 weights.append(baseSimilarity - newSimilarity)
74 if hasattr(probeMol, '_fpInfo'):
75 delattr(probeMol, '_fpInfo')
76 if hasattr(refMol, '_fpInfo'):
77 delattr(refMol, '_fpInfo')
78 return weights
79
80
104
105
107 """
108 Normalizes the weights,
109 such that the absolute maximum weight equals 1.0.
110
111 Parameters:
112 weights -- the list with the atomic weights
113 """
114 tmp = [math.fabs(w) for w in weights]
115 currentMax = max(tmp)
116 if currentMax > 0:
117 return [w / currentMax for w in weights], currentMax
118 else:
119 return weights, currentMax
120
121
122 -def GetSimilarityMapFromWeights(mol, weights, colorMap=None, scale=-1, size=(250, 250),
123 sigma=None, coordScale=1.5, step=0.01, colors='k', contourLines=10,
124 alpha=0.5, **kwargs):
125 """
126 Generates the similarity map for a molecule given the atomic weights.
127
128 Parameters:
129 mol -- the molecule of interest
130 colorMap -- the matplotlib color map scheme, default is custom PiWG color map
131 scale -- the scaling: scale < 0 -> the absolute maximum weight is used as maximum scale
132 scale = double -> this is the maximum scale
133 size -- the size of the figure
134 sigma -- the sigma for the Gaussians
135 coordScale -- scaling factor for the coordinates
136 step -- the step for calcAtomGaussian
137 colors -- color of the contour lines
138 contourLines -- if integer number N: N contour lines are drawn
139 if list(numbers): contour lines at these numbers are drawn
140 alpha -- the alpha blending value for the contour lines
141 kwargs -- additional arguments for drawing
142 """
143 if mol.GetNumAtoms() < 2:
144 raise ValueError("too few atoms")
145 fig = Draw.MolToMPL(mol, coordScale=coordScale, size=size, **kwargs)
146 if sigma is None:
147 if mol.GetNumBonds() > 0:
148 bond = mol.GetBondWithIdx(0)
149 idx1 = bond.GetBeginAtomIdx()
150 idx2 = bond.GetEndAtomIdx()
151 sigma = 0.3 * math.sqrt(
152 sum([(mol._atomPs[idx1][i] - mol._atomPs[idx2][i])**2 for i in range(2)]))
153 else:
154 sigma = 0.3 * math.sqrt(sum([(mol._atomPs[0][i] - mol._atomPs[1][i])**2 for i in range(2)]))
155 sigma = round(sigma, 2)
156 x, y, z = Draw.calcAtomGaussians(mol, sigma, weights=weights, step=step)
157
158 if scale <= 0.0:
159 maxScale = max(math.fabs(numpy.min(z)), math.fabs(numpy.max(z)))
160 else:
161 maxScale = scale
162
163 if colorMap is None:
164 PiYG_cmap = cm.get_cmap('PiYG',2)
165 colorMap = LinearSegmentedColormap.from_list('PiWG', [PiYG_cmap(0), (1.0, 1.0, 1.0), PiYG_cmap(1)], N=255)
166
167 fig.axes[0].imshow(z, cmap=colorMap, interpolation='bilinear', origin='lower',
168 extent=(0, 1, 0, 1), vmin=-maxScale, vmax=maxScale)
169
170
171 if len([w for w in weights if w != 0.0]):
172 contourset = fig.axes[0].contour(x, y, z, contourLines, colors=colors, alpha=alpha, **kwargs)
173 for j, c in enumerate(contourset.collections):
174 if contourset.levels[j] == 0.0:
175 c.set_linewidth(0.0)
176 elif contourset.levels[j] < 0:
177 c.set_dashes([(0, (3.0, 3.0))])
178 fig.axes[0].set_axis_off()
179 return fig
180
181
184 """
185 Generates the similarity map for a given reference and probe molecule,
186 fingerprint function and similarity metric.
187
188 Parameters:
189 refMol -- the reference molecule
190 probeMol -- the probe molecule
191 fpFunction -- the fingerprint function
192 metric -- the similarity metric.
193 kwargs -- additional arguments for drawing
194 """
195 weights = GetAtomicWeightsForFingerprint(refMol, probeMol, fpFunction, metric)
196 weights, maxWeight = GetStandardizedWeights(weights)
197 fig = GetSimilarityMapFromWeights(probeMol, weights, **kwargs)
198 return fig, maxWeight
199
200
216
217
218 apDict = {}
219 apDict[
220 'normal'] = lambda m, bits, minl, maxl, bpe, ia, **kwargs: rdMD.GetAtomPairFingerprint(m, minLength=minl, maxLength=maxl, ignoreAtoms=ia, **kwargs)
221 apDict[
222 'hashed'] = lambda m, bits, minl, maxl, bpe, ia, **kwargs: rdMD.GetHashedAtomPairFingerprint(m, nBits=bits, minLength=minl, maxLength=maxl, ignoreAtoms=ia, **kwargs)
223 apDict[
224 'bv'] = lambda m, bits, minl, maxl, bpe, ia, **kwargs: rdMD.GetHashedAtomPairFingerprintAsBitVect(m, nBits=bits, minLength=minl, maxLength=maxl, nBitsPerEntry=bpe, ignoreAtoms=ia, **kwargs)
225
226
227
228 -def GetAPFingerprint(mol, atomId=-1, fpType='normal', nBits=2048, minLength=1, maxLength=30,
229 nBitsPerEntry=4, **kwargs):
230 """
231 Calculates the atom pairs fingerprint with the torsions of atomId removed.
232
233 Parameters:
234 mol -- the molecule of interest
235 atomId -- the atom to remove the pairs for (if -1, no pair is removed)
236 fpType -- the type of AP fingerprint ('normal', 'hashed', 'bv')
237 nBits -- the size of the bit vector (only for fpType='bv')
238 minLength -- the minimum path length for an atom pair
239 maxLength -- the maxmimum path length for an atom pair
240 nBitsPerEntry -- the number of bits available for each pair
241 """
242 if fpType not in ['normal', 'hashed', 'bv']:
243 raise ValueError("Unknown Atom pairs fingerprint type")
244 if atomId < 0:
245 return apDict[fpType](mol, nBits, minLength, maxLength, nBitsPerEntry, 0, **kwargs)
246 if atomId >= mol.GetNumAtoms():
247 raise ValueError("atom index greater than number of atoms")
248 return apDict[fpType](mol, nBits, minLength, maxLength, nBitsPerEntry, [atomId], **kwargs)
249
250
251 ttDict = {}
252 ttDict[
253 'normal'] = lambda m, bits, ts, bpe, ia, **kwargs: rdMD.GetTopologicalTorsionFingerprint(m, targetSize=ts, ignoreAtoms=ia, **kwargs)
254 ttDict[
255 'hashed'] = lambda m, bits, ts, bpe, ia, **kwargs: rdMD.GetHashedTopologicalTorsionFingerprint(m, nBits=bits, targetSize=ts, ignoreAtoms=ia, **kwargs)
256 ttDict[
257 'bv'] = lambda m, bits, ts, bpe, ia, **kwargs: rdMD.GetHashedTopologicalTorsionFingerprintAsBitVect(m, nBits=bits, targetSize=ts, nBitsPerEntry=bpe, ignoreAtoms=ia, **kwargs)
258
259
260
261 -def GetTTFingerprint(mol, atomId=-1, fpType='normal', nBits=2048, targetSize=4, nBitsPerEntry=4,
262 **kwargs):
263 """
264 Calculates the topological torsion fingerprint with the pairs of atomId removed.
265
266 Parameters:
267 mol -- the molecule of interest
268 atomId -- the atom to remove the torsions for (if -1, no torsion is removed)
269 fpType -- the type of TT fingerprint ('normal', 'hashed', 'bv')
270 nBits -- the size of the bit vector (only for fpType='bv')
271 minLength -- the minimum path length for an atom pair
272 maxLength -- the maxmimum path length for an atom pair
273 nBitsPerEntry -- the number of bits available for each torsion
274
275 any additional keyword arguments will be passed to the fingerprinting function.
276
277 """
278 if fpType not in ['normal', 'hashed', 'bv']:
279 raise ValueError("Unknown Topological torsion fingerprint type")
280 if atomId < 0:
281 return ttDict[fpType](mol, nBits, targetSize, nBitsPerEntry, 0, **kwargs)
282 if atomId >= mol.GetNumAtoms():
283 raise ValueError("atom index greater than number of atoms")
284 return ttDict[fpType](mol, nBits, targetSize, nBitsPerEntry, [atomId], **kwargs)
285
286
287
288 -def GetMorganFingerprint(mol, atomId=-1, radius=2, fpType='bv', nBits=2048, useFeatures=False,
289 **kwargs):
290 """
291 Calculates the Morgan fingerprint with the environments of atomId removed.
292
293 Parameters:
294 mol -- the molecule of interest
295 radius -- the maximum radius
296 fpType -- the type of Morgan fingerprint: 'count' or 'bv'
297 atomId -- the atom to remove the environments for (if -1, no environments is removed)
298 nBits -- the size of the bit vector (only for fpType = 'bv')
299 useFeatures -- if false: ConnectivityMorgan, if true: FeatureMorgan
300
301 any additional keyword arguments will be passed to the fingerprinting function.
302 """
303 if fpType not in ['bv', 'count']:
304 raise ValueError("Unknown Morgan fingerprint type")
305 if not hasattr(mol, '_fpInfo'):
306 info = {}
307
308 if fpType == 'bv':
309 molFp = rdMD.GetMorganFingerprintAsBitVect(mol, radius, nBits=nBits, useFeatures=useFeatures,
310 bitInfo=info, **kwargs)
311 else:
312 molFp = rdMD.GetMorganFingerprint(mol, radius, useFeatures=useFeatures, bitInfo=info,
313 **kwargs)
314
315 if fpType == 'bv':
316 bitmap = [DataStructs.ExplicitBitVect(nBits) for _ in range(mol.GetNumAtoms())]
317 else:
318 bitmap = [[] for _ in range(mol.GetNumAtoms())]
319 for bit, es in iteritems(info):
320 for at1, rad in es:
321 if rad == 0:
322 if fpType == 'bv':
323 bitmap[at1][bit] = 1
324 else:
325 bitmap[at1].append(bit)
326 else:
327 env = Chem.FindAtomEnvironmentOfRadiusN(mol, rad, at1)
328 amap = {}
329 Chem.PathToSubmol(mol, env, atomMap=amap)
330 for at2 in amap.keys():
331 if fpType == 'bv':
332 bitmap[at2][bit] = 1
333 else:
334 bitmap[at2].append(bit)
335 mol._fpInfo = (molFp, bitmap)
336
337 if atomId < 0:
338 return mol._fpInfo[0]
339 else:
340 if atomId >= mol.GetNumAtoms():
341 raise ValueError("atom index greater than number of atoms")
342 if len(mol._fpInfo) != 2:
343 raise ValueError("_fpInfo not set")
344 if fpType == 'bv':
345 molFp = mol._fpInfo[0] ^ mol._fpInfo[1][atomId]
346 else:
347 molFp = copy.deepcopy(mol._fpInfo[0])
348
349 for bit in mol._fpInfo[1][atomId]:
350 molFp[bit] -= 1
351 return molFp
352
353
354
355 -def GetRDKFingerprint(mol, atomId=-1, fpType='bv', nBits=2048, minPath=1, maxPath=5, nBitsPerHash=2,
356 **kwargs):
357 """
358 Calculates the RDKit fingerprint with the paths of atomId removed.
359
360 Parameters:
361 mol -- the molecule of interest
362 atomId -- the atom to remove the paths for (if -1, no path is removed)
363 fpType -- the type of RDKit fingerprint: 'bv'
364 nBits -- the size of the bit vector
365 minPath -- minimum path length
366 maxPath -- maximum path length
367 nBitsPerHash -- number of to set per path
368 """
369 if fpType not in ['bv', '']:
370 raise ValueError("Unknown RDKit fingerprint type")
371 fpType = 'bv'
372 if not hasattr(mol, '_fpInfo'):
373 info = []
374
375 molFp = Chem.RDKFingerprint(mol, fpSize=nBits, minPath=minPath, maxPath=maxPath,
376 nBitsPerHash=nBitsPerHash, atomBits=info, **kwargs)
377 mol._fpInfo = (molFp, info)
378
379 if atomId < 0:
380 return mol._fpInfo[0]
381 else:
382 if atomId >= mol.GetNumAtoms():
383 raise ValueError("atom index greater than number of atoms")
384 if len(mol._fpInfo) != 2:
385 raise ValueError("_fpInfo not set")
386 molFp = copy.deepcopy(mol._fpInfo[0])
387 molFp.UnSetBitsFromList(mol._fpInfo[1][atomId])
388 return molFp
389