Package rdkit :: Package Chem :: Package Draw :: Module SimilarityMaps
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.Draw.SimilarityMaps

  1  # 
  2  #  Copyright (c) 2013, Novartis Institutes for BioMedical Research Inc. 
  3  #  All rights reserved. 
  4  # 
  5  # Redistribution and use in source and binary forms, with or without 
  6  # modification, are permitted provided that the following conditions are 
  7  # met: 
  8  # 
  9  #     * Redistributions of source code must retain the above copyright 
 10  #       notice, this list of conditions and the following disclaimer. 
 11  #     * Redistributions in binary form must reproduce the above 
 12  #       copyright notice, this list of conditions and the following 
 13  #       disclaimer in the documentation and/or other materials provided 
 14  #       with the distribution. 
 15  #     * Neither the name of Novartis Institutes for BioMedical Research Inc. 
 16  #       nor the names of its contributors may be used to endorse or promote 
 17  #       products derived from this software without specific prior written permission. 
 18  # 
 19  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 20  # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 21  # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 22  # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 23  # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 24  # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 25  # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 26  # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 27  # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 28  # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 29  # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 30  # 
 31  # Created by Sereina Riniker, Aug 2013 
 32   
 33  import copy 
 34  import math 
 35   
 36  from matplotlib import cm 
 37  from matplotlib.colors import LinearSegmentedColormap 
 38  import numpy 
 39   
 40  from rdkit import Chem 
 41  from rdkit import DataStructs 
 42  from rdkit.Chem import Draw 
 43  from rdkit.Chem import rdMolDescriptors as rdMD 
 44  from rdkit.six import iteritems 
 45   
 46   
47 -def GetAtomicWeightsForFingerprint(refMol, probeMol, fpFunction, metric=DataStructs.DiceSimilarity):
48 """ 49 Calculates the atomic weights for the probe molecule 50 based on a fingerprint function and a metric. 51 52 Parameters: 53 refMol -- the reference molecule 54 probeMol -- the probe molecule 55 fpFunction -- the fingerprint function 56 metric -- the similarity metric 57 58 Note: 59 If fpFunction needs additional parameters, use a lambda construct 60 """ 61 if hasattr(probeMol, '_fpInfo'): 62 delattr(probeMol, '_fpInfo') 63 if hasattr(refMol, '_fpInfo'): 64 delattr(refMol, '_fpInfo') 65 refFP = fpFunction(refMol, -1) 66 probeFP = fpFunction(probeMol, -1) 67 baseSimilarity = metric(refFP, probeFP) 68 # loop over atoms 69 weights = [] 70 for atomId in range(probeMol.GetNumAtoms()): 71 newFP = fpFunction(probeMol, atomId) 72 newSimilarity = metric(refFP, newFP) 73 weights.append(baseSimilarity - newSimilarity) 74 if hasattr(probeMol, '_fpInfo'): 75 delattr(probeMol, '_fpInfo') 76 if hasattr(refMol, '_fpInfo'): 77 delattr(refMol, '_fpInfo') 78 return weights
79 80
81 -def GetAtomicWeightsForModel(probeMol, fpFunction, predictionFunction):
82 """ 83 Calculates the atomic weights for the probe molecule based on 84 a fingerprint function and the prediction function of a ML model. 85 86 Parameters: 87 probeMol -- the probe molecule 88 fpFunction -- the fingerprint function 89 predictionFunction -- the prediction function of the ML model 90 """ 91 if hasattr(probeMol, '_fpInfo'): 92 delattr(probeMol, '_fpInfo') 93 probeFP = fpFunction(probeMol, -1) 94 baseProba = predictionFunction(probeFP) 95 # loop over atoms 96 weights = [] 97 for atomId in range(probeMol.GetNumAtoms()): 98 newFP = fpFunction(probeMol, atomId) 99 newProba = predictionFunction(newFP) 100 weights.append(baseProba - newProba) 101 if hasattr(probeMol, '_fpInfo'): 102 delattr(probeMol, '_fpInfo') 103 return weights
104 105
106 -def GetStandardizedWeights(weights):
107 """ 108 Normalizes the weights, 109 such that the absolute maximum weight equals 1.0. 110 111 Parameters: 112 weights -- the list with the atomic weights 113 """ 114 tmp = [math.fabs(w) for w in weights] 115 currentMax = max(tmp) 116 if currentMax > 0: 117 return [w / currentMax for w in weights], currentMax 118 else: 119 return weights, currentMax
120 121
122 -def GetSimilarityMapFromWeights(mol, weights, colorMap=None, scale=-1, size=(250, 250), 123 sigma=None, coordScale=1.5, step=0.01, colors='k', contourLines=10, 124 alpha=0.5, **kwargs):
125 """ 126 Generates the similarity map for a molecule given the atomic weights. 127 128 Parameters: 129 mol -- the molecule of interest 130 colorMap -- the matplotlib color map scheme, default is custom PiWG color map 131 scale -- the scaling: scale < 0 -> the absolute maximum weight is used as maximum scale 132 scale = double -> this is the maximum scale 133 size -- the size of the figure 134 sigma -- the sigma for the Gaussians 135 coordScale -- scaling factor for the coordinates 136 step -- the step for calcAtomGaussian 137 colors -- color of the contour lines 138 contourLines -- if integer number N: N contour lines are drawn 139 if list(numbers): contour lines at these numbers are drawn 140 alpha -- the alpha blending value for the contour lines 141 kwargs -- additional arguments for drawing 142 """ 143 if mol.GetNumAtoms() < 2: 144 raise ValueError("too few atoms") 145 fig = Draw.MolToMPL(mol, coordScale=coordScale, size=size, **kwargs) 146 if sigma is None: 147 if mol.GetNumBonds() > 0: 148 bond = mol.GetBondWithIdx(0) 149 idx1 = bond.GetBeginAtomIdx() 150 idx2 = bond.GetEndAtomIdx() 151 sigma = 0.3 * math.sqrt( 152 sum([(mol._atomPs[idx1][i] - mol._atomPs[idx2][i])**2 for i in range(2)])) 153 else: 154 sigma = 0.3 * math.sqrt(sum([(mol._atomPs[0][i] - mol._atomPs[1][i])**2 for i in range(2)])) 155 sigma = round(sigma, 2) 156 x, y, z = Draw.calcAtomGaussians(mol, sigma, weights=weights, step=step) 157 # scaling 158 if scale <= 0.0: 159 maxScale = max(math.fabs(numpy.min(z)), math.fabs(numpy.max(z))) 160 else: 161 maxScale = scale 162 # coloring 163 if colorMap is None: 164 PiYG_cmap = cm.get_cmap('PiYG',2) 165 colorMap = LinearSegmentedColormap.from_list('PiWG', [PiYG_cmap(0), (1.0, 1.0, 1.0), PiYG_cmap(1)], N=255) 166 167 fig.axes[0].imshow(z, cmap=colorMap, interpolation='bilinear', origin='lower', 168 extent=(0, 1, 0, 1), vmin=-maxScale, vmax=maxScale) 169 # contour lines 170 # only draw them when at least one weight is not zero 171 if len([w for w in weights if w != 0.0]): 172 contourset = fig.axes[0].contour(x, y, z, contourLines, colors=colors, alpha=alpha, **kwargs) 173 for j, c in enumerate(contourset.collections): 174 if contourset.levels[j] == 0.0: 175 c.set_linewidth(0.0) 176 elif contourset.levels[j] < 0: 177 c.set_dashes([(0, (3.0, 3.0))]) 178 fig.axes[0].set_axis_off() 179 return fig
180 181
182 -def GetSimilarityMapForFingerprint(refMol, probeMol, fpFunction, metric=DataStructs.DiceSimilarity, 183 **kwargs):
184 """ 185 Generates the similarity map for a given reference and probe molecule, 186 fingerprint function and similarity metric. 187 188 Parameters: 189 refMol -- the reference molecule 190 probeMol -- the probe molecule 191 fpFunction -- the fingerprint function 192 metric -- the similarity metric. 193 kwargs -- additional arguments for drawing 194 """ 195 weights = GetAtomicWeightsForFingerprint(refMol, probeMol, fpFunction, metric) 196 weights, maxWeight = GetStandardizedWeights(weights) 197 fig = GetSimilarityMapFromWeights(probeMol, weights, **kwargs) 198 return fig, maxWeight
199 200
201 -def GetSimilarityMapForModel(probeMol, fpFunction, predictionFunction, **kwargs):
202 """ 203 Generates the similarity map for a given ML model and probe molecule, 204 and fingerprint function. 205 206 Parameters: 207 probeMol -- the probe molecule 208 fpFunction -- the fingerprint function 209 predictionFunction -- the prediction function of the ML model 210 kwargs -- additional arguments for drawing 211 """ 212 weights = GetAtomicWeightsForModel(probeMol, fpFunction, predictionFunction) 213 weights, maxWeight = GetStandardizedWeights(weights) 214 fig = GetSimilarityMapFromWeights(probeMol, weights, **kwargs) 215 return fig, maxWeight
216 217 218 apDict = {} 219 apDict[ 220 'normal'] = lambda m, bits, minl, maxl, bpe, ia, **kwargs: rdMD.GetAtomPairFingerprint(m, minLength=minl, maxLength=maxl, ignoreAtoms=ia, **kwargs) 221 apDict[ 222 'hashed'] = lambda m, bits, minl, maxl, bpe, ia, **kwargs: rdMD.GetHashedAtomPairFingerprint(m, nBits=bits, minLength=minl, maxLength=maxl, ignoreAtoms=ia, **kwargs) 223 apDict[ 224 'bv'] = lambda m, bits, minl, maxl, bpe, ia, **kwargs: rdMD.GetHashedAtomPairFingerprintAsBitVect(m, nBits=bits, minLength=minl, maxLength=maxl, nBitsPerEntry=bpe, ignoreAtoms=ia, **kwargs) 225 226 227 # usage: lambda m,i: GetAPFingerprint(m, i, fpType, nBits, minLength, maxLength, nBitsPerEntry)
228 -def GetAPFingerprint(mol, atomId=-1, fpType='normal', nBits=2048, minLength=1, maxLength=30, 229 nBitsPerEntry=4, **kwargs):
230 """ 231 Calculates the atom pairs fingerprint with the torsions of atomId removed. 232 233 Parameters: 234 mol -- the molecule of interest 235 atomId -- the atom to remove the pairs for (if -1, no pair is removed) 236 fpType -- the type of AP fingerprint ('normal', 'hashed', 'bv') 237 nBits -- the size of the bit vector (only for fpType='bv') 238 minLength -- the minimum path length for an atom pair 239 maxLength -- the maxmimum path length for an atom pair 240 nBitsPerEntry -- the number of bits available for each pair 241 """ 242 if fpType not in ['normal', 'hashed', 'bv']: 243 raise ValueError("Unknown Atom pairs fingerprint type") 244 if atomId < 0: 245 return apDict[fpType](mol, nBits, minLength, maxLength, nBitsPerEntry, 0, **kwargs) 246 if atomId >= mol.GetNumAtoms(): 247 raise ValueError("atom index greater than number of atoms") 248 return apDict[fpType](mol, nBits, minLength, maxLength, nBitsPerEntry, [atomId], **kwargs)
249 250 251 ttDict = {} 252 ttDict[ 253 'normal'] = lambda m, bits, ts, bpe, ia, **kwargs: rdMD.GetTopologicalTorsionFingerprint(m, targetSize=ts, ignoreAtoms=ia, **kwargs) 254 ttDict[ 255 'hashed'] = lambda m, bits, ts, bpe, ia, **kwargs: rdMD.GetHashedTopologicalTorsionFingerprint(m, nBits=bits, targetSize=ts, ignoreAtoms=ia, **kwargs) 256 ttDict[ 257 'bv'] = lambda m, bits, ts, bpe, ia, **kwargs: rdMD.GetHashedTopologicalTorsionFingerprintAsBitVect(m, nBits=bits, targetSize=ts, nBitsPerEntry=bpe, ignoreAtoms=ia, **kwargs) 258 259 260 # usage: lambda m,i: GetTTFingerprint(m, i, fpType, nBits, targetSize)
261 -def GetTTFingerprint(mol, atomId=-1, fpType='normal', nBits=2048, targetSize=4, nBitsPerEntry=4, 262 **kwargs):
263 """ 264 Calculates the topological torsion fingerprint with the pairs of atomId removed. 265 266 Parameters: 267 mol -- the molecule of interest 268 atomId -- the atom to remove the torsions for (if -1, no torsion is removed) 269 fpType -- the type of TT fingerprint ('normal', 'hashed', 'bv') 270 nBits -- the size of the bit vector (only for fpType='bv') 271 minLength -- the minimum path length for an atom pair 272 maxLength -- the maxmimum path length for an atom pair 273 nBitsPerEntry -- the number of bits available for each torsion 274 275 any additional keyword arguments will be passed to the fingerprinting function. 276 277 """ 278 if fpType not in ['normal', 'hashed', 'bv']: 279 raise ValueError("Unknown Topological torsion fingerprint type") 280 if atomId < 0: 281 return ttDict[fpType](mol, nBits, targetSize, nBitsPerEntry, 0, **kwargs) 282 if atomId >= mol.GetNumAtoms(): 283 raise ValueError("atom index greater than number of atoms") 284 return ttDict[fpType](mol, nBits, targetSize, nBitsPerEntry, [atomId], **kwargs)
285 286 287 # usage: lambda m,i: GetMorganFingerprint(m, i, radius, fpType, nBits, useFeatures)
288 -def GetMorganFingerprint(mol, atomId=-1, radius=2, fpType='bv', nBits=2048, useFeatures=False, 289 **kwargs):
290 """ 291 Calculates the Morgan fingerprint with the environments of atomId removed. 292 293 Parameters: 294 mol -- the molecule of interest 295 radius -- the maximum radius 296 fpType -- the type of Morgan fingerprint: 'count' or 'bv' 297 atomId -- the atom to remove the environments for (if -1, no environments is removed) 298 nBits -- the size of the bit vector (only for fpType = 'bv') 299 useFeatures -- if false: ConnectivityMorgan, if true: FeatureMorgan 300 301 any additional keyword arguments will be passed to the fingerprinting function. 302 """ 303 if fpType not in ['bv', 'count']: 304 raise ValueError("Unknown Morgan fingerprint type") 305 if not hasattr(mol, '_fpInfo'): 306 info = {} 307 # get the fingerprint 308 if fpType == 'bv': 309 molFp = rdMD.GetMorganFingerprintAsBitVect(mol, radius, nBits=nBits, useFeatures=useFeatures, 310 bitInfo=info, **kwargs) 311 else: 312 molFp = rdMD.GetMorganFingerprint(mol, radius, useFeatures=useFeatures, bitInfo=info, 313 **kwargs) 314 # construct the bit map 315 if fpType == 'bv': 316 bitmap = [DataStructs.ExplicitBitVect(nBits) for _ in range(mol.GetNumAtoms())] 317 else: 318 bitmap = [[] for _ in range(mol.GetNumAtoms())] 319 for bit, es in iteritems(info): 320 for at1, rad in es: 321 if rad == 0: # for radius 0 322 if fpType == 'bv': 323 bitmap[at1][bit] = 1 324 else: 325 bitmap[at1].append(bit) 326 else: # for radii > 0 327 env = Chem.FindAtomEnvironmentOfRadiusN(mol, rad, at1) 328 amap = {} 329 Chem.PathToSubmol(mol, env, atomMap=amap) 330 for at2 in amap.keys(): 331 if fpType == 'bv': 332 bitmap[at2][bit] = 1 333 else: 334 bitmap[at2].append(bit) 335 mol._fpInfo = (molFp, bitmap) 336 337 if atomId < 0: 338 return mol._fpInfo[0] 339 else: # remove the bits of atomId 340 if atomId >= mol.GetNumAtoms(): 341 raise ValueError("atom index greater than number of atoms") 342 if len(mol._fpInfo) != 2: 343 raise ValueError("_fpInfo not set") 344 if fpType == 'bv': 345 molFp = mol._fpInfo[0] ^ mol._fpInfo[1][atomId] # xor 346 else: # count 347 molFp = copy.deepcopy(mol._fpInfo[0]) 348 # delete the bits with atomId 349 for bit in mol._fpInfo[1][atomId]: 350 molFp[bit] -= 1 351 return molFp
352 353 354 # usage: lambda m,i: GetRDKFingerprint(m, i, fpType, nBits, minPath, maxPath, nBitsPerHash)
355 -def GetRDKFingerprint(mol, atomId=-1, fpType='bv', nBits=2048, minPath=1, maxPath=5, nBitsPerHash=2, 356 **kwargs):
357 """ 358 Calculates the RDKit fingerprint with the paths of atomId removed. 359 360 Parameters: 361 mol -- the molecule of interest 362 atomId -- the atom to remove the paths for (if -1, no path is removed) 363 fpType -- the type of RDKit fingerprint: 'bv' 364 nBits -- the size of the bit vector 365 minPath -- minimum path length 366 maxPath -- maximum path length 367 nBitsPerHash -- number of to set per path 368 """ 369 if fpType not in ['bv', '']: 370 raise ValueError("Unknown RDKit fingerprint type") 371 fpType = 'bv' 372 if not hasattr(mol, '_fpInfo'): 373 info = [] # list with bits for each atom 374 # get the fingerprint 375 molFp = Chem.RDKFingerprint(mol, fpSize=nBits, minPath=minPath, maxPath=maxPath, 376 nBitsPerHash=nBitsPerHash, atomBits=info, **kwargs) 377 mol._fpInfo = (molFp, info) 378 379 if atomId < 0: 380 return mol._fpInfo[0] 381 else: # remove the bits of atomId 382 if atomId >= mol.GetNumAtoms(): 383 raise ValueError("atom index greater than number of atoms") 384 if len(mol._fpInfo) != 2: 385 raise ValueError("_fpInfo not set") 386 molFp = copy.deepcopy(mol._fpInfo[0]) 387 molFp.UnSetBitsFromList(mol._fpInfo[1][atomId]) 388 return molFp
389