Package rdkit :: Package Chem :: Module MolSurf
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.MolSurf

  1  # 
  2  # Copyright (C) 2001-2008 greg Landrum and Rational Discovery LLC 
  3  # 
  4  #   @@ All Rights Reserved @@ 
  5  #  This file is part of the RDKit. 
  6  #  The contents are covered by the terms of the BSD license 
  7  #  which is included in the file license.txt, found at the root 
  8  #  of the RDKit source tree. 
  9  # 
 10  """ Exposes functionality for MOE-like approximate molecular surface area 
 11  descriptors. 
 12   
 13    The MOE-like VSA descriptors are also calculated here 
 14   
 15  """ 
 16  from __future__ import print_function 
 17   
 18  import bisect 
 19   
 20  import numpy 
 21   
 22  from rdkit import Chem 
 23  from rdkit.Chem import Crippen 
 24  from rdkit.Chem import rdPartialCharges, rdMolDescriptors 
 25  from rdkit.Chem.PeriodicTable import numTable 
 26   
 27   
 28  radCol = 5 
 29  bondScaleFacts = [.1, 0, .2, .3]  # aromatic,single,double,triple 
 30   
 31   
32 -def _LabuteHelper(mol, includeHs=1, force=0):
33 """ *Internal Use Only* 34 helper function for LabuteASA calculation 35 returns an array of atomic contributions to the ASA 36 37 **Note:** Changes here affect the version numbers of all ASA descriptors 38 39 """ 40 if not force: 41 try: 42 res = mol._labuteContribs 43 except AttributeError: 44 pass 45 else: 46 if res: 47 return res 48 tpl = rdMolDescriptors._CalcLabuteASAContribs(mol, includeHs) 49 ats, hs = tpl 50 Vi = [hs] + list(ats) 51 mol._labuteContribs = Vi 52 return Vi
53 54
55 -def _pyLabuteHelper(mol, includeHs=1, force=0):
56 """ *Internal Use Only* 57 helper function for LabuteASA calculation 58 returns an array of atomic contributions to the ASA 59 60 **Note:** Changes here affect the version numbers of all ASA descriptors 61 62 """ 63 import math 64 if not force: 65 try: 66 res = mol._labuteContribs 67 except AttributeError: 68 pass 69 else: 70 if res.all(): 71 return res 72 73 nAts = mol.GetNumAtoms() 74 Vi = numpy.zeros(nAts + 1, 'd') 75 rads = numpy.zeros(nAts + 1, 'd') 76 77 # 0 contains the H information 78 rads[0] = numTable[1][radCol] 79 for i in range(nAts): 80 rads[i + 1] = numTable[mol.GetAtomWithIdx(i).GetAtomicNum()][radCol] 81 82 # start with explicit bonds 83 for bond in mol.GetBonds(): 84 idx1 = bond.GetBeginAtomIdx() + 1 85 idx2 = bond.GetEndAtomIdx() + 1 86 Ri = rads[idx1] 87 Rj = rads[idx2] 88 89 if not bond.GetIsAromatic(): 90 bij = Ri + Rj - bondScaleFacts[bond.GetBondType()] 91 else: 92 bij = Ri + Rj - bondScaleFacts[0] 93 dij = min(max(abs(Ri - Rj), bij), Ri + Rj) 94 Vi[idx1] += Rj * Rj - (Ri - dij) ** 2 / dij 95 Vi[idx2] += Ri * Ri - (Rj - dij) ** 2 / dij 96 97 # add in hydrogens 98 if includeHs: 99 j = 0 100 Rj = rads[j] 101 for i in range(1, nAts + 1): 102 Ri = rads[i] 103 bij = Ri + Rj 104 dij = min(max(abs(Ri - Rj), bij), Ri + Rj) 105 Vi[i] += Rj * Rj - (Ri - dij) ** 2 / dij 106 Vi[j] += Ri * Ri - (Rj - dij) ** 2 / dij 107 108 for i in range(nAts + 1): 109 Ri = rads[i] 110 Vi[i] = 4 * math.pi * Ri ** 2 - math.pi * Ri * Vi[i] 111 112 mol._labuteContribs = Vi 113 return Vi
114 115 # def SMR_VSA(mol,bins=[0.11,0.26,0.35,0.39,0.44,0.485,0.56]): 116 # original default bins from assuming Labute values are logs 117 # mrBins=[1.29, 1.82, 2.24, 2.45, 2.75, 3.05, 3.63] 118 mrBins = [1.29, 1.82, 2.24, 2.45, 2.75, 3.05, 3.63, 3.8, 4.0] 119 120
121 -def pySMR_VSA_(mol, bins=None, force=1):
122 """ *Internal Use Only* 123 """ 124 if not force: 125 try: 126 res = mol._smrVSA 127 except AttributeError: 128 pass 129 else: 130 if res.all(): 131 return res 132 133 if bins is None: 134 bins = mrBins 135 Crippen._Init() 136 propContribs = Crippen._GetAtomContribs(mol, force=force) 137 volContribs = _LabuteHelper(mol) 138 139 ans = numpy.zeros(len(bins) + 1, 'd') 140 for i in range(len(propContribs)): 141 prop = propContribs[i] 142 vol = volContribs[i + 1] 143 if prop is not None: 144 bin_ = bisect.bisect_right(bins, prop[1]) 145 ans[bin_] += vol 146 147 mol._smrVSA = ans 148 return ans
149 150 151 SMR_VSA_ = rdMolDescriptors.SMR_VSA_ 152 153 # 154 # Original bins (from Labute paper) are: 155 # [-0.4,-0.2,0,0.1,0.15,0.2,0.25,0.3,0.4] 156 # 157 logpBins = [-0.4, -0.2, 0, 0.1, 0.15, 0.2, 0.25, 0.3, 0.4, 0.5, 0.6] 158 159
160 -def pySlogP_VSA_(mol, bins=None, force=1):
161 """ *Internal Use Only* 162 """ 163 if not force: 164 try: 165 res = mol._slogpVSA 166 except AttributeError: 167 pass 168 else: 169 if res.all(): 170 return res 171 172 if bins is None: 173 bins = logpBins 174 Crippen._Init() 175 propContribs = Crippen._GetAtomContribs(mol, force=force) 176 volContribs = _LabuteHelper(mol) 177 178 ans = numpy.zeros(len(bins) + 1, 'd') 179 for i in range(len(propContribs)): 180 prop = propContribs[i] 181 vol = volContribs[i + 1] 182 if prop is not None: 183 bin_ = bisect.bisect_right(bins, prop[0]) 184 ans[bin_] += vol 185 186 mol._slogpVSA = ans 187 return ans
188 189 190 SlogP_VSA_ = rdMolDescriptors.SlogP_VSA_ 191 192 chgBins = [-.3, -.25, -.20, -.15, -.10, -.05, 0, .05, .10, .15, .20, .25, .30] 193 194
195 -def pyPEOE_VSA_(mol, bins=None, force=1):
196 """ *Internal Use Only* 197 """ 198 if not force: 199 try: 200 res = mol._peoeVSA 201 except AttributeError: 202 pass 203 else: 204 if res.all(): 205 return res 206 if bins is None: 207 bins = chgBins 208 Crippen._Init() 209 # print('\ts:',repr(mol.GetMol())) 210 # print('\t\t:',len(mol.GetAtoms())) 211 rdPartialCharges.ComputeGasteigerCharges(mol) 212 213 # propContribs = [float(x.GetProp('_GasteigerCharge')) for x in mol.GetAtoms()] 214 propContribs = [] 215 for at in mol.GetAtoms(): 216 p = at.GetProp('_GasteigerCharge') 217 try: 218 v = float(p) 219 except ValueError: 220 v = 0.0 221 propContribs.append(v) 222 volContribs = _LabuteHelper(mol) 223 224 ans = numpy.zeros(len(bins) + 1, 'd') 225 for i in range(len(propContribs)): 226 prop = propContribs[i] 227 vol = volContribs[i + 1] 228 if prop is not None: 229 bin_ = bisect.bisect_right(bins, prop) 230 ans[bin_] += vol 231 232 mol._peoeVSA = ans 233 return ans
234 235 236 PEOE_VSA_ = rdMolDescriptors.PEOE_VSA_ 237 238 239 # ------------------------------------------------- 240 # install the various VSA descriptors in the namespace
241 -def _InstallDescriptors():
242 for i in range(len(mrBins)): 243 fn = lambda x, y = i: SMR_VSA_(x, force=0)[y] 244 if i > 0: 245 fn.__doc__ = "MOE MR VSA Descriptor %d (% 4.2f <= x < % 4.2f)" % (i + 1, mrBins[i - 1], 246 mrBins[i]) 247 else: 248 fn.__doc__ = "MOE MR VSA Descriptor %d (-inf < x < % 4.2f)" % (i + 1, mrBins[i]) 249 name = "SMR_VSA%d" % (i + 1) 250 fn.version = "1.0.1" 251 globals()[name] = fn 252 i += 1 253 fn = lambda x, y = i: SMR_VSA_(x, force=0)[y] 254 fn.__doc__ = "MOE MR VSA Descriptor %d (% 4.2f <= x < inf)" % (i + 1, mrBins[i - 1]) 255 fn.version = "1.0.1" 256 name = "SMR_VSA%d" % (i + 1) 257 globals()[name] = fn 258 259 for i in range(len(logpBins)): 260 fn = lambda x, y = i: SlogP_VSA_(x, force=0)[y] 261 if i > 0: 262 fn.__doc__ = "MOE logP VSA Descriptor %d (% 4.2f <= x < % 4.2f)" % (i + 1, logpBins[i - 1], 263 logpBins[i]) 264 else: 265 fn.__doc__ = "MOE logP VSA Descriptor %d (-inf < x < % 4.2f)" % (i + 1, logpBins[i]) 266 name = "SlogP_VSA%d" % (i + 1) 267 fn.version = "1.0.1" 268 globals()[name] = fn 269 i += 1 270 fn = lambda x, y = i: SlogP_VSA_(x, force=0)[y] 271 fn.__doc__ = "MOE logP VSA Descriptor %d (% 4.2f <= x < inf)" % (i + 1, logpBins[i - 1]) 272 fn.version = "1.0.1" 273 name = "SlogP_VSA%d" % (i + 1) 274 globals()[name] = fn 275 276 for i in range(len(chgBins)): 277 fn = lambda x, y = i: PEOE_VSA_(x, force=0)[y] 278 if i > 0: 279 fn.__doc__ = "MOE Charge VSA Descriptor %d (% 4.2f <= x < % 4.2f)" % (i + 1, chgBins[i - 1], 280 chgBins[i]) 281 else: 282 fn.__doc__ = "MOE Charge VSA Descriptor %d (-inf < x < % 4.2f)" % (i + 1, chgBins[i]) 283 name = "PEOE_VSA%d" % (i + 1) 284 fn.version = "1.0.1" 285 globals()[name] = fn 286 i += 1 287 fn = lambda x, y = i: PEOE_VSA_(x, force=0)[y] 288 fn.version = "1.0.1" 289 fn.__doc__ = "MOE Charge VSA Descriptor %d (% 4.2f <= x < inf)" % (i + 1, chgBins[i - 1]) 290 name = "PEOE_VSA%d" % (i + 1) 291 globals()[name] = fn 292 fn = None
293 # Change log for the MOE-type descriptors: 294 # version 1.0.1: optimizations, values unaffected 295 296 297 _InstallDescriptors() 298 299
300 -def pyLabuteASA(mol, includeHs=1):
301 """ calculates Labute's Approximate Surface Area (ASA from MOE) 302 303 Definition from P. Labute's article in the Journal of the Chemical Computing Group 304 and J. Mol. Graph. Mod. _18_ 464-477 (2000) 305 306 """ 307 Vi = _LabuteHelper(mol, includeHs=includeHs) 308 return sum(Vi)
309 310 311 pyLabuteASA.version = "1.0.1" 312 # Change log for LabuteASA: 313 # version 1.0.1: optimizations, values unaffected 314 LabuteASA = lambda *x, **y: rdMolDescriptors.CalcLabuteASA(*x, **y) 315 LabuteASA.version = rdMolDescriptors._CalcLabuteASA_version 316 317
318 -def _pyTPSAContribs(mol, verbose=False):
319 """ DEPRECATED: this has been reimplmented in C++ 320 calculates atomic contributions to a molecules TPSA 321 322 Algorithm described in: 323 P. Ertl, B. Rohde, P. Selzer 324 Fast Calculation of Molecular Polar Surface Area as a Sum of Fragment-based 325 Contributions and Its Application to the Prediction of Drug Transport 326 Properties, J.Med.Chem. 43, 3714-3717, 2000 327 328 Implementation based on the Daylight contrib program tpsa.c 329 330 NOTE: The JMC paper describing the TPSA algorithm includes 331 contributions from sulfur and phosphorus, however according to 332 Peter Ertl (personal communication, 2010) the correlation of TPSA 333 with various ADME properties is better if only contributions from 334 oxygen and nitrogen are used. This matches the daylight contrib 335 implementation. 336 337 """ 338 res = [0] * mol.GetNumAtoms() 339 for i in range(mol.GetNumAtoms()): 340 atom = mol.GetAtomWithIdx(i) 341 atNum = atom.GetAtomicNum() 342 if atNum in [7, 8]: 343 nHs = atom.GetTotalNumHs() 344 chg = atom.GetFormalCharge() 345 in3Ring = atom.IsInRingSize(3) 346 347 bonds = atom.GetBonds() 348 numNeighbors = atom.GetDegree() 349 nSing = 0 350 nDoub = 0 351 nTrip = 0 352 nArom = 0 353 for bond in bonds: 354 otherAt = bond.GetOtherAtom(atom) 355 if otherAt.GetAtomicNum() != 1: 356 if bond.GetIsAromatic(): 357 nArom += 1 358 else: 359 order = bond.GetBondType() 360 if order == Chem.BondType.SINGLE: 361 nSing += 1 362 elif order == Chem.BondType.DOUBLE: 363 nDoub += 1 364 elif order == Chem.BondType.TRIPLE: 365 nTrip += 1 366 else: 367 numNeighbors -= 1 368 nHs += 1 369 tmp = -1 370 if atNum == 7: 371 if numNeighbors == 1: 372 if nHs == 0 and nTrip == 1 and chg == 0: 373 tmp = 23.79 374 elif nHs == 1 and nDoub == 1 and chg == 0: 375 tmp = 23.85 376 elif nHs == 2 and nSing == 1 and chg == 0: 377 tmp = 26.02 378 elif nHs == 2 and nDoub == 1 and chg == 1: 379 tmp = 25.59 380 elif nHs == 3 and nSing == 1 and chg == 1: 381 tmp = 27.64 382 elif numNeighbors == 2: 383 if nHs == 0 and nSing == 1 and nDoub == 1 and chg == 0: 384 tmp = 12.36 385 elif nHs == 0 and nTrip == 1 and nDoub == 1 and chg == 0: 386 tmp = 13.60 387 elif nHs == 1 and nSing == 2 and chg == 0: 388 if not in3Ring: 389 tmp = 12.03 390 else: 391 tmp = 21.94 392 elif nHs == 0 and nTrip == 1 and nSing == 1 and chg == 1: 393 tmp = 4.36 394 elif nHs == 1 and nDoub == 1 and nSing == 1 and chg == 1: 395 tmp = 13.97 396 elif nHs == 2 and nSing == 2 and chg == 1: 397 tmp = 16.61 398 elif nHs == 0 and nArom == 2 and chg == 0: 399 tmp = 12.89 400 elif nHs == 1 and nArom == 2 and chg == 0: 401 tmp = 15.79 402 elif nHs == 1 and nArom == 2 and chg == 1: 403 tmp = 14.14 404 elif numNeighbors == 3: 405 if nHs == 0 and nSing == 3 and chg == 0: 406 if not in3Ring: 407 tmp = 3.24 408 else: 409 tmp = 3.01 410 elif nHs == 0 and nSing == 1 and nDoub == 2 and chg == 0: 411 tmp = 11.68 412 elif nHs == 0 and nSing == 2 and nDoub == 1 and chg == 1: 413 tmp = 3.01 414 elif nHs == 1 and nSing == 3 and chg == 1: 415 tmp = 4.44 416 elif nHs == 0 and nArom == 3 and chg == 0: 417 tmp = 4.41 418 elif nHs == 0 and nSing == 1 and nArom == 2 and chg == 0: 419 tmp = 4.93 420 elif nHs == 0 and nDoub == 1 and nArom == 2 and chg == 0: 421 tmp = 8.39 422 elif nHs == 0 and nArom == 3 and chg == 1: 423 tmp = 4.10 424 elif nHs == 0 and nSing == 1 and nArom == 2 and chg == 1: 425 tmp = 3.88 426 elif numNeighbors == 4: 427 if nHs == 0 and nSing == 4 and chg == 1: 428 tmp = 0.00 429 if tmp < 0.0: 430 tmp = 30.5 - numNeighbors * 8.2 + nHs * 1.5 431 if tmp < 0.0: 432 tmp = 0.0 433 elif atNum == 8: 434 if numNeighbors == 1: 435 if nHs == 0 and nDoub == 1 and chg == 0: 436 tmp = 17.07 437 elif nHs == 1 and nSing == 1 and chg == 0: 438 tmp = 20.23 439 elif nHs == 0 and nSing == 1 and chg == -1: 440 tmp = 23.06 441 elif numNeighbors == 2: 442 if nHs == 0 and nSing == 2 and chg == 0: 443 if not in3Ring: 444 tmp = 9.23 445 else: 446 tmp = 12.53 447 elif nHs == 0 and nArom == 2 and chg == 0: 448 tmp = 13.14 449 450 if tmp < 0.0: 451 tmp = 28.5 - numNeighbors * 8.6 + nHs * 1.5 452 if tmp < 0.0: 453 tmp = 0.0 454 if verbose: 455 print('\t', atom.GetIdx(), atom.GetSymbol(), atNum, nHs, nSing, nDoub, nTrip, nArom, chg, 456 tmp) 457 458 res[atom.GetIdx()] = tmp 459 return res
460 461
462 -def _pyTPSA(mol, verbose=False):
463 """ DEPRECATED: this has been reimplmented in C++ 464 calculates the polar surface area of a molecule based upon fragments 465 466 Algorithm in: 467 P. Ertl, B. Rohde, P. Selzer 468 Fast Calculation of Molecular Polar Surface Area as a Sum of Fragment-based 469 Contributions and Its Application to the Prediction of Drug Transport 470 Properties, J.Med.Chem. 43, 3714-3717, 2000 471 472 Implementation based on the Daylight contrib program tpsa.c 473 """ 474 contribs = _pyTPSAContribs(mol, verbose=verbose) 475 res = 0.0 476 for contrib in contribs: 477 res += contrib 478 return res
479 480 481 _pyTPSA.version = "1.0.1" 482 483 TPSA = lambda *x, **y: rdMolDescriptors.CalcTPSA(*x, **y) 484 TPSA.version = rdMolDescriptors._CalcTPSA_version 485 486 if __name__ == '__main__': # pragma: nocover 487 smis = ['C', 'CC', 'CCC', 'CCCC', 'CO', 'CCO', 'COC'] 488 smis = ['C(=O)O', 'c1ccccc1'] 489 for smi in smis: 490 m = Chem.MolFromSmiles(smi) 491 # print(smi, LabuteASA(m)) 492 print('-----------\n', smi) 493 # print('M:',['% 4.2f'%x for x in SMR_VSA_(m)]) 494 # print('L:',['% 4.2f'%x for x in SlogP_VSA_(m)]) 495 print('P:', ['% 4.2f' % x for x in PEOE_VSA_(m)]) 496 print('P:', ['% 4.2f' % x for x in PEOE_VSA_(m)]) 497 print() 498