Package rdkit :: Package Chem :: Package Pharm3D :: Module EmbedLib
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.Pharm3D.EmbedLib

   1  # 
   2  # Copyright (C) 2004-2008 Greg Landrum and Rational Discovery LLC 
   3  # 
   4  #   @@ All Rights Reserved @@ 
   5  #  This file is part of the RDKit. 
   6  #  The contents are covered by the terms of the BSD license 
   7  #  which is included in the file license.txt, found at the root 
   8  #  of the RDKit source tree. 
   9  # 
  10  from __future__ import print_function 
  11   
  12  import math 
  13  import sys 
  14  import time 
  15   
  16  import numpy 
  17   
  18  from rdkit import Chem 
  19  from rdkit import RDLogger as logging 
  20  from rdkit.Chem import ChemicalFeatures 
  21  from rdkit.Chem import ChemicalForceFields 
  22  from rdkit.Chem import rdDistGeom as MolDG 
  23  from rdkit.Chem.Pharm3D import ExcludedVolume 
  24  import rdkit.DistanceGeometry as DG 
  25  from rdkit.ML.Data import Stats 
  26   
  27  _times = {} 
  28   
  29  logger = logging.logger() 
  30  defaultFeatLength = 2.0 
  31   
  32   
33 -def GetAtomHeavyNeighbors(atom):
34 """ returns a list of the heavy-atom neighbors of the 35 atom passed in: 36 37 >>> m = Chem.MolFromSmiles('CCO') 38 >>> l = GetAtomHeavyNeighbors(m.GetAtomWithIdx(0)) 39 >>> len(l) 40 1 41 >>> isinstance(l[0],Chem.Atom) 42 True 43 >>> l[0].GetIdx() 44 1 45 46 >>> l = GetAtomHeavyNeighbors(m.GetAtomWithIdx(1)) 47 >>> len(l) 48 2 49 >>> l[0].GetIdx() 50 0 51 >>> l[1].GetIdx() 52 2 53 54 """ 55 res = [] 56 for nbr in atom.GetNeighbors(): 57 if nbr.GetAtomicNum() != 1: 58 res.append(nbr) 59 return res
60 61
62 -def ReplaceGroup(match, bounds, slop=0.01, useDirs=False, dirLength=defaultFeatLength):
63 """ Adds an entry at the end of the bounds matrix for a point at 64 the center of a multi-point feature 65 66 returns a 2-tuple: 67 new bounds mat 68 index of point added 69 70 >>> boundsMat = numpy.array([[0.0,2.0,2.0],[1.0,0.0,2.0],[1.0,1.0,0.0]]) 71 >>> match = [0,1,2] 72 >>> bm,idx = ReplaceGroup(match,boundsMat,slop=0.0) 73 74 the index is at the end: 75 >>> idx == 3 76 True 77 78 and the matrix is one bigger: 79 >>> bm.shape == (4, 4) 80 True 81 82 but the original bounds mat is not altered: 83 >>> boundsMat.shape == (3, 3) 84 True 85 86 87 We make the assumption that the points of the 88 feature form a regular polygon, are listed in order 89 (i.e. pt 0 is a neighbor to pt 1 and pt N-1) 90 and that the replacement point goes at the center: 91 >>> print(', '.join(['%.3f'%x for x in bm[-1]])) 92 0.577, 0.577, 0.577, 0.000 93 >>> print(', '.join(['%.3f'%x for x in bm[:,-1]])) 94 1.155, 1.155, 1.155, 0.000 95 96 The slop argument (default = 0.01) is fractional: 97 >>> bm,idx = ReplaceGroup(match,boundsMat) 98 >>> print(', '.join(['%.3f'%x for x in bm[-1]])) 99 0.572, 0.572, 0.572, 0.000 100 >>> print(', '.join(['%.3f'%x for x in bm[:,-1]])) 101 1.166, 1.166, 1.166, 0.000 102 103 104 """ 105 maxVal = -1000.0 106 minVal = 1e8 107 nPts = len(match) 108 for i in range(nPts): 109 idx0 = match[i] 110 if i < nPts - 1: 111 idx1 = match[i + 1] 112 else: 113 idx1 = match[0] 114 if idx1 < idx0: 115 idx0, idx1 = idx1, idx0 116 minVal = min(minVal, bounds[idx1, idx0]) 117 maxVal = max(maxVal, bounds[idx0, idx1]) 118 maxVal *= (1 + slop) 119 minVal *= (1 - slop) 120 121 scaleFact = 1.0 / (2.0 * math.sin(math.pi / nPts)) 122 minVal *= scaleFact 123 maxVal *= scaleFact 124 125 replaceIdx = bounds.shape[0] 126 if not useDirs: 127 bm = numpy.zeros((bounds.shape[0] + 1, bounds.shape[1] + 1), numpy.float) 128 else: 129 bm = numpy.zeros((bounds.shape[0] + 2, bounds.shape[1] + 2), numpy.float) 130 bm[0:bounds.shape[0], 0:bounds.shape[1]] = bounds 131 bm[:replaceIdx, replaceIdx] = 1000. 132 133 if useDirs: 134 bm[:replaceIdx + 1, replaceIdx + 1] = 1000. 135 # set the feature - direction point bounds: 136 bm[replaceIdx, replaceIdx + 1] = dirLength + slop 137 bm[replaceIdx + 1, replaceIdx] = dirLength - slop 138 139 for idx1 in match: 140 bm[idx1, replaceIdx] = maxVal 141 bm[replaceIdx, idx1] = minVal 142 if useDirs: 143 # set the point - direction point bounds: 144 bm[idx1, replaceIdx + 1] = numpy.sqrt(bm[replaceIdx, replaceIdx + 1] ** 2 + maxVal ** 2) 145 bm[replaceIdx + 1, idx1] = numpy.sqrt(bm[replaceIdx + 1, replaceIdx] ** 2 + minVal ** 2) 146 return bm, replaceIdx
147 148
149 -def EmbedMol(mol, bm, atomMatch=None, weight=2.0, randomSeed=-1, excludedVolumes=None):
150 """ Generates an embedding for a molecule based on a bounds matrix and adds 151 a conformer (id 0) to the molecule 152 153 if the optional argument atomMatch is provided, it will be used to provide 154 supplemental weights for the embedding routine (used in the optimization 155 phase to ensure that the resulting geometry really does satisfy the 156 pharmacophore). 157 158 if the excludedVolumes is provided, it should be a sequence of 159 ExcludedVolume objects 160 161 >>> m = Chem.MolFromSmiles('c1ccccc1C') 162 >>> bounds = MolDG.GetMoleculeBoundsMatrix(m) 163 >>> bounds.shape == (7, 7) 164 True 165 >>> m.GetNumConformers() 166 0 167 >>> EmbedMol(m,bounds,randomSeed=23) 168 >>> m.GetNumConformers() 169 1 170 171 172 """ 173 nAts = mol.GetNumAtoms() 174 weights = [] 175 if atomMatch: 176 for i in range(len(atomMatch)): 177 for j in range(i + 1, len(atomMatch)): 178 weights.append((i, j, weight)) 179 if excludedVolumes: 180 for vol in excludedVolumes: 181 idx = vol.index 182 # excluded volumes affect every other atom: 183 for i in range(nAts): 184 weights.append((i, idx, weight)) 185 coords = DG.EmbedBoundsMatrix(bm, weights=weights, numZeroFail=1, randomSeed=randomSeed) 186 # for row in coords: 187 # print(', '.join(['%.2f'%x for x in row])) 188 189 conf = Chem.Conformer(nAts) 190 conf.SetId(0) 191 for i in range(nAts): 192 conf.SetAtomPosition(i, list(coords[i])) 193 if excludedVolumes: 194 for vol in excludedVolumes: 195 vol.pos = numpy.array(coords[vol.index]) 196 197 print(' % 7.4f % 7.4f % 7.4f Ar 0 0 0 0 0 0 0 0 0 0 0 0' % tuple(coords[-1]), 198 file=sys.stderr) 199 mol.AddConformer(conf)
200 201
202 -def AddExcludedVolumes(bm, excludedVolumes, smoothIt=True):
203 """ Adds a set of excluded volumes to the bounds matrix 204 and returns the new matrix 205 206 excludedVolumes is a list of ExcludedVolume objects 207 208 209 >>> boundsMat = numpy.array([[0.0,2.0,2.0],[1.0,0.0,2.0],[1.0,1.0,0.0]]) 210 >>> ev1 = ExcludedVolume.ExcludedVolume(([(0,),0.5,1.0],),exclusionDist=1.5) 211 >>> bm = AddExcludedVolumes(boundsMat,(ev1,)) 212 213 the results matrix is one bigger: 214 >>> bm.shape == (4, 4) 215 True 216 217 and the original bounds mat is not altered: 218 >>> boundsMat.shape == (3, 3) 219 True 220 221 >>> print(', '.join(['%.3f'%x for x in bm[-1]])) 222 0.500, 1.500, 1.500, 0.000 223 >>> print(', '.join(['%.3f'%x for x in bm[:,-1]])) 224 1.000, 3.000, 3.000, 0.000 225 226 """ 227 oDim = bm.shape[0] 228 dim = oDim + len(excludedVolumes) 229 res = numpy.zeros((dim, dim), numpy.float) 230 res[:oDim, :oDim] = bm 231 for i, vol in enumerate(excludedVolumes): 232 bmIdx = oDim + i 233 vol.index = bmIdx 234 235 # set values to all the atoms: 236 res[bmIdx, :bmIdx] = vol.exclusionDist 237 res[:bmIdx, bmIdx] = 1000.0 238 239 # set values to our defining features: 240 for indices, minV, maxV in vol.featInfo: 241 for index in indices: 242 try: 243 res[bmIdx, index] = minV 244 res[index, bmIdx] = maxV 245 except IndexError: 246 logger.error('BAD INDEX: res[%d,%d], shape is %s' % (bmIdx, index, str(res.shape))) 247 raise IndexError 248 249 # set values to other excluded volumes: 250 for j in range(bmIdx + 1, dim): 251 res[bmIdx, j:dim] = 0 252 res[j:dim, bmIdx] = 1000 253 254 if smoothIt: 255 DG.DoTriangleSmoothing(res) 256 return res
257 258
259 -def UpdatePharmacophoreBounds(bm, atomMatch, pcophore, useDirs=False, dirLength=defaultFeatLength, 260 mol=None):
261 """ loops over a distance bounds matrix and replaces the elements 262 that are altered by a pharmacophore 263 264 **NOTE** this returns the resulting bounds matrix, but it may also 265 alter the input matrix 266 267 atomMatch is a sequence of sequences containing atom indices 268 for each of the pharmacophore's features. 269 270 >>> from rdkit import Geometry 271 >>> from rdkit.Chem.Pharm3D import Pharmacophore 272 >>> feats = [ 273 ... ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1', 274 ... Geometry.Point3D(0.0, 0.0, 0.0)), 275 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1', 276 ... Geometry.Point3D(2.65, 0.0, 0.0)), 277 ... ] 278 >>> pcophore=Pharmacophore.Pharmacophore(feats) 279 >>> pcophore.setLowerBound(0,1, 1.0) 280 >>> pcophore.setUpperBound(0,1, 2.0) 281 282 >>> boundsMat = numpy.array([[0.0,3.0,3.0],[2.0,0.0,3.0],[2.0,2.0,0.0]]) 283 >>> atomMatch = ((0,),(1,)) 284 >>> bm = UpdatePharmacophoreBounds(boundsMat,atomMatch,pcophore) 285 286 287 In this case, there are no multi-atom features, so the result matrix 288 is the same as the input: 289 >>> bm is boundsMat 290 True 291 292 this means, of course, that the input boundsMat is altered: 293 >>> print(', '.join(['%.3f'%x for x in boundsMat[0]])) 294 0.000, 2.000, 3.000 295 >>> print(', '.join(['%.3f'%x for x in boundsMat[1]])) 296 1.000, 0.000, 3.000 297 >>> print(', '.join(['%.3f'%x for x in boundsMat[2]])) 298 2.000, 2.000, 0.000 299 300 """ 301 replaceMap = {} 302 for i, matchI in enumerate(atomMatch): 303 if len(matchI) > 1: 304 bm, replaceIdx = ReplaceGroup(matchI, bm, useDirs=useDirs) 305 replaceMap[i] = replaceIdx 306 307 for i, matchI in enumerate(atomMatch): 308 mi = replaceMap.get(i, matchI[0]) 309 for j in range(i + 1, len(atomMatch)): 310 mj = replaceMap.get(j, atomMatch[j][0]) 311 if mi < mj: 312 idx0, idx1 = mi, mj 313 else: 314 idx0, idx1 = mj, mi 315 bm[idx0, idx1] = pcophore.getUpperBound(i, j) 316 bm[idx1, idx0] = pcophore.getLowerBound(i, j) 317 318 return bm
319 320
321 -def EmbedPharmacophore(mol, atomMatch, pcophore, randomSeed=-1, count=10, smoothFirst=True, 322 silent=False, bounds=None, excludedVolumes=None, targetNumber=-1, 323 useDirs=False):
324 """ Generates one or more embeddings for a molecule that satisfy a pharmacophore 325 326 atomMatch is a sequence of sequences containing atom indices 327 for each of the pharmacophore's features. 328 329 - count: is the maximum number of attempts to make a generating an embedding 330 - smoothFirst: toggles triangle smoothing of the molecular bounds matix 331 - bounds: if provided, should be the molecular bounds matrix. If this isn't 332 provided, the matrix will be generated. 333 - targetNumber: if this number is positive, it provides a maximum number 334 of embeddings to generate (i.e. we'll have count attempts to generate 335 targetNumber embeddings). 336 337 returns: a 3 tuple: 338 1) the molecular bounds matrix adjusted for the pharmacophore 339 2) a list of embeddings (molecules with a single conformer) 340 3) the number of failed attempts at embedding 341 342 >>> from rdkit import Geometry 343 >>> from rdkit.Chem.Pharm3D import Pharmacophore 344 >>> m = Chem.MolFromSmiles('OCCN') 345 >>> feats = [ 346 ... ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1', 347 ... Geometry.Point3D(0.0, 0.0, 0.0)), 348 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1', 349 ... Geometry.Point3D(2.65, 0.0, 0.0)), 350 ... ] 351 >>> pcophore=Pharmacophore.Pharmacophore(feats) 352 >>> pcophore.setLowerBound(0,1, 2.5) 353 >>> pcophore.setUpperBound(0,1, 3.5) 354 >>> atomMatch = ((0,),(3,)) 355 356 >>> bm,embeds,nFail = EmbedPharmacophore(m,atomMatch,pcophore,randomSeed=23,silent=1) 357 >>> len(embeds) 358 10 359 >>> nFail 360 0 361 362 Set up a case that can't succeed: 363 >>> pcophore=Pharmacophore.Pharmacophore(feats) 364 >>> pcophore.setLowerBound(0,1, 2.0) 365 >>> pcophore.setUpperBound(0,1, 2.1) 366 >>> atomMatch = ((0,),(3,)) 367 368 >>> bm,embeds,nFail = EmbedPharmacophore(m,atomMatch,pcophore,randomSeed=23,silent=1) 369 >>> len(embeds) 370 0 371 >>> nFail 372 10 373 374 """ 375 global _times 376 if not hasattr(mol, '_chiralCenters'): 377 mol._chiralCenters = Chem.FindMolChiralCenters(mol) 378 379 if bounds is None: 380 bounds = MolDG.GetMoleculeBoundsMatrix(mol) 381 if smoothFirst: 382 DG.DoTriangleSmoothing(bounds) 383 384 bm = bounds.copy() 385 # print '------------' 386 # print 'initial' 387 # for row in bm: 388 # print ' ',' '.join(['% 4.2f'%x for x in row]) 389 # print '------------' 390 391 bm = UpdatePharmacophoreBounds(bm, atomMatch, pcophore, useDirs=useDirs, mol=mol) 392 393 if excludedVolumes: 394 bm = AddExcludedVolumes(bm, excludedVolumes, smoothIt=False) 395 396 if not DG.DoTriangleSmoothing(bm): 397 raise ValueError("could not smooth bounds matrix") 398 399 # print '------------' 400 # print 'post replace and smooth' 401 # for row in bm: 402 # print ' ',' '.join(['% 4.2f'%x for x in row]) 403 # print '------------' 404 405 if targetNumber <= 0: 406 targetNumber = count 407 nFailed = 0 408 res = [] 409 for i in range(count): 410 tmpM = bm[:, :] 411 m2 = Chem.Mol(mol) 412 t1 = time.time() 413 try: 414 if randomSeed <= 0: 415 seed = i * 10 + 1 416 else: 417 seed = i * 10 + randomSeed 418 EmbedMol(m2, tmpM, atomMatch, randomSeed=seed, excludedVolumes=excludedVolumes) 419 except ValueError: 420 if not silent: 421 logger.info('Embed failed') 422 nFailed += 1 423 else: 424 t2 = time.time() 425 _times['embed'] = _times.get('embed', 0) + t2 - t1 426 keepIt = True 427 for idx, stereo in mol._chiralCenters: 428 if stereo in ('R', 'S'): 429 vol = ComputeChiralVolume(m2, idx) 430 if (stereo == 'R' and vol >= 0) or (stereo == 'S' and vol <= 0): 431 keepIt = False 432 break 433 if keepIt: 434 res.append(m2) 435 else: 436 logger.debug('Removed embedding due to chiral constraints.') 437 if len(res) == targetNumber: 438 break 439 return bm, res, nFailed
440 441
442 -def isNaN(v):
443 """ provides an OS independent way of detecting NaNs 444 This is intended to be used with values returned from the C++ 445 side of things. 446 447 We can't actually test this from Python (which traps 448 zero division errors), but it would work something like 449 this if we could: 450 >>> isNaN(0) 451 False 452 453 #>>> isNan(1/0) 454 #True 455 456 """ 457 if v != v and sys.platform == 'win32': 458 return True 459 elif v == 0 and v == 1 and sys.platform != 'win32': 460 return True 461 return False
462 463
464 -def OptimizeMol(mol, bm, atomMatches=None, excludedVolumes=None, forceConstant=1200.0, maxPasses=5, 465 verbose=False):
466 """ carries out a UFF optimization for a molecule optionally subject 467 to the constraints in a bounds matrix 468 469 - atomMatches, if provided, is a sequence of sequences 470 - forceConstant is the force constant of the spring used to enforce 471 the constraints 472 473 returns a 2-tuple: 474 1) the energy of the initial conformation 475 2) the energy post-embedding 476 NOTE that these energies include the energies of the constraints 477 478 479 480 >>> from rdkit import Geometry 481 >>> from rdkit.Chem.Pharm3D import Pharmacophore 482 >>> m = Chem.MolFromSmiles('OCCN') 483 >>> feats = [ 484 ... ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1', 485 ... Geometry.Point3D(0.0, 0.0, 0.0)), 486 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1', 487 ... Geometry.Point3D(2.65, 0.0, 0.0)), 488 ... ] 489 >>> pcophore=Pharmacophore.Pharmacophore(feats) 490 >>> pcophore.setLowerBound(0,1, 2.5) 491 >>> pcophore.setUpperBound(0,1, 2.8) 492 >>> atomMatch = ((0,),(3,)) 493 >>> bm,embeds,nFail = EmbedPharmacophore(m,atomMatch,pcophore,randomSeed=23,silent=1) 494 >>> len(embeds) 495 10 496 >>> testM = embeds[0] 497 498 Do the optimization: 499 >>> e1,e2 = OptimizeMol(testM,bm,atomMatches=atomMatch) 500 501 Optimizing should have lowered the energy: 502 >>> e2 < e1 503 True 504 505 Check the constrained distance: 506 >>> conf = testM.GetConformer(0) 507 >>> p0 = conf.GetAtomPosition(0) 508 >>> p3 = conf.GetAtomPosition(3) 509 >>> d03 = p0.Distance(p3) 510 >>> d03 >= pcophore.getLowerBound(0,1)-.01 511 True 512 >>> d03 <= pcophore.getUpperBound(0,1)+.01 513 True 514 515 If we optimize without the distance constraints (provided via the atomMatches 516 argument) we're not guaranteed to get the same results, particularly in a case 517 like the current one where the pharmcophore brings the atoms uncomfortably 518 close together: 519 >>> testM = embeds[1] 520 >>> e1,e2 = OptimizeMol(testM,bm) 521 >>> e2 < e1 522 True 523 >>> conf = testM.GetConformer(0) 524 >>> p0 = conf.GetAtomPosition(0) 525 >>> p3 = conf.GetAtomPosition(3) 526 >>> d03 = p0.Distance(p3) 527 >>> d03 >= pcophore.getLowerBound(0,1)-.01 528 True 529 >>> d03 <= pcophore.getUpperBound(0,1)+.01 530 False 531 532 """ 533 try: 534 ff = ChemicalForceFields.UFFGetMoleculeForceField(mol) 535 except Exception: 536 logger.info('Problems building molecular forcefield', exc_info=True) 537 return -1.0, -1.0 538 539 weights = [] 540 if (atomMatches): 541 for k in range(len(atomMatches)): 542 for i in atomMatches[k]: 543 for l in range(k + 1, len(atomMatches)): 544 for j in atomMatches[l]: 545 weights.append((i, j)) 546 for i, j in weights: 547 if j < i: 548 i, j = j, i 549 minV = bm[j, i] 550 maxV = bm[i, j] 551 ff.AddDistanceConstraint(i, j, minV, maxV, forceConstant) 552 if excludedVolumes: 553 nAts = mol.GetNumAtoms() 554 conf = mol.GetConformer() 555 idx = nAts 556 for exVol in excludedVolumes: 557 assert exVol.pos is not None 558 logger.debug('ff.AddExtraPoint(%.4f,%.4f,%.4f)' % (exVol.pos[0], exVol.pos[1], exVol.pos[2])) 559 ff.AddExtraPoint(exVol.pos[0], exVol.pos[1], exVol.pos[2], True) 560 indices = [] 561 for localIndices, _, _ in exVol.featInfo: 562 indices += list(localIndices) 563 for i in range(nAts): 564 v = numpy.array(conf.GetAtomPosition(i)) - numpy.array(exVol.pos) 565 d = numpy.sqrt(numpy.dot(v, v)) 566 if i not in indices: 567 if d < 5.0: 568 logger.debug('ff.AddDistanceConstraint(%d,%d,%.3f,%d,%.0f)' % 569 (i, idx, exVol.exclusionDist, 1000, forceConstant)) 570 ff.AddDistanceConstraint(i, idx, exVol.exclusionDist, 1000, forceConstant) 571 572 else: 573 logger.debug('ff.AddDistanceConstraint(%d,%d,%.3f,%.3f,%.0f)' % 574 (i, idx, bm[exVol.index, i], bm[i, exVol.index], forceConstant)) 575 ff.AddDistanceConstraint(i, idx, bm[exVol.index, i], bm[i, exVol.index], forceConstant) 576 idx += 1 577 ff.Initialize() 578 e1 = ff.CalcEnergy() 579 if isNaN(e1): 580 raise ValueError('bogus energy') 581 582 if verbose: 583 print(Chem.MolToMolBlock(mol)) 584 for i, _ in enumerate(excludedVolumes): 585 pos = ff.GetExtraPointPos(i) 586 print(' % 7.4f % 7.4f % 7.4f As 0 0 0 0 0 0 0 0 0 0 0 0' % tuple(pos), 587 file=sys.stderr) 588 needsMore = ff.Minimize() 589 nPasses = 0 590 while needsMore and nPasses < maxPasses: 591 needsMore = ff.Minimize() 592 nPasses += 1 593 e2 = ff.CalcEnergy() 594 if isNaN(e2): 595 raise ValueError('bogus energy') 596 597 if verbose: 598 print('--------') 599 print(Chem.MolToMolBlock(mol)) 600 for i, _ in enumerate(excludedVolumes): 601 pos = ff.GetExtraPointPos(i) 602 print(' % 7.4f % 7.4f % 7.4f Sb 0 0 0 0 0 0 0 0 0 0 0 0' % tuple(pos), 603 file=sys.stderr) 604 ff = None 605 return e1, e2
606 607
608 -def EmbedOne(mol, name, match, pcophore, count=1, silent=0, **kwargs):
609 """ generates statistics for a molecule's embeddings 610 611 Four energies are computed for each embedding: 612 1) E1: the energy (with constraints) of the initial embedding 613 2) E2: the energy (with constraints) of the optimized embedding 614 3) E3: the energy (no constraints) the geometry for E2 615 4) E4: the energy (no constraints) of the optimized free-molecule 616 (starting from the E3 geometry) 617 618 Returns a 9-tuple: 619 1) the mean value of E1 620 2) the sample standard deviation of E1 621 3) the mean value of E2 622 4) the sample standard deviation of E2 623 5) the mean value of E3 624 6) the sample standard deviation of E3 625 7) the mean value of E4 626 8) the sample standard deviation of E4 627 9) The number of embeddings that failed 628 629 """ 630 global _times 631 atomMatch = [list(x.GetAtomIds()) for x in match] 632 bm, ms, nFailed = EmbedPharmacophore(mol, atomMatch, pcophore, count=count, silent=silent, 633 **kwargs) 634 e1s = [] 635 e2s = [] 636 e3s = [] 637 e4s = [] 638 d12s = [] 639 d23s = [] 640 d34s = [] 641 for m in ms: 642 t1 = time.time() 643 try: 644 e1, e2 = OptimizeMol(m, bm, atomMatch) 645 except ValueError: 646 pass 647 else: 648 t2 = time.time() 649 _times['opt1'] = _times.get('opt1', 0) + t2 - t1 650 651 e1s.append(e1) 652 e2s.append(e2) 653 654 d12s.append(e1 - e2) 655 t1 = time.time() 656 try: 657 e3, e4 = OptimizeMol(m, bm) 658 except ValueError: 659 pass 660 else: 661 t2 = time.time() 662 _times['opt2'] = _times.get('opt2', 0) + t2 - t1 663 e3s.append(e3) 664 e4s.append(e4) 665 d23s.append(e2 - e3) 666 d34s.append(e3 - e4) 667 count += 1 668 try: 669 e1, e1d = Stats.MeanAndDev(e1s) 670 except Exception: 671 e1 = -1.0 672 e1d = -1.0 673 try: 674 e2, e2d = Stats.MeanAndDev(e2s) 675 except Exception: 676 e2 = -1.0 677 e2d = -1.0 678 try: 679 e3, e3d = Stats.MeanAndDev(e3s) 680 except Exception: 681 e3 = -1.0 682 e3d = -1.0 683 684 try: 685 e4, e4d = Stats.MeanAndDev(e4s) 686 except Exception: 687 e4 = -1.0 688 e4d = -1.0 689 if not silent: 690 print('%s(%d): %.2f(%.2f) -> %.2f(%.2f) : %.2f(%.2f) -> %.2f(%.2f)' % 691 (name, nFailed, e1, e1d, e2, e2d, e3, e3d, e4, e4d)) 692 return e1, e1d, e2, e2d, e3, e3d, e4, e4d, nFailed
693 694
695 -def MatchPharmacophoreToMol(mol, featFactory, pcophore):
696 """ generates a list of all possible mappings of a pharmacophore to a molecule 697 698 Returns a 2-tuple: 699 1) a boolean indicating whether or not all features were found 700 2) a list, numFeatures long, of sequences of features 701 702 703 >>> import os.path 704 >>> from rdkit import Geometry, RDConfig 705 >>> from rdkit.Chem.Pharm3D import Pharmacophore 706 >>> fdefFile = os.path.join(RDConfig.RDCodeDir,'Chem/Pharm3D/test_data/BaseFeatures.fdef') 707 >>> featFactory = ChemicalFeatures.BuildFeatureFactory(fdefFile) 708 >>> activeFeats = [ 709 ... ChemicalFeatures.FreeChemicalFeature('Acceptor', Geometry.Point3D(0.0, 0.0, 0.0)), 710 ... ChemicalFeatures.FreeChemicalFeature('Donor',Geometry.Point3D(0.0, 0.0, 0.0))] 711 >>> pcophore= Pharmacophore.Pharmacophore(activeFeats) 712 >>> m = Chem.MolFromSmiles('FCCN') 713 >>> match,mList = MatchPharmacophoreToMol(m,featFactory,pcophore) 714 >>> match 715 True 716 717 Two feature types: 718 >>> len(mList) 719 2 720 721 The first feature type, Acceptor, has two matches: 722 >>> len(mList[0]) 723 2 724 >>> mList[0][0].GetAtomIds() 725 (0,) 726 >>> mList[0][1].GetAtomIds() 727 (3,) 728 729 The first feature type, Donor, has a single match: 730 >>> len(mList[1]) 731 1 732 >>> mList[1][0].GetAtomIds() 733 (3,) 734 735 """ 736 return MatchFeatsToMol(mol, featFactory, pcophore.getFeatures())
737 738
739 -def _getFeatDict(mol, featFactory, features):
740 """ **INTERNAL USE ONLY** 741 742 >>> import os.path 743 >>> from rdkit import Geometry, RDConfig, Chem 744 >>> fdefFile = os.path.join(RDConfig.RDCodeDir,'Chem/Pharm3D/test_data/BaseFeatures.fdef') 745 >>> featFactory = ChemicalFeatures.BuildFeatureFactory(fdefFile) 746 >>> activeFeats = [ 747 ... ChemicalFeatures.FreeChemicalFeature('Acceptor', Geometry.Point3D(0.0, 0.0, 0.0)), 748 ... ChemicalFeatures.FreeChemicalFeature('Donor',Geometry.Point3D(0.0, 0.0, 0.0))] 749 >>> m = Chem.MolFromSmiles('FCCN') 750 >>> d =_getFeatDict(m,featFactory,activeFeats) 751 >>> sorted(list(d.keys())) 752 ['Acceptor', 'Donor'] 753 >>> donors = d['Donor'] 754 >>> len(donors) 755 1 756 >>> donors[0].GetAtomIds() 757 (3,) 758 >>> acceptors = d['Acceptor'] 759 >>> len(acceptors) 760 2 761 >>> acceptors[0].GetAtomIds() 762 (0,) 763 >>> acceptors[1].GetAtomIds() 764 (3,) 765 766 """ 767 molFeats = {} 768 for feat in features: 769 family = feat.GetFamily() 770 if family not in molFeats: 771 matches = featFactory.GetFeaturesForMol(mol, includeOnly=family) 772 molFeats[family] = matches 773 return molFeats
774 775
776 -def MatchFeatsToMol(mol, featFactory, features):
777 """ generates a list of all possible mappings of each feature to a molecule 778 779 Returns a 2-tuple: 780 1) a boolean indicating whether or not all features were found 781 2) a list, numFeatures long, of sequences of features 782 783 784 >>> import os.path 785 >>> from rdkit import RDConfig, Geometry 786 >>> fdefFile = os.path.join(RDConfig.RDCodeDir,'Chem/Pharm3D/test_data/BaseFeatures.fdef') 787 >>> featFactory = ChemicalFeatures.BuildFeatureFactory(fdefFile) 788 >>> activeFeats = [ 789 ... ChemicalFeatures.FreeChemicalFeature('Acceptor', Geometry.Point3D(0.0, 0.0, 0.0)), 790 ... ChemicalFeatures.FreeChemicalFeature('Donor',Geometry.Point3D(0.0, 0.0, 0.0))] 791 >>> m = Chem.MolFromSmiles('FCCN') 792 >>> match,mList = MatchFeatsToMol(m,featFactory,activeFeats) 793 >>> match 794 True 795 796 Two feature types: 797 >>> len(mList) 798 2 799 800 The first feature type, Acceptor, has two matches: 801 >>> len(mList[0]) 802 2 803 >>> mList[0][0].GetAtomIds() 804 (0,) 805 >>> mList[0][1].GetAtomIds() 806 (3,) 807 808 The first feature type, Donor, has a single match: 809 >>> len(mList[1]) 810 1 811 >>> mList[1][0].GetAtomIds() 812 (3,) 813 814 """ 815 molFeats = _getFeatDict(mol, featFactory, features) 816 res = [] 817 for feat in features: 818 matches = molFeats.get(feat.GetFamily(), []) 819 if len(matches) == 0: 820 return False, None 821 res.append(matches) 822 return True, res
823 824
825 -def CombiEnum(sequence):
826 """ This generator takes a sequence of sequences as an argument and 827 provides all combinations of the elements of the subsequences: 828 829 >>> gen = CombiEnum(((1,2),(10,20))) 830 >>> next(gen) 831 [1, 10] 832 >>> next(gen) 833 [1, 20] 834 835 >>> [x for x in CombiEnum(((1,2),(10,20)))] 836 [[1, 10], [1, 20], [2, 10], [2, 20]] 837 838 >>> [x for x in CombiEnum(((1,2),(10,20),(100,200)))] 839 [[1, 10, 100], [1, 10, 200], [1, 20, 100], [1, 20, 200], [2, 10, 100], 840 [2, 10, 200], [2, 20, 100], [2, 20, 200]] 841 842 """ 843 if not len(sequence): 844 yield [] 845 elif len(sequence) == 1: 846 for entry in sequence[0]: 847 yield [entry] 848 else: 849 for entry in sequence[0]: 850 for subVal in CombiEnum(sequence[1:]): 851 yield [entry] + subVal
852 853
854 -def DownsampleBoundsMatrix(bm, indices, maxThresh=4.0):
855 """ removes rows from a bounds matrix that are 856 that are greater than a threshold value away from a set of 857 other points 858 859 returns the modfied bounds matrix 860 861 The goal of this function is to remove rows from the bounds matrix 862 that correspond to atoms that are likely to be quite far from 863 the pharmacophore we're interested in. Because the bounds smoothing 864 we eventually have to do is N^3, this can be a big win 865 866 >>> boundsMat = numpy.array([[0.0,3.0,4.0],[2.0,0.0,3.0],[2.0,2.0,0.0]]) 867 >>> bm = DownsampleBoundsMatrix(boundsMat,(0,),3.5) 868 >>> bm.shape == (2, 2) 869 True 870 871 we don't touch the input matrix: 872 >>> boundsMat.shape == (3, 3) 873 True 874 875 >>> print(', '.join(['%.3f'%x for x in bm[0]])) 876 0.000, 3.000 877 >>> print(', '.join(['%.3f'%x for x in bm[1]])) 878 2.000, 0.000 879 880 if the threshold is high enough, we don't do anything: 881 >>> boundsMat = numpy.array([[0.0,4.0,3.0],[2.0,0.0,3.0],[2.0,2.0,0.0]]) 882 >>> bm = DownsampleBoundsMatrix(boundsMat,(0,),5.0) 883 >>> bm.shape == (3, 3) 884 True 885 886 If there's a max value that's close enough to *any* of the indices 887 we pass in, we'll keep it: 888 >>> boundsMat = numpy.array([[0.0,4.0,3.0],[2.0,0.0,3.0],[2.0,2.0,0.0]]) 889 >>> bm = DownsampleBoundsMatrix(boundsMat,(0,1),3.5) 890 >>> bm.shape == (3, 3) 891 True 892 893 """ 894 nPts = bm.shape[0] 895 k = numpy.zeros(nPts, numpy.int0) 896 for idx in indices: 897 k[idx] = 1 898 for i in indices: 899 row = bm[i] 900 for j in range(i + 1, nPts): 901 if not k[j] and row[j] < maxThresh: 902 k[j] = 1 903 keep = numpy.nonzero(k)[0] 904 bm2 = numpy.zeros((len(keep), len(keep)), numpy.float) 905 for i, idx in enumerate(keep): 906 row = bm[idx] 907 bm2[i] = numpy.take(row, keep) 908 return bm2
909 910
911 -def CoarseScreenPharmacophore(atomMatch, bounds, pcophore, verbose=False):
912 """ 913 >>> from rdkit import Geometry 914 >>> from rdkit.Chem.Pharm3D import Pharmacophore 915 >>> feats = [ 916 ... ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1', 917 ... Geometry.Point3D(0.0, 0.0, 0.0)), 918 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1', 919 ... Geometry.Point3D(2.65, 0.0, 0.0)), 920 ... ChemicalFeatures.FreeChemicalFeature('Aromatic', 'Aromatic1', 921 ... Geometry.Point3D(5.12, 0.908, 0.0)), 922 ... ] 923 >>> pcophore=Pharmacophore.Pharmacophore(feats) 924 >>> pcophore.setLowerBound(0,1, 1.1) 925 >>> pcophore.setUpperBound(0,1, 1.9) 926 >>> pcophore.setLowerBound(0,2, 2.1) 927 >>> pcophore.setUpperBound(0,2, 2.9) 928 >>> pcophore.setLowerBound(1,2, 2.1) 929 >>> pcophore.setUpperBound(1,2, 3.9) 930 931 >>> bounds = numpy.array([[0,2,3],[1,0,4],[2,3,0]],numpy.float) 932 >>> CoarseScreenPharmacophore(((0,),(1,)),bounds,pcophore) 933 True 934 935 >>> CoarseScreenPharmacophore(((0,),(2,)),bounds,pcophore) 936 False 937 938 >>> CoarseScreenPharmacophore(((1,),(2,)),bounds,pcophore) 939 False 940 941 >>> CoarseScreenPharmacophore(((0,),(1,),(2,)),bounds,pcophore) 942 True 943 944 >>> CoarseScreenPharmacophore(((1,),(0,),(2,)),bounds,pcophore) 945 False 946 947 >>> CoarseScreenPharmacophore(((2,),(1,),(0,)),bounds,pcophore) 948 False 949 950 # we ignore the point locations here and just use their definitions: 951 >>> feats = [ 952 ... ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1', 953 ... Geometry.Point3D(0.0, 0.0, 0.0)), 954 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1', 955 ... Geometry.Point3D(2.65, 0.0, 0.0)), 956 ... ChemicalFeatures.FreeChemicalFeature('Aromatic', 'Aromatic1', 957 ... Geometry.Point3D(5.12, 0.908, 0.0)), 958 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1', 959 ... Geometry.Point3D(2.65, 0.0, 0.0)), 960 ... ] 961 >>> pcophore=Pharmacophore.Pharmacophore(feats) 962 >>> pcophore.setLowerBound(0,1, 2.1) 963 >>> pcophore.setUpperBound(0,1, 2.9) 964 >>> pcophore.setLowerBound(0,2, 2.1) 965 >>> pcophore.setUpperBound(0,2, 2.9) 966 >>> pcophore.setLowerBound(0,3, 2.1) 967 >>> pcophore.setUpperBound(0,3, 2.9) 968 >>> pcophore.setLowerBound(1,2, 1.1) 969 >>> pcophore.setUpperBound(1,2, 1.9) 970 >>> pcophore.setLowerBound(1,3, 1.1) 971 >>> pcophore.setUpperBound(1,3, 1.9) 972 >>> pcophore.setLowerBound(2,3, 1.1) 973 >>> pcophore.setUpperBound(2,3, 1.9) 974 >>> bounds = numpy.array([[0,3,3,3],[2,0,2,2],[2,1,0,2],[2,1,1,0]],numpy.float) 975 976 >>> CoarseScreenPharmacophore(((0,),(1,),(2,),(3,)),bounds,pcophore) 977 True 978 979 >>> CoarseScreenPharmacophore(((0,),(1,),(3,),(2,)),bounds,pcophore) 980 True 981 982 >>> CoarseScreenPharmacophore(((1,),(0,),(3,),(2,)),bounds,pcophore) 983 False 984 985 """ 986 for k in range(len(atomMatch)): 987 if len(atomMatch[k]) == 1: 988 for l in range(k + 1, len(atomMatch)): 989 if len(atomMatch[l]) == 1: 990 idx0 = atomMatch[k][0] 991 idx1 = atomMatch[l][0] 992 if idx1 < idx0: 993 idx0, idx1 = idx1, idx0 994 if (bounds[idx1, idx0] >= pcophore.getUpperBound(k, l) or 995 bounds[idx0, idx1] <= pcophore.getLowerBound(k, l)): 996 if verbose: 997 print('\t (%d,%d) [%d,%d] fail' % (idx1, idx0, k, l)) 998 print('\t %f,%f - %f,%f' % (bounds[idx1, idx0], pcophore.getUpperBound(k, l), 999 bounds[idx0, idx1], pcophore.getLowerBound(k, l))) 1000 # logger.debug('\t >%s'%str(atomMatch)) 1001 # logger.debug() 1002 # logger.debug('\t %f,%f - %f,%f'%(bounds[idx1,idx0],pcophore.getUpperBound(k,l), 1003 # bounds[idx0,idx1],pcophore.getLowerBound(k,l))) 1004 return False 1005 return True
1006 1007
1008 -def Check2DBounds(atomMatch, mol, pcophore):
1009 """ checks to see if a particular mapping of features onto 1010 a molecule satisfies a pharmacophore's 2D restrictions 1011 1012 >>> from rdkit import Geometry 1013 >>> from rdkit.Chem.Pharm3D import Pharmacophore 1014 >>> activeFeats = [ 1015 ... ChemicalFeatures.FreeChemicalFeature('Acceptor', Geometry.Point3D(0.0, 0.0, 0.0)), 1016 ... ChemicalFeatures.FreeChemicalFeature('Donor',Geometry.Point3D(0.0, 0.0, 0.0))] 1017 >>> pcophore= Pharmacophore.Pharmacophore(activeFeats) 1018 >>> pcophore.setUpperBound2D(0,1,3) 1019 >>> m = Chem.MolFromSmiles('FCC(N)CN') 1020 >>> Check2DBounds(((0,),(3,)),m,pcophore) 1021 True 1022 >>> Check2DBounds(((0,),(5,)),m,pcophore) 1023 False 1024 1025 """ 1026 dm = Chem.GetDistanceMatrix(mol, False, False, False) 1027 nFeats = len(atomMatch) 1028 for i in range(nFeats): 1029 for j in range(i + 1, nFeats): 1030 lowerB = pcophore._boundsMat2D[j, i] # lowerB = pcophore.getLowerBound2D(i,j) 1031 upperB = pcophore._boundsMat2D[i, j] # upperB = pcophore.getUpperBound2D(i,j) 1032 dij = 10000 1033 for atomI in atomMatch[i]: 1034 for atomJ in atomMatch[j]: 1035 try: 1036 dij = min(dij, dm[atomI, atomJ]) 1037 except IndexError: 1038 print('bad indices:', atomI, atomJ) 1039 print(' shape:', dm.shape) 1040 print(' match:', atomMatch) 1041 print(' mol:') 1042 print(Chem.MolToMolBlock(mol)) 1043 raise IndexError 1044 if dij < lowerB or dij > upperB: 1045 return False 1046 return True
1047 1048
1049 -def _checkMatch(match, mol, bounds, pcophore, use2DLimits):
1050 """ **INTERNAL USE ONLY** 1051 1052 checks whether a particular atom match can be satisfied by 1053 a molecule 1054 1055 """ 1056 atomMatch = ChemicalFeatures.GetAtomMatch(match) 1057 if not atomMatch: 1058 return None 1059 elif use2DLimits: 1060 if not Check2DBounds(atomMatch, mol, pcophore): 1061 return None 1062 if not CoarseScreenPharmacophore(atomMatch, bounds, pcophore): 1063 return None 1064 return atomMatch
1065 1066
1067 -def ConstrainedEnum(matches, mol, pcophore, bounds, use2DLimits=False, index=0, soFar=[]):
1068 """ Enumerates the list of atom mappings a molecule 1069 has to a particular pharmacophore. 1070 We do check distance bounds here. 1071 1072 1073 """ 1074 nMatches = len(matches) 1075 if index >= nMatches: 1076 yield soFar, [] 1077 elif index == nMatches - 1: 1078 for entry in matches[index]: 1079 nextStep = soFar + [entry] 1080 if index != 0: 1081 atomMatch = _checkMatch(nextStep, mol, bounds, pcophore, use2DLimits) 1082 else: 1083 atomMatch = ChemicalFeatures.GetAtomMatch(nextStep) 1084 if atomMatch: 1085 yield soFar + [entry], atomMatch 1086 else: 1087 for entry in matches[index]: 1088 nextStep = soFar + [entry] 1089 if index != 0: 1090 atomMatch = _checkMatch(nextStep, mol, bounds, pcophore, use2DLimits) 1091 if not atomMatch: 1092 continue 1093 for val in ConstrainedEnum(matches, mol, pcophore, bounds, use2DLimits=use2DLimits, 1094 index=index + 1, soFar=nextStep): 1095 if val: 1096 yield val
1097 1098
1099 -def MatchPharmacophore(matches, bounds, pcophore, useDownsampling=False, use2DLimits=False, 1100 mol=None, excludedVolumes=None, useDirs=False):
1101 """ 1102 1103 if use2DLimits is set, the molecule must also be provided and topological 1104 distances will also be used to filter out matches 1105 1106 """ 1107 for match, atomMatch in ConstrainedEnum(matches, mol, pcophore, bounds, use2DLimits=use2DLimits): 1108 bm = bounds.copy() 1109 bm = UpdatePharmacophoreBounds(bm, atomMatch, pcophore, useDirs=useDirs, mol=mol) 1110 1111 if excludedVolumes: 1112 localEvs = [] 1113 for eV in excludedVolumes: 1114 featInfo = [] 1115 for i, entry in enumerate(atomMatch): 1116 info = list(eV.featInfo[i]) 1117 info[0] = entry 1118 featInfo.append(info) 1119 localEvs.append(ExcludedVolume.ExcludedVolume(featInfo, eV.index, eV.exclusionDist)) 1120 bm = AddExcludedVolumes(bm, localEvs, smoothIt=False) 1121 1122 sz = bm.shape[0] 1123 if useDownsampling: 1124 indices = [] 1125 for entry in atomMatch: 1126 indices.extend(entry) 1127 if excludedVolumes: 1128 for vol in localEvs: 1129 indices.append(vol.index) 1130 bm = DownsampleBoundsMatrix(bm, indices) 1131 if DG.DoTriangleSmoothing(bm): 1132 return 0, bm, match, (sz, bm.shape[0]) 1133 1134 return 1, None, None, None
1135 1136
1137 -def GetAllPharmacophoreMatches(matches, bounds, pcophore, useDownsampling=0, progressCallback=None, 1138 use2DLimits=False, mol=None, verbose=False):
1139 res = [] 1140 nDone = 0 1141 for match in CombiEnum(matches): 1142 atomMatch = ChemicalFeatures.GetAtomMatch(match) 1143 if atomMatch and use2DLimits and mol: 1144 pass2D = Check2DBounds(atomMatch, mol, pcophore) 1145 if verbose: 1146 print('..', atomMatch) 1147 print(' ..Pass2d:', pass2D) 1148 else: 1149 pass2D = True 1150 if atomMatch and pass2D and \ 1151 CoarseScreenPharmacophore(atomMatch, bounds, pcophore, verbose=verbose): 1152 if verbose: 1153 print(' ..CoarseScreen: Pass') 1154 1155 bm = bounds.copy() 1156 if verbose: 1157 print('pre update:') 1158 for row in bm: 1159 print(' ', ' '.join(['% 4.2f' % x for x in row])) 1160 bm = UpdatePharmacophoreBounds(bm, atomMatch, pcophore) 1161 if verbose: 1162 print('pre downsample:') 1163 for row in bm: 1164 print(' ', ' '.join(['% 4.2f' % x for x in row])) 1165 1166 if useDownsampling: 1167 indices = [] 1168 for entry in atomMatch: 1169 indices += list(entry) 1170 bm = DownsampleBoundsMatrix(bm, indices) 1171 if verbose: 1172 print('post downsample:') 1173 for row in bm: 1174 print(' ', ' '.join(['% 4.2f' % x for x in row])) 1175 1176 if DG.DoTriangleSmoothing(bm): 1177 res.append(match) 1178 elif verbose: 1179 print('cannot smooth') 1180 nDone += 1 1181 if progressCallback: 1182 progressCallback(nDone) 1183 return res
1184 1185
1186 -def ComputeChiralVolume(mol, centerIdx, confId=-1):
1187 """ Computes the chiral volume of an atom 1188 1189 We're using the chiral volume formula from Figure 7 of 1190 Blaney and Dixon, Rev. Comp. Chem. V, 299-335 (1994) 1191 1192 >>> import os.path 1193 >>> from rdkit import RDConfig 1194 >>> dataDir = os.path.join(RDConfig.RDCodeDir,'Chem/Pharm3D/test_data') 1195 1196 R configuration atoms give negative volumes: 1197 >>> mol = Chem.MolFromMolFile(os.path.join(dataDir,'mol-r.mol')) 1198 >>> Chem.AssignStereochemistry(mol) 1199 >>> mol.GetAtomWithIdx(1).GetProp('_CIPCode') 1200 'R' 1201 >>> ComputeChiralVolume(mol,1) < 0 1202 True 1203 1204 S configuration atoms give positive volumes: 1205 >>> mol = Chem.MolFromMolFile(os.path.join(dataDir,'mol-s.mol')) 1206 >>> Chem.AssignStereochemistry(mol) 1207 >>> mol.GetAtomWithIdx(1).GetProp('_CIPCode') 1208 'S' 1209 >>> ComputeChiralVolume(mol,1) > 0 1210 True 1211 1212 Non-chiral (or non-specified) atoms give zero volume: 1213 >>> ComputeChiralVolume(mol,0) == 0.0 1214 True 1215 1216 We also work on 3-coordinate atoms (with implicit Hs): 1217 >>> mol = Chem.MolFromMolFile(os.path.join(dataDir,'mol-r-3.mol')) 1218 >>> Chem.AssignStereochemistry(mol) 1219 >>> mol.GetAtomWithIdx(1).GetProp('_CIPCode') 1220 'R' 1221 >>> ComputeChiralVolume(mol,1)<0 1222 True 1223 1224 >>> mol = Chem.MolFromMolFile(os.path.join(dataDir,'mol-s-3.mol')) 1225 >>> Chem.AssignStereochemistry(mol) 1226 >>> mol.GetAtomWithIdx(1).GetProp('_CIPCode') 1227 'S' 1228 >>> ComputeChiralVolume(mol,1)>0 1229 True 1230 1231 1232 1233 """ 1234 conf = mol.GetConformer(confId) 1235 Chem.AssignStereochemistry(mol) 1236 center = mol.GetAtomWithIdx(centerIdx) 1237 if not center.HasProp('_CIPCode'): 1238 return 0.0 1239 1240 nbrs = center.GetNeighbors() 1241 nbrRanks = [] 1242 for nbr in nbrs: 1243 rank = int(nbr.GetProp('_CIPRank')) 1244 pos = conf.GetAtomPosition(nbr.GetIdx()) 1245 nbrRanks.append((rank, pos)) 1246 1247 # if we only have three neighbors (i.e. the determining H isn't present) 1248 # then use the central atom as the fourth point: 1249 if len(nbrRanks) == 3: 1250 nbrRanks.append((-1, conf.GetAtomPosition(centerIdx))) 1251 nbrRanks.sort() 1252 1253 ps = [x[1] for x in nbrRanks] 1254 v1 = ps[0] - ps[3] 1255 v2 = ps[1] - ps[3] 1256 v3 = ps[2] - ps[3] 1257 1258 res = v1.DotProduct(v2.CrossProduct(v3)) 1259 return res
1260 1261 1262 # ------------------------------------ 1263 # 1264 # doctest boilerplate 1265 #
1266 -def _runDoctests(verbose=None): # pragma: nocover
1267 import doctest 1268 failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS + doctest.NORMALIZE_WHITESPACE, 1269 verbose=verbose) 1270 sys.exit(failed) 1271 1272 1273 if __name__ == '__main__': # pragma: nocover 1274 _runDoctests() 1275