Package rdkit :: Package Chem :: Package Pharm2D :: Module Matcher
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.Pharm2D.Matcher

  1  # 
  2  # Copyright (C) 2003-2008 greg Landrum and Rational Discovery LLC 
  3  # 
  4  #   @@ All Rights Reserved @@ 
  5  #  This file is part of the RDKit. 
  6  #  The contents are covered by the terms of the BSD license 
  7  #  which is included in the file license.txt, found at the root 
  8  #  of the RDKit source tree. 
  9  # 
 10  """ functionality for finding pharmacophore matches in molecules 
 11   
 12   
 13    See Docs/Chem/Pharm2D.triangles.jpg for an illustration of the way 
 14    pharmacophores are broken into triangles and labelled. 
 15   
 16    See Docs/Chem/Pharm2D.signatures.jpg for an illustration of bit 
 17    numbering 
 18   
 19  """ 
 20  from rdkit import Chem 
 21  from rdkit.Chem.Pharm2D import Utils 
 22   
 23   
24 -class MatchError(Exception):
25 pass
26 27 28 _verbose = 0 29 30
31 -def GetAtomsMatchingBit(sigFactory, bitIdx, mol, dMat=None, justOne=0, matchingAtoms=None):
32 """ Returns a list of lists of atom indices for a bit 33 34 **Arguments** 35 36 - sigFactory: a SigFactory 37 38 - bitIdx: the bit to be queried 39 40 - mol: the molecule to be examined 41 42 - dMat: (optional) the distance matrix of the molecule 43 44 - justOne: (optional) if this is nonzero, only the first match 45 will be returned. 46 47 - matchingAtoms: (optional) if this is nonzero, it should 48 contain a sequence of sequences with the indices of atoms in 49 the molecule which match each of the patterns used by the 50 signature. 51 52 **Returns** 53 54 a list of tuples with the matching atoms 55 """ 56 assert sigFactory.shortestPathsOnly, 'not implemented for non-shortest path signatures' 57 nPts, featCombo, scaffold = sigFactory.GetBitInfo(bitIdx) 58 if _verbose: 59 print('info:', nPts) 60 print('\t', featCombo) 61 print('\t', scaffold) 62 63 if matchingAtoms is None: 64 matchingAtoms = sigFactory.GetMolFeats(mol) 65 66 # find the atoms that match each features 67 # fams = sigFactory.GetFeatFamilies() 68 choices = [] 69 for featIdx in featCombo: 70 tmp = matchingAtoms[featIdx] 71 if tmp: 72 choices.append(tmp) 73 else: 74 # one of the patterns didn't find a match, we 75 # can return now 76 if _verbose: 77 print('no match found for feature:', featIdx) 78 return [] 79 80 if _verbose: 81 print('choices:') 82 print(choices) 83 84 if dMat is None: 85 dMat = Chem.GetDistanceMatrix(mol, sigFactory.includeBondOrder) 86 87 distsToCheck = Utils.nPointDistDict[nPts] 88 89 protoPharmacophores = Utils.GetAllCombinations(choices, noDups=1) 90 91 res = [] 92 for protoPharm in protoPharmacophores: 93 if _verbose: 94 print('protoPharm:', protoPharm) 95 for i in range(len(distsToCheck)): 96 dLow, dHigh = sigFactory.GetBins()[scaffold[i]] 97 a1, a2 = distsToCheck[i] 98 # 99 # FIX: this is making all kinds of assumptions about 100 # things being single-atom matches (or at least that 101 # only the first atom matters 102 # 103 idx1, idx2 = protoPharm[a1][0], protoPharm[a2][0] 104 dist = dMat[idx1, idx2] 105 if _verbose: 106 print('\t dist: %d->%d = %d (%d,%d)' % (idx1, idx2, dist, dLow, dHigh)) 107 if dist < dLow or dist >= dHigh: 108 break 109 else: 110 if _verbose: 111 print('Found one') 112 # we found it 113 protoPharm.sort() 114 protoPharm = tuple(protoPharm) 115 if protoPharm not in res: 116 res.append(protoPharm) 117 if justOne: 118 break 119 return res
120 121
122 -def _exampleCode():
123 import os 124 from rdkit import RDConfig 125 from rdkit.Chem import ChemicalFeatures 126 from rdkit.Chem.Pharm2D import SigFactory, Generate 127 128 fdefFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'Pharm2D', 'test_data', 'BaseFeatures.fdef') 129 featFactory = ChemicalFeatures.BuildFeatureFactory(fdefFile) 130 factory = SigFactory.SigFactory(featFactory) 131 factory.SetBins([(1, 2), (2, 5), (5, 8)]) 132 factory.Init() 133 134 mol = Chem.MolFromSmiles('OCC(=O)CCCN') 135 sig = Generate.Gen2DFingerprint(mol, factory) 136 print('onbits:', list(sig.GetOnBits())) 137 138 _verbose = 0 139 for bit in sig.GetOnBits(): 140 atoms = GetAtomsMatchingBit(factory, bit, mol) 141 print('\tBit %d: ' % (bit), atoms) 142 143 print('finished')
144 145 146 if __name__ == '__main__': # pragma: nocover 147 _exampleCode() 148