Package rdkit :: Package Chem :: Package AtomPairs :: Module Pairs
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.AtomPairs.Pairs

  1  # 
  2  #  Copyright (C) 2004-2017 Greg Landrum and Rational Discovery LLC 
  3  # 
  4  #   @@ All Rights Reserved @@ 
  5  #  This file is part of the RDKit. 
  6  #  The contents are covered by the terms of the BSD license 
  7  #  which is included in the file license.txt, found at the root 
  8  #  of the RDKit source tree. 
  9  # 
 10  """ Contains an implementation of Atom-pair fingerprints, as 
 11  described in: 
 12   
 13  R.E. Carhart, D.H. Smith, R. Venkataraghavan; 
 14  "Atom Pairs as Molecular Features in Structure-Activity Studies: 
 15  Definition and Applications" JCICS 25, 64-73 (1985). 
 16   
 17  The fingerprints can be accessed through the following functions: 
 18  - GetAtomPairFingerprint 
 19  - GetHashedAtomPairFingerprint (identical to GetAtomPairFingerprint) 
 20  - GetAtomPairFingerprintAsIntVect 
 21  - GetAtomPairFingerprintAsBitVect 
 22   
 23  """ 
 24  from rdkit import DataStructs 
 25  from rdkit.Chem import rdMolDescriptors 
 26  from rdkit.Chem.AtomPairs import Utils 
 27   
 28  from rdkit.Chem.rdMolDescriptors import GetAtomPairFingerprint, GetHashedAtomPairFingerprint 
 29  GetAtomPairFingerprintAsIntVect = rdMolDescriptors.GetAtomPairFingerprint 
 30   
 31  numPathBits = rdMolDescriptors.AtomPairsParameters.numPathBits 
 32  _maxPathLen = (1 << numPathBits) - 1 
 33  numFpBits = numPathBits + 2 * rdMolDescriptors.AtomPairsParameters.codeSize 
 34  fpLen = 1 << numFpBits 
 35   
 36   
37 -def pyScorePair(at1, at2, dist, atomCodes=None, includeChirality=False):
38 """ Returns a score for an individual atom pair. 39 40 >>> from rdkit import Chem 41 >>> m = Chem.MolFromSmiles('CCCCC') 42 >>> c1 = Utils.GetAtomCode(m.GetAtomWithIdx(0)) 43 >>> c2 = Utils.GetAtomCode(m.GetAtomWithIdx(1)) 44 >>> c3 = Utils.GetAtomCode(m.GetAtomWithIdx(2)) 45 >>> t = 1 | min(c1,c2)<<numPathBits | max(c1,c2)<<(rdMolDescriptors.AtomPairsParameters.codeSize+numPathBits) 46 >>> pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(1),1)==t 47 1 48 >>> pyScorePair(m.GetAtomWithIdx(1),m.GetAtomWithIdx(0),1)==t 49 1 50 >>> t = 2 | min(c1,c3)<<numPathBits | max(c1,c3)<<(rdMolDescriptors.AtomPairsParameters.codeSize+numPathBits) 51 >>> pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(2),2)==t 52 1 53 >>> pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(2),2, 54 ... atomCodes=(Utils.GetAtomCode(m.GetAtomWithIdx(0)),Utils.GetAtomCode(m.GetAtomWithIdx(2))))==t 55 1 56 57 """ 58 if not atomCodes: 59 code1 = Utils.GetAtomCode(at1,includeChirality = includeChirality) 60 code2 = Utils.GetAtomCode(at2,includeChirality = includeChirality) 61 else: 62 code1, code2 = atomCodes 63 64 codeSize = rdMolDescriptors.AtomPairsParameters.codeSize 65 if includeChirality: 66 codeSize += rdMolDescriptors.AtomPairsParameters.numChiralBits 67 68 accum = int(dist) % _maxPathLen 69 accum |= min(code1, code2) << numPathBits 70 accum |= max(code1, code2) << (codeSize + numPathBits) 71 return accum
72 73
74 -def ExplainPairScore(score,includeChirality=False):
75 """ 76 >>> from rdkit import Chem 77 >>> m = Chem.MolFromSmiles('C=CC') 78 >>> score = pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(1),1) 79 >>> ExplainPairScore(score) 80 (('C', 1, 1), 1, ('C', 2, 1)) 81 >>> score = pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(2),2) 82 >>> ExplainPairScore(score) 83 (('C', 1, 0), 2, ('C', 1, 1)) 84 >>> score = pyScorePair(m.GetAtomWithIdx(1),m.GetAtomWithIdx(2),1) 85 >>> ExplainPairScore(score) 86 (('C', 1, 0), 1, ('C', 2, 1)) 87 >>> score = pyScorePair(m.GetAtomWithIdx(2),m.GetAtomWithIdx(1),1) 88 >>> ExplainPairScore(score) 89 (('C', 1, 0), 1, ('C', 2, 1)) 90 91 We can optionally deal with chirality too 92 >>> m = Chem.MolFromSmiles('C[C@H](F)Cl') 93 >>> score = pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(1),1) 94 >>> ExplainPairScore(score) 95 (('C', 1, 0), 1, ('C', 3, 0)) 96 >>> score = pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(1),1,includeChirality=True) 97 >>> ExplainPairScore(score,includeChirality=True) 98 (('C', 1, 0, ''), 1, ('C', 3, 0, 'R')) 99 >>> m = Chem.MolFromSmiles('F[C@@H](Cl)[C@H](F)Cl') 100 >>> score = pyScorePair(m.GetAtomWithIdx(1),m.GetAtomWithIdx(3),1,includeChirality=True) 101 >>> ExplainPairScore(score,includeChirality=True) 102 (('C', 3, 0, 'R'), 1, ('C', 3, 0, 'S')) 103 104 """ 105 codeSize = rdMolDescriptors.AtomPairsParameters.codeSize 106 if includeChirality: 107 codeSize += rdMolDescriptors.AtomPairsParameters.numChiralBits 108 109 codeMask = (1 << codeSize) - 1 110 111 pathMask = (1 << numPathBits) - 1 112 dist = score & pathMask 113 114 score = score >> numPathBits 115 code1 = score & codeMask 116 score = score >> codeSize 117 code2 = score & codeMask 118 119 res = (Utils.ExplainAtomCode(code1,includeChirality=includeChirality), 120 dist, 121 Utils.ExplainAtomCode(code2,includeChirality=includeChirality)) 122 return res
123 124
125 -def GetAtomPairFingerprintAsBitVect(mol):
126 """ Returns the Atom-pair fingerprint for a molecule as 127 a SparseBitVect. Note that this doesn't match the standard 128 definition of atom pairs, which uses counts of the 129 pairs, not just their presence. 130 131 **Arguments**: 132 133 - mol: a molecule 134 135 **Returns**: a SparseBitVect 136 137 >>> from rdkit import Chem 138 >>> m = Chem.MolFromSmiles('CCC') 139 >>> v = [ pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(1),1), 140 ... pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(2),2), 141 ... ] 142 >>> v.sort() 143 >>> fp = GetAtomPairFingerprintAsBitVect(m) 144 >>> list(fp.GetOnBits())==v 145 True 146 147 """ 148 res = DataStructs.SparseBitVect(fpLen) 149 fp = rdMolDescriptors.GetAtomPairFingerprint(mol) 150 for val in fp.GetNonzeroElements(): 151 res.SetBit(val) 152 return res
153 154 155 # ------------------------------------ 156 # 157 # doctest boilerplate 158 #
159 -def _runDoctests(verbose=None): # pragma: nocover
160 import sys 161 import doctest 162 failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) 163 sys.exit(failed) 164 165 166 if __name__ == '__main__': # pragma: nocover 167 _runDoctests() 168