Package rdkit :: Package Chem :: Module Crippen
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.Crippen

  1  # $Id$ 
  2  # 
  3  # Copyright (C) 2002-2008 greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  """ Atom-based calculation of LogP and MR using Crippen's approach 
 12   
 13   
 14      Reference: 
 15        S. A. Wildman and G. M. Crippen *JCICS* _39_ 868-873 (1999) 
 16   
 17   
 18  """ 
 19  from __future__ import print_function 
 20  import os 
 21  from rdkit import RDConfig 
 22  from rdkit import Chem 
 23  from rdkit.Chem import rdMolDescriptors 
 24  import numpy 
 25   
 26  _smartsPatterns = {} 
 27  _patternOrder = [] 
 28  # this is the file containing the atom contributions 
 29  defaultPatternFileName = os.path.join(RDConfig.RDDataDir, 'Crippen.txt') 
 30   
 31   
32 -def _ReadPatts(fileName):
33 """ *Internal Use Only* 34 35 parses the pattern list from the data file 36 37 """ 38 patts = {} 39 order = [] 40 with open(fileName, 'r') as f: 41 lines = f.readlines() 42 for line in lines: 43 if line[0] != '#': 44 splitLine = line.split('\t') 45 if len(splitLine) >= 4 and splitLine[0] != '': 46 sma = splitLine[1] 47 if sma != 'SMARTS': 48 sma.replace('"', '') 49 p = Chem.MolFromSmarts(sma) 50 if p: 51 if len(splitLine[0]) > 1 and splitLine[0][1] not in 'S0123456789': 52 cha = splitLine[0][:2] 53 else: 54 cha = splitLine[0][0] 55 logP = float(splitLine[2]) 56 if splitLine[3] != '': 57 mr = float(splitLine[3]) 58 else: 59 mr = 0.0 60 if cha not in order: 61 order.append(cha) 62 l = patts.get(cha, []) 63 l.append((sma, p, logP, mr)) 64 patts[cha] = l 65 else: 66 print('Problems parsing smarts: %s' % (sma)) 67 return order, patts
68 69 70 _GetAtomContribs = rdMolDescriptors._CalcCrippenContribs 71 72
73 -def _pyGetAtomContribs(mol, patts=None, order=None, verbose=0, force=0):
74 """ *Internal Use Only* 75 76 calculates atomic contributions to the LogP and MR values 77 78 if the argument *force* is not set, we'll use the molecules stored 79 _crippenContribs value when possible instead of re-calculating. 80 81 **Note:** Changes here affect the version numbers of MolLogP and MolMR 82 as well as the VSA descriptors in Chem.MolSurf 83 84 """ 85 if not force and hasattr(mol, '_crippenContribs'): 86 return mol._crippenContribs 87 88 if patts is None: 89 patts = _smartsPatterns 90 order = _patternOrder 91 92 nAtoms = mol.GetNumAtoms() 93 atomContribs = [(0., 0.)] * nAtoms 94 doneAtoms = [0] * nAtoms 95 nAtomsFound = 0 96 done = False 97 for cha in order: 98 pattVect = patts[cha] 99 for sma, patt, logp, mr in pattVect: 100 #print('try:',entry[0]) 101 for match in mol.GetSubstructMatches(patt, False, False): 102 firstIdx = match[0] 103 if not doneAtoms[firstIdx]: 104 doneAtoms[firstIdx] = 1 105 atomContribs[firstIdx] = (logp, mr) 106 if verbose: 107 print('\tAtom %d: %s %4.4f %4.4f' % (match[0], sma, logp, mr)) 108 nAtomsFound += 1 109 if nAtomsFound >= nAtoms: 110 done = True 111 break 112 if done: 113 break 114 mol._crippenContribs = atomContribs 115 return atomContribs
116 117
118 -def _Init():
119 global _smartsPatterns, _patternOrder 120 if _smartsPatterns == {}: 121 _patternOrder, _smartsPatterns = _ReadPatts(defaultPatternFileName)
122 123
124 -def _pyMolLogP(inMol, patts=None, order=None, verbose=0, addHs=1):
125 """ DEPRECATED 126 """ 127 if addHs < 0: 128 mol = Chem.AddHs(inMol, 1) 129 elif addHs > 0: 130 mol = Chem.AddHs(inMol, 0) 131 else: 132 mol = inMol 133 134 if patts is None: 135 global _smartsPatterns, _patternOrder 136 if _smartsPatterns == {}: 137 _patternOrder, _smartsPatterns = _ReadPatts(defaultPatternFileName) 138 patts = _smartsPatterns 139 order = _patternOrder 140 atomContribs = _pyGetAtomContribs(mol, patts, order, verbose=verbose) 141 return numpy.sum(atomContribs, 0)[0]
142 143 144 _pyMolLogP.version = "1.1.0" 145 146
147 -def _pyMolMR(inMol, patts=None, order=None, verbose=0, addHs=1):
148 """ DEPRECATED 149 """ 150 if addHs < 0: 151 mol = Chem.AddHs(inMol, 1) 152 elif addHs > 0: 153 mol = Chem.AddHs(inMol, 0) 154 else: 155 mol = inMol 156 157 if patts is None: 158 global _smartsPatterns, _patternOrder 159 if _smartsPatterns == {}: 160 _patternOrder, _smartsPatterns = _ReadPatts(defaultPatternFileName) 161 patts = _smartsPatterns 162 order = _patternOrder 163 164 atomContribs = _pyGetAtomContribs(mol, patts, order, verbose=verbose) 165 return numpy.sum(atomContribs, 0)[1]
166 167 168 _pyMolMR.version = "1.1.0" 169 170 MolLogP = lambda *x, **y: rdMolDescriptors.CalcCrippenDescriptors(*x, **y)[0] 171 MolLogP.version = rdMolDescriptors._CalcCrippenDescriptors_version 172 MolLogP.__doc__ = """ Wildman-Crippen LogP value 173 174 Uses an atom-based scheme based on the values in the paper: 175 S. A. Wildman and G. M. Crippen JCICS 39 868-873 (1999) 176 177 **Arguments** 178 179 - inMol: a molecule 180 181 - addHs: (optional) toggles adding of Hs to the molecule for the calculation. 182 If true, hydrogens will be added to the molecule and used in the calculation. 183 184 """ 185 186 MolMR = lambda *x, **y: rdMolDescriptors.CalcCrippenDescriptors(*x, **y)[1] 187 MolMR.version = rdMolDescriptors._CalcCrippenDescriptors_version 188 MolMR.__doc__ = """ Wildman-Crippen MR value 189 190 Uses an atom-based scheme based on the values in the paper: 191 S. A. Wildman and G. M. Crippen JCICS 39 868-873 (1999) 192 193 **Arguments** 194 195 - inMol: a molecule 196 197 - addHs: (optional) toggles adding of Hs to the molecule for the calculation. 198 If true, hydrogens will be added to the molecule and used in the calculation. 199 200 """ 201 202 if __name__ == '__main__': 203 import sys 204 205 if len(sys.argv): 206 ms = [] 207 verbose = 0 208 if '-v' in sys.argv: 209 verbose = 1 210 sys.argv.remove('-v') 211 for smi in sys.argv[1:]: 212 ms.append((smi, Chem.MolFromSmiles(smi))) 213 214 for smi, m in ms: 215 print('Mol: %s' % (smi)) 216 logp = MolLogP(m, verbose=verbose) 217 print('----') 218 mr = MolMR(m, verbose=verbose) 219 print('Res:', logp, mr) 220 newM = Chem.AddHs(m) 221 logp = MolLogP(newM, addHs=0) 222 mr = MolMR(newM, addHs=0) 223 print('\t', logp, mr) 224 print('-*-*-*-*-*-*-*-*') 225