Package rdkit :: Package Chem :: Package AtomPairs :: Module Sheridan
[hide private]
[frames | no frames]

Source Code for Module rdkit.Chem.AtomPairs.Sheridan

  1  # 
  2  #  Copyright (C) 2015 Greg Landrum 
  3  # 
  4  #   @@ All Rights Reserved @@ 
  5  #  This file is part of the RDKit. 
  6  #  The contents are covered by the terms of the BSD license 
  7  #  which is included in the file license.txt, found at the root 
  8  #  of the RDKit source tree. 
  9  # 
 10  """ Contains an implementation of Physicochemical property fingerprints, as 
 11  described in: 
 12  Kearsley, S. K. et al. 
 13  "Chemical Similarity Using Physiochemical Property Descriptors." 
 14  J. Chem. Inf. Comput. Sci. 36, 118-127 (1996) 
 15   
 16  The fingerprints can be accessed through the following functions: 
 17  - GetBPFingerprint 
 18  - GetBTFingerprint 
 19   
 20  """ 
 21  import os.path 
 22  import re 
 23   
 24  from rdkit import Chem 
 25  from rdkit import RDConfig 
 26  from rdkit.Chem import rdMolDescriptors 
 27  from rdkit.Chem.rdMolDescriptors import GetAtomPairFingerprint, GetTopologicalTorsionFingerprint 
 28   
 29  numPathBits = rdMolDescriptors.AtomPairsParameters.numPathBits 
 30  _maxPathLen = (1 << numPathBits) - 1 
 31  numFpBits = numPathBits + 2 * rdMolDescriptors.AtomPairsParameters.codeSize 
 32  fpLen = 1 << numFpBits 
 33   
 34   
def _readPattyDefs(fname=os.path.join(RDConfig.RDDataDir, 'SmartsLib', 'patty_rules.txt')):
    """ Reads atom-typing rules from a text file.

    Every line is stripped, anything from the first '# ' onwards is dropped,
    and what is left is split on runs of spaces. The first field is parsed as
    a SMARTS pattern and the second field is used as the type name.

    Lines that are empty after comment removal, lines with fewer than two
    fields, and lines whose pattern Chem.MolFromSmarts() cannot parse
    (returns None) are skipped.

    Arguments:
      - fname: path of the rules file

    Returns a list of (query molecule, type name) tuples in file order.

    """
    matchers = []
    with open(fname, 'r') as inf:
        for rawLine in inf:
            rule = rawLine.strip().split('# ')[0].strip()
            if rule == '':
                continue
            fields = re.split('[ ]+', rule)
            if len(fields) <= 1:
                # a pattern without a type name cannot be used
                continue
            query = Chem.MolFromSmarts(fields[0])
            if query is None:
                continue
            matchers.append((query, fields[1]))
    return matchers
# Cache for the default rule set: AssignPattyTypes() fills it from
# _readPattyDefs() the first time it is called without explicit defns.
_pattyDefs = None
def AssignPattyTypes(mol, defns=None):
    """ Assigns a type name to every atom of a molecule.

    Arguments:
      - mol: the molecule to be typed
      - defns: (optional) sequence of (query, type name) pairs. When this is
        None the rules returned by _readPattyDefs() are used; they are read
        once and then kept in the module-level _pattyDefs.

    Each query is matched against the molecule (with uniquify=False) and the
    first atom of every match receives the query's type name. The rules are
    applied in order, so a later match overwrites an earlier one. Atoms that
    are never matched keep the empty string.

    Returns a list with one type name per atom.

    >>> from rdkit import Chem
    >>> AssignPattyTypes(Chem.MolFromSmiles('OCC(=O)O'))
    ['POL', 'HYD', 'OTH', 'ANI', 'ANI']

    """
    global _pattyDefs
    rules = defns
    if rules is None:
        if _pattyDefs is None:
            _pattyDefs = _readPattyDefs()
        rules = _pattyDefs

    atomTypes = ['' for _ in range(mol.GetNumAtoms())]
    for query, label in rules:
        for hit in mol.GetSubstructMatches(query, uniquify=False):
            # only the first atom of the match is typed
            atomTypes[hit[0]] = label
    return atomTypes
# Integer code used as the atom invariant for each type name.
typMap = {
    'CAT': 1,
    'ANI': 2,
    'POL': 3,
    'DON': 4,
    'ACC': 5,
    'HYD': 6,
    'OTH': 7,
}
def GetBPFingerprint(mol, fpfn=GetAtomPairFingerprint):
    """ Builds a fingerprint that uses the atom types as atom invariants.

    The type names from AssignPattyTypes(mol) are converted to integers via
    typMap and handed to fpfn as the atomInvariants argument.

    Arguments:
      - mol: the molecule to be fingerprinted
      - fpfn: fingerprinting function, called as fpfn(mol, atomInvariants=...)

    Returns whatever fpfn returns.

    Raises KeyError if an atom's type name is not a key of typMap (for
    example an atom left with the empty string by AssignPattyTypes).

    >>> from rdkit import Chem
    >>> fp = GetBPFingerprint(Chem.MolFromSmiles('OCC(=O)O'))
    >>> fp.GetTotalVal()
    10
    >>> nze=fp.GetNonzeroElements()
    >>> sorted([(k,v) for k,v in nze.items()])
    [(32834, 1), (49219, 2), (98370, 2), (98401, 1), (114753, 2), (114786, 1), (114881, 1)]

    """
    invariants = []
    for label in AssignPattyTypes(mol):
        invariants.append(typMap[label])
    return fpfn(mol, atomInvariants=invariants)
91 92
def GetBTFingerprint(mol, fpfn=GetTopologicalTorsionFingerprint):
    """ Same as GetBPFingerprint(), but with a different default for fpfn.

    Arguments:
      - mol: the molecule to be fingerprinted
      - fpfn: fingerprinting function forwarded to GetBPFingerprint()

    Returns the result of GetBPFingerprint(mol, fpfn=fpfn).

    >>> from rdkit import Chem
    >>> mol = Chem.MolFromSmiles('OCC(N)O')
    >>> AssignPattyTypes(mol)
    ['POL', 'HYD', 'HYD', 'CAT', 'POL']
    >>> fp = GetBTFingerprint(mol)
    >>> fp.GetTotalVal()
    2
    >>> nze=fp.GetNonzeroElements()
    >>> sorted([(k,v) for k,v in nze.items()])
    [(538446850..., 1), (538446852..., 1)]

    """
    fingerprint = GetBPFingerprint(mol, fpfn=fpfn)
    return fingerprint
108 109 110 # ------------------------------------ 111 # 112 # doctest boilerplate 113 #
def _runDoctests(verbose=None):  # pragma: nocover
    """ Runs the doctests of this module and exits the interpreter.

    The tests are run with doctest.ELLIPSIS enabled, and the number of failed
    examples is passed to sys.exit().

    Arguments:
      - verbose: forwarded to doctest.testmod()

    """
    import doctest
    import sys
    outcome = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose)
    sys.exit(outcome.failed)


if __name__ == '__main__':  # pragma: nocover
    _runDoctests()