Package rdkit :: Package Chem :: Package EState :: Module AtomTypes
[hide private]
[frames | no frames]

Source Code for Module rdkit.Chem.EState.AtomTypes

  1  # $Id$ 
  2  # 
  3  #  Copyright (C) 2002-2006  greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  """ contains SMARTS definitions and calculators for EState atom types 
 12   
 13   defined in: Hall and Kier JCICS _35_ 1039-1045 (1995)  Table 1 
 14  """ 
 15  from rdkit import Chem 
 16   
 17  _rawD = [ 
 18    ('sLi', '[LiD1]-*'), 
 19    ('ssBe', '[BeD2](-*)-*'), 
 20    ('ssssBe', '[BeD4](-*)(-*)(-*)-*'), 
 21    ('ssBH', '[BD2H](-*)-*'), 
 22    ('sssB', '[BD3](-*)(-*)-*'), 
 23    ('ssssB', '[BD4](-*)(-*)(-*)-*'), 
 24    ('sCH3', '[CD1H3]-*'), 
 25    ('dCH2', '[CD1H2]=*'), 
 26    ('ssCH2', '[CD2H2](-*)-*'), 
 27    ('tCH', '[CD1H]#*'), 
 28    ('dsCH', '[CD2H](=*)-*'), 
 29    ('aaCH', '[C,c;D2H](:*):*'), 
 30    ('sssCH', '[CD3H](-*)(-*)-*'), 
 31    ('ddC', '[CD2H0](=*)=*'), 
 32    ('tsC', '[CD2H0](#*)-*'), 
 33    ('dssC', '[CD3H0](=*)(-*)-*'), 
 34    ('aasC', '[C,c;D3H0](:*)(:*)-*'), 
 35    ('aaaC', '[C,c;D3H0](:*)(:*):*'), 
 36    ('ssssC', '[CD4H0](-*)(-*)(-*)-*'), 
 37    ('sNH3', '[ND1H3]-*'), 
 38    ('sNH2', '[ND1H2]-*'), 
 39    ('ssNH2', '[ND2H2](-*)-*'), 
 40    ('dNH', '[ND1H]=*'), 
 41    ('ssNH', '[ND2H](-*)-*'), 
 42    ('aaNH', '[N,nD2H](:*):*'), 
 43    ('tN', '[ND1H0]#*'), 
 44    ('sssNH', '[ND3H](-*)(-*)-*'), 
 45    ('dsN', '[ND2H0](=*)-*'), 
 46    ('aaN', '[N,nD2H0](:*):*'), 
 47    ('sssN', '[ND3H0](-*)(-*)-*'), 
 48    ('ddsN', '[ND3H0](~[OD1H0])(~[OD1H0])-,:*'),  # mod 
 49    ('aasN', '[N,nD3H0](:*)(:*)-,:*'),  # mod 
 50    ('ssssN', '[ND4H0](-*)(-*)(-*)-*'), 
 51    ('sOH', '[OD1H]-*'), 
 52    ('dO', '[OD1H0]=*'), 
 53    ('ssO', '[OD2H0](-*)-*'), 
 54    ('aaO', '[O,oD2H0](:*):*'), 
 55    ('sF', '[FD1]-*'), 
 56    ('sSiH3', '[SiD1H3]-*'), 
 57    ('ssSiH2', '[SiD2H2](-*)-*'), 
 58    ('sssSiH', '[SiD3H1](-*)(-*)-*'), 
 59    ('ssssSi', '[SiD4H0](-*)(-*)(-*)-*'), 
 60    ('sPH2', '[PD1H2]-*'), 
 61    ('ssPH', '[PD2H1](-*)-*'), 
 62    ('sssP', '[PD3H0](-*)(-*)-*'), 
 63    ('dsssP', '[PD4H0](=*)(-*)(-*)-*'), 
 64    ('sssssP', '[PD5H0](-*)(-*)(-*)(-*)-*'), 
 65    ('sSH', '[SD1H1]-*'), 
 66    ('dS', '[SD1H0]=*'), 
 67    ('ssS', '[SD2H0](-*)-*'), 
 68    ('aaS', '[S,sD2H0](:*):*'), 
 69    ('dssS', '[SD3H0](=*)(-*)-*'), 
 70    ('ddssS', '[SD4H0](~[OD1H0])(~[OD1H0])(-*)-*'),  # mod 
 71    ('sCl', '[ClD1]-*'), 
 72    ('sGeH3', '[GeD1H3](-*)'), 
 73    ('ssGeH2', '[GeD2H2](-*)-*'), 
 74    ('sssGeH', '[GeD3H1](-*)(-*)-*'), 
 75    ('ssssGe', '[GeD4H0](-*)(-*)(-*)-*'), 
 76    ('sAsH2', '[AsD1H2]-*'), 
 77    ('ssAsH', '[AsD2H1](-*)-*'), 
 78    ('sssAs', '[AsD3H0](-*)(-*)-*'), 
 79    ('sssdAs', '[AsD4H0](=*)(-*)(-*)-*'), 
 80    ('sssssAs', '[AsD5H0](-*)(-*)(-*)(-*)-*'), 
 81    ('sSeH', '[SeD1H1]-*'), 
 82    ('dSe', '[SeD1H0]=*'), 
 83    ('ssSe', '[SeD2H0](-*)-*'), 
 84    ('aaSe', '[SeD2H0](:*):*'), 
 85    ('dssSe', '[SeD3H0](=*)(-*)-*'), 
 86    ('ddssSe', '[SeD4H0](=*)(=*)(-*)-*'), 
 87    ('sBr', '[BrD1]-*'), 
 88    ('sSnH3', '[SnD1H3]-*'), 
 89    ('ssSnH2', '[SnD2H2](-*)-*'), 
 90    ('sssSnH', '[SnD3H1](-*)(-*)-*'), 
 91    ('ssssSn', '[SnD4H0](-*)(-*)(-*)-*'), 
 92    ('sI', '[ID1]-*'), 
 93    ('sPbH3', '[PbD1H3]-*'), 
 94    ('ssPbH2', '[PbD2H2](-*)-*'), 
 95    ('sssPbH', '[PbD3H1](-*)(-*)-*'), 
 96    ('ssssPb', '[PbD4H0](-*)(-*)(-*)-*'), 
 97  ] 
 98   
 99  esPatterns = None 
100   
101   
def BuildPatts(rawV=None):
  """ Internal Use Only

  Compiles SMARTS definitions with Chem.MolFromSmarts and stores the
  result in the module-level esPatterns list, replacing whatever was
  there before.

  **Arguments:**

    - rawV: optional sequence of (name, SMARTS) 2-tuples; when None the
      module-level _rawD table is used.

  **Notes:**

    - esPatterns always ends up with one slot per entry of rawV, in the
      same order.  Slot i holds the tuple (name, pattern), or None when
      Chem.MolFromSmarts returned None for that SMARTS; a warning is
      written to stderr for every such entry.

  """
  global esPatterns, _rawD
  # Imported here because no module-level 'import sys' is visible; without
  # it the warning branch below raises NameError instead of reporting the
  # offending pattern.
  import sys

  if rawV is None:
    rawV = _rawD

  # Build into a local list and publish it only once complete, so an
  # exception part-way through does not leave a half-filled global behind.
  patts = [None] * len(rawV)
  for i, (name, sma) in enumerate(rawV):
    patt = Chem.MolFromSmarts(sma)
    if patt is None:
      sys.stderr.write('WARNING: problems with pattern %s (name: %s), skipped.\n' % (sma, name))
    else:
      patts[i] = name, patt
  esPatterns = patts
117 118
def TypeAtoms(mol):
  """ assigns each atom in a molecule to an EState type

  **Arguments:**

    - mol: the molecule to be typed; it is queried through
      GetNumAtoms() and GetSubstructMatches()

  **Returns:**

     list of tuples (atoms can possibly match multiple patterns) with atom types

     The list has one tuple per atom, indexed by atom index.  Type names
     appear in pattern order without duplicates; atoms that match no
     pattern get an empty tuple.

  """
  if esPatterns is None:
    BuildPatts()
  typeNames = [[] for _ in range(mol.GetNumAtoms())]
  for entry in esPatterns:
    if entry is None:
      # BuildPatts() leaves None in the slot of any SMARTS it could not
      # compile; unpacking it would raise TypeError, so skip it.
      continue
    name, patt = entry
    for match in mol.GetSubstructMatches(patt, uniquify=0):
      # the typed atom is the first atom of the pattern
      names = typeNames[match[0]]
      if name not in names:
        names.append(name)
  return [tuple(names) for names in typeNames]
145