1
2
3
4
5
6
7
8
9
10
11 """ contains SMARTS definitions and calculators for EState atom types
12
13 defined in: Hall and Kier JCICS _35_ 1039-1045 (1995) Table 1
14 """
15 from rdkit import Chem
16
# Table of (EState atom-type name, SMARTS) pairs.
#
# The first atom of every SMARTS is the atom being typed: the typing routine
# in this module only looks at the first index of each match.  In most
# entries the lowercase prefix of the name mirrors the bonds spelled out in
# the pattern (s: '-', d: '=', t: '#', a: ':').
#
# For the entries that accept either an aliphatic or an aromatic symbol the
# element alternatives are separated from the degree/H-count constraints
# with ';'.  In SMARTS ',' binds more loosely than the implicit AND between
# primitives, so '[N,nD2H]' would constrain only the aromatic alternative;
# '[N,n;D2H]' constrains both, matching the form of the carbon entries.
_rawD = [
    ('sLi', '[LiD1]-*'),
    ('ssBe', '[BeD2](-*)-*'),
    ('ssssBe', '[BeD4](-*)(-*)(-*)-*'),
    ('ssBH', '[BD2H](-*)-*'),
    ('sssB', '[BD3](-*)(-*)-*'),
    ('ssssB', '[BD4](-*)(-*)(-*)-*'),
    ('sCH3', '[CD1H3]-*'),
    ('dCH2', '[CD1H2]=*'),
    ('ssCH2', '[CD2H2](-*)-*'),
    ('tCH', '[CD1H]#*'),
    ('dsCH', '[CD2H](=*)-*'),
    ('aaCH', '[C,c;D2H](:*):*'),
    ('sssCH', '[CD3H](-*)(-*)-*'),
    ('ddC', '[CD2H0](=*)=*'),
    ('tsC', '[CD2H0](#*)-*'),
    ('dssC', '[CD3H0](=*)(-*)-*'),
    ('aasC', '[C,c;D3H0](:*)(:*)-*'),
    ('aaaC', '[C,c;D3H0](:*)(:*):*'),
    ('ssssC', '[CD4H0](-*)(-*)(-*)-*'),
    ('sNH3', '[ND1H3]-*'),
    ('sNH2', '[ND1H2]-*'),
    ('ssNH2', '[ND2H2](-*)-*'),
    ('dNH', '[ND1H]=*'),
    ('ssNH', '[ND2H](-*)-*'),
    ('aaNH', '[N,n;D2H](:*):*'),
    ('tN', '[ND1H0]#*'),
    ('sssNH', '[ND3H](-*)(-*)-*'),
    ('dsN', '[ND2H0](=*)-*'),
    ('aaN', '[N,n;D2H0](:*):*'),
    ('sssN', '[ND3H0](-*)(-*)-*'),
    ('ddsN', '[ND3H0](~[OD1H0])(~[OD1H0])-,:*'),
    ('aasN', '[N,n;D3H0](:*)(:*)-,:*'),
    ('ssssN', '[ND4H0](-*)(-*)(-*)-*'),
    ('sOH', '[OD1H]-*'),
    ('dO', '[OD1H0]=*'),
    ('ssO', '[OD2H0](-*)-*'),
    ('aaO', '[O,o;D2H0](:*):*'),
    ('sF', '[FD1]-*'),
    ('sSiH3', '[SiD1H3]-*'),
    ('ssSiH2', '[SiD2H2](-*)-*'),
    ('sssSiH', '[SiD3H1](-*)(-*)-*'),
    ('ssssSi', '[SiD4H0](-*)(-*)(-*)-*'),
    ('sPH2', '[PD1H2]-*'),
    ('ssPH', '[PD2H1](-*)-*'),
    ('sssP', '[PD3H0](-*)(-*)-*'),
    ('dsssP', '[PD4H0](=*)(-*)(-*)-*'),
    ('sssssP', '[PD5H0](-*)(-*)(-*)(-*)-*'),
    ('sSH', '[SD1H1]-*'),
    ('dS', '[SD1H0]=*'),
    ('ssS', '[SD2H0](-*)-*'),
    ('aaS', '[S,s;D2H0](:*):*'),
    ('dssS', '[SD3H0](=*)(-*)-*'),
    ('ddssS', '[SD4H0](~[OD1H0])(~[OD1H0])(-*)-*'),
    ('sCl', '[ClD1]-*'),
    ('sGeH3', '[GeD1H3](-*)'),
    ('ssGeH2', '[GeD2H2](-*)-*'),
    ('sssGeH', '[GeD3H1](-*)(-*)-*'),
    ('ssssGe', '[GeD4H0](-*)(-*)(-*)-*'),
    ('sAsH2', '[AsD1H2]-*'),
    ('ssAsH', '[AsD2H1](-*)-*'),
    ('sssAs', '[AsD3H0](-*)(-*)-*'),
    ('sssdAs', '[AsD4H0](=*)(-*)(-*)-*'),
    ('sssssAs', '[AsD5H0](-*)(-*)(-*)(-*)-*'),
    ('sSeH', '[SeD1H1]-*'),
    ('dSe', '[SeD1H0]=*'),
    ('ssSe', '[SeD2H0](-*)-*'),
    # NOTE(review): unlike aaS/aaO/aaN this entry lists only the uppercase
    # symbol; confirm whether aromatic selenium ([se]) is matched by it.
    ('aaSe', '[SeD2H0](:*):*'),
    ('dssSe', '[SeD3H0](=*)(-*)-*'),
    ('ddssSe', '[SeD4H0](=*)(=*)(-*)-*'),
    ('sBr', '[BrD1]-*'),
    ('sSnH3', '[SnD1H3]-*'),
    ('ssSnH2', '[SnD2H2](-*)-*'),
    ('sssSnH', '[SnD3H1](-*)(-*)-*'),
    ('ssssSn', '[SnD4H0](-*)(-*)(-*)-*'),
    ('sI', '[ID1]-*'),
    ('sPbH3', '[PbD1H3]-*'),
    ('ssPbH2', '[PbD2H2](-*)-*'),
    ('sssPbH', '[PbD3H1](-*)(-*)-*'),
    ('ssssPb', '[PbD4H0](-*)(-*)(-*)-*'),
]

# Compiled (name, pattern) pairs; stays None until BuildPatts() fills it.
esPatterns = None
100
101
def BuildPatts(rawV=None):
    """ Internal Use Only

    Compiles a table of (name, SMARTS) pairs and stores the result in the
    module-level ``esPatterns`` list.

    **Arguments:**

      - rawV: optional sequence of (name, SMARTS) tuples; when it is None
        the module-level ``_rawD`` table is used.

    **Notes:**

      - ``esPatterns`` gets exactly one slot per entry of rawV.  A slot
        holds ``(name, pattern)`` when ``Chem.MolFromSmarts`` produced a
        pattern and stays ``None`` when it returned None; in that case a
        warning is written to stderr and the entry is skipped.

    """
    # NOTE(review): the `def` line was missing from the listing; the
    # signature is reconstructed from the zero-argument call made elsewhere
    # in this module and from the `rawV is None` check below -- confirm.
    #
    # Imported here because the module itself does not import sys; without
    # it the warning branch below raised NameError instead of warning.
    import sys

    global esPatterns
    if rawV is None:
        rawV = _rawD

    # Build locally and publish once, so the global never exposes a
    # half-filled list.
    compiled = [None] * len(rawV)
    for i, (name, sma) in enumerate(rawV):
        patt = Chem.MolFromSmarts(sma)
        if patt is None:
            sys.stderr.write('WARNING: problems with pattern %s (name: %s), skipped.\n' % (sma, name))
        else:
            compiled[i] = name, patt
    esPatterns = compiled
117
118
def TypeAtoms(mol):
    """ assigns each atom in a molecule to an EState type

    **Arguments:**

      - mol: the molecule to type; it must provide ``GetNumAtoms()`` and
        ``GetSubstructMatches(pattern, uniquify=...)``.

    **Returns:**

      list of tuples (atoms can possibly match multiple patterns) with atom
      types.  There is one tuple per atom; an atom that matches no pattern
      gets an empty tuple, and each type name appears at most once per atom.

    """
    # NOTE(review): the `def` line was missing from the listing; the
    # parameter name comes from the body, the function name is
    # reconstructed -- confirm against the real file.
    if esPatterns is None:
        BuildPatts()

    res = [[] for _ in range(mol.GetNumAtoms())]
    for entry in esPatterns:
        if entry is None:
            # BuildPatts leaves None in the slot of any SMARTS it could not
            # compile; unpacking it would raise TypeError.
            continue
        name, patt = entry
        for match in mol.GetSubstructMatches(patt, uniquify=0):
            # Only the first atom of the match is the one being typed.
            names = res[match[0]]
            if name not in names:
                names.append(name)
    return [tuple(names) for names in res]
145