1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33 import os
34 import re
35 import weakref
36
37 from rdkit import Chem
38 from rdkit import RDConfig
39 from rdkit.six.moves import cStringIO as StringIO
40
41
67
68
71
72
# Module-level cache shared with the hierarchy-building code further down,
# which declares all four names ``global`` and rebinds them.
# groupDefns: rebound there to a fresh dict mapping each label to its node.
73 groupDefns = {}
# hierarchy: set there to a copy of the list of top-level nodes; None until
# the first build.
74 hierarchy = None
# lastData / lastFilename: the data string and file name recorded by the
# builder; it compares new arguments against them to decide whether the
# cached hierarchy can be returned instead of re-parsing.
75 lastData = None
76 lastFilename = None
77
78
# NOTE(review): the ``def`` line of this function is missing from the listing
# that was reviewed; the name and the parameter defaults below are
# reconstructed from how the body uses fileNm, data and force. Confirm them
# against the callers before merging.
def BuildFuncGroupHierarchy(fileNm=None, data=None, force=False):
    """Build the functional-group hierarchy from a definition file or string.

    Each non-empty input line holds tab-separated fields::

        label <TAB> SMARTS <TAB> name [<TAB> reaction SMARTS]

    Everything after ``//`` on a line is ignored. A dotted label such as
    ``A.B.C`` makes the entry a child of ``A.B``; every ancestor label must
    already have been defined on an earlier line.

    The result is cached in the module globals (groupDefns, hierarchy,
    lastData, lastFilename). The cache is only updated after the whole input
    parsed successfully, so a failed call leaves the previous state intact.

    Arguments:
      - fileNm: path of the definition file. When neither fileNm nor data is
        given, ``Functional_Group_Hierarchy.txt`` in ``RDConfig.RDDataDir``
        is read.
      - data: the definitions as a string; only read when fileNm is not given.
      - force: when true, re-parse even if the cached hierarchy matches the
        arguments.

    Returns:
      a new list holding the top-level nodes (the node objects themselves are
      shared with the cache).

    Raises:
      FuncGroupFileParseError for a line with fewer than three fields, a
      duplicate label, a missing ancestor label or an unparsable SMARTS.
    """
    global groupDefns, hierarchy, lastData, lastFilename
    if (not force and hierarchy and (not data or data == lastData)
            and (not fileNm or fileNm == lastFilename)):
        return hierarchy[:]

    if not fileNm and not data:
        fileNm = os.path.join(RDConfig.RDDataDir, 'Functional_Group_Hierarchy.txt')

    if fileNm:
        # Read everything up front so the handle is closed even when a parse
        # error is raised below.
        with open(fileNm, 'r') as inF:
            lines = inF.readlines()
    else:
        # data is non-empty here: the default file name was filled in above
        # when both arguments were missing.
        lines = StringIO(data).readlines()

    splitter = re.compile('\t+')
    # Parse into locals; the module-level cache is only replaced at the end.
    defns = {}
    roots = []
    for lineNo, line in enumerate(lines, 1):
        line = line.strip()
        line = line.split('//')[0]
        if not line:
            continue
        fields = splitter.split(line)
        if len(fields) < 3:
            raise FuncGroupFileParseError("Input line %d (%s) is not long enough." %
                                          (lineNo, repr(line)))
        label = fields[0].strip()
        if label in defns:
            raise FuncGroupFileParseError("Duplicate label on line %d." % lineNo)

        # Every dotted prefix of the label must already be known.
        labelParts = label.split('.')
        for depth in range(1, len(labelParts)):
            ancestor = '.'.join(labelParts[:depth])
            if ancestor not in defns:
                raise FuncGroupFileParseError("Hierarchy member %s (line %d) not found." %
                                              (ancestor, lineNo))
        if len(labelParts) > 1:
            parent = defns['.'.join(labelParts[:-1])]
        else:
            parent = None

        smarts = fields[1]
        patt = Chem.MolFromSmarts(smarts)
        if not patt:
            raise FuncGroupFileParseError('Smarts "%s" (line %d) could not be parsed.' %
                                          (smarts, lineNo))

        name = fields[2].strip()
        rxnSmarts = fields[3] if len(fields) > 3 else ''

        node = FGHierarchyNode(name, patt, smarts=smarts, label=label, parent=parent,
                               rxnSmarts=rxnSmarts)
        if parent:
            parent.children.append(node)
        else:
            roots.append(node)
        defns[label] = node

    # Commit the cache in one step so that the recorded inputs always describe
    # the hierarchy that is actually stored.
    groupDefns = defns
    hierarchy = roots[:]
    lastData = data
    # Clear the recorded file name when the build came from a data string;
    # otherwise a later call with the old file name would hit the cache.
    lastFilename = fileNm if fileNm else None
    return roots
140
141
143 ms = mol.GetSubstructMatches(node.pattern)
144 count = 0
145 seen = {}
146 for m in ms:
147 if m[0] not in seen:
148 count += 1
149 seen[m[0]] = 1
150 if count:
151 res[idx] = count
152 idx += 1
153 for child in node.children:
154 idx = _SetNodeBits(mol, child, res, idx)
155 else:
156 idx += len(node)
157 return idx
158
159
# Body of the routine that turns a molecule into a flat list of counts (its
# ``def`` line is not part of this listing).
# NOTE(review): ``mol`` and ``hierarchy`` are never assigned here; presumably
# both are parameters of the missing def line (``hierarchy`` could also
# resolve to the module-level cache) -- confirm.
# The result has sum(len(entry)) slots over the top-level entries; len(entry)
# is presumably the number of nodes in that entry's subtree -- confirm.
161 totL = 0
162 for entry in hierarchy:
163 totL += len(entry)
164 res = [0] * totL
# Fill the slots entry by entry; _SetNodeBits writes into res in place and
# returns the index at which the next entry starts.
165 idx = 0
166 for entry in hierarchy:
167 idx = _SetNodeBits(mol, entry, res, idx)
168 return res
169