1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33 import os
34 import re
35 from collections import namedtuple
36 from contextlib import closing
37
38 from rdkit import Chem, RDConfig
39 from rdkit.Chem.rdmolfiles import SDMolSupplier, SmilesMolSupplier
40
45
47 """
48 Converts given line into a molecule using 'Chem.MolFromSmarts'.
49 """
50
51 whitespace = re.compile(r'[\t ]+')
52
53
54 line = line.strip().split('//')[0]
55 if line:
56 smarts = whitespace.split(line)
57 salt = Chem.MolFromSmarts(smarts[0])
58 if salt is None:
59 raise ValueError(line)
60 return salt
61
71
77
79 defnFilename = os.path.join(RDConfig.RDDataDir, 'Salts.txt')
80
88
90 """
91
92 >>> remover = SaltRemover()
93 >>> len(remover.salts)>0
94 True
95
96 Default input format is SMARTS
97 >>> remover = SaltRemover(defnData="[Cl,Br]")
98 >>> len(remover.salts)
99 1
100
101 >>> remover = SaltRemover(defnData="[Na+]\\nCC(=O)O", defnFormat=InputFormat.SMILES)
102 >>> len(remover.salts)
103 2
104
105 >>> from rdkit import RDLogger
106 >>> RDLogger.DisableLog('rdApp.error')
107 >>> remover = SaltRemover(defnData="[Cl,fail]")
108 Traceback (most recent call last):
109 ...
110 ValueError: [Cl,fail]
111
112 >>> RDLogger.EnableLog('rdApp.error')
113 """
114 if self.defnData:
115 from rdkit.six.moves import cStringIO as StringIO
116 inF = StringIO(self.defnData)
117 with closing(inF):
118 self.salts = []
119 for line in inF:
120 if line:
121 if self.defnFormat == InputFormat.SMARTS:
122 salt = _smartsFromSmartsLine(line)
123 elif self.defnFormat == InputFormat.SMILES:
124 salt = Chem.MolFromSmiles(line)
125 else:
126 raise ValueError('Unsupported format for supplier.')
127 if salt is None:
128 raise ValueError(line)
129 self.salts.append(salt)
130 else:
131 if self.defnFormat == InputFormat.SMARTS:
132 self.salts = [mol for mol in _getSmartsSaltsFromFile(self.defnFilename)]
133 elif self.defnFormat == InputFormat.MOL:
134 self.salts = [mol for mol in SDMolSupplier(self.defnFilename)]
135 elif self.defnFormat == InputFormat.SMILES:
136 self.salts = [mol for mol in SmilesMolSupplier(self.defnFilename)]
137 else:
138 raise ValueError('Unsupported format for supplier.')
139
def StripMol(self, mol, dontRemoveEverything=False):
    """
    Returns the molecule that is left once the salt patterns have been applied.

    The work is delegated to '_StripMol'; only the 'mol' field of its result
    is handed back, the list of patterns that matched is discarded.

    Arguments:
      - mol: the molecule to strip
      - dontRemoveEverything: passed through unchanged to '_StripMol'

    >>> remover = SaltRemover(defnData="[Cl,Br]")
    >>> len(remover.salts)
    1

    >>> mol = Chem.MolFromSmiles('CN(C)C.Cl')
    >>> res = remover.StripMol(mol)
    >>> res is not None
    True
    >>> res.GetNumAtoms()
    4

    Notice that all salts are removed:
    >>> mol = Chem.MolFromSmiles('CN(C)C.Cl.Cl.Br')
    >>> res = remover.StripMol(mol)
    >>> res.GetNumAtoms()
    4

    Matching (e.g. "salt-like") atoms in the molecule are unchanged:
    >>> mol = Chem.MolFromSmiles('CN(Br)Cl')
    >>> res = remover.StripMol(mol)
    >>> res.GetNumAtoms()
    4

    >>> mol = Chem.MolFromSmiles('CN(Br)Cl.Cl')
    >>> res = remover.StripMol(mol)
    >>> res.GetNumAtoms()
    4

    Charged salts are handled reasonably:
    >>> mol = Chem.MolFromSmiles('C[NH+](C)(C).[Cl-]')
    >>> res = remover.StripMol(mol)
    >>> res.GetNumAtoms()
    4


    Watch out for this case (everything removed):
    >>> remover = SaltRemover()
    >>> len(remover.salts)>1
    True
    >>> mol = Chem.MolFromSmiles('CC(=O)O.[Na]')
    >>> res = remover.StripMol(mol)
    >>> res.GetNumAtoms()
    0

    dontRemoveEverything helps with this by leaving the last salt:
    >>> res = remover.StripMol(mol,dontRemoveEverything=True)
    >>> res.GetNumAtoms()
    4

    but in cases where the last salts are the same, it can't choose
    between them, so it returns all of them:
    >>> mol = Chem.MolFromSmiles('Cl.Cl')
    >>> res = remover.StripMol(mol,dontRemoveEverything=True)
    >>> res.GetNumAtoms()
    2

    """
    return self._StripMol(mol, dontRemoveEverything).mol
202
def StripMolWithDeleted(self, mol, dontRemoveEverything=False):
    """
    Strips given molecule and returns it, together with the salt patterns
    that removed something from it.

    The result is exactly what '_StripMol' returns: a 'StrippedMol' named
    tuple with the fields 'mol' and 'deleted'. Note that 'deleted' holds
    the matching entries of 'self.salts' (one entry per pattern, however
    many fragments it removed), not the removed fragments themselves.

    Arguments:
      - mol: the molecule to strip
      - dontRemoveEverything: passed through unchanged to '_StripMol'

    >>> remover = SaltRemover(defnData="[Cl,Br]")
    >>> len(remover.salts)
    1

    >>> mol = Chem.MolFromSmiles('CN(C)C.Cl.Br')
    >>> res, deleted = remover.StripMolWithDeleted(mol)
    >>> Chem.MolToSmiles(res)
    'CN(C)C'
    >>> [Chem.MolToSmarts(m) for m in deleted]
    ['[Cl,Br]']

    >>> mol = Chem.MolFromSmiles('CN(C)C.Cl')
    >>> res, deleted = remover.StripMolWithDeleted(mol)
    >>> res.GetNumAtoms()
    4
    >>> len(deleted)
    1
    >>> deleted[0].GetNumAtoms()
    1
    >>> Chem.MolToSmiles(deleted[0])
    'Cl'

    Multiple occurrences of 'Cl' and without tuple destructuring
    >>> mol = Chem.MolFromSmiles('CN(C)C.Cl.Cl')
    >>> tup = remover.StripMolWithDeleted(mol)

    >>> tup.mol.GetNumAtoms()
    4
    >>> len(tup.deleted)
    1
    >>> tup.deleted[0].GetNumAtoms()
    1
    >>> Chem.MolToSmiles(tup.deleted[0])
    'Cl'
    """
    return self._StripMol(mol, dontRemoveEverything)
243
244 - def _StripMol(self, mol, dontRemoveEverything=False):
245
246 def _applyPattern(m, salt, notEverything):
247 nAts = m.GetNumAtoms()
248 if not nAts:
249 return m
250 res = m
251
252 t = Chem.DeleteSubstructs(res, salt, True)
253 if not t or (notEverything and t.GetNumAtoms() == 0):
254 return res
255 res = t
256 while res.GetNumAtoms() and nAts > res.GetNumAtoms():
257 nAts = res.GetNumAtoms()
258 t = Chem.DeleteSubstructs(res, salt, True)
259 if notEverything and t.GetNumAtoms() == 0:
260 break
261 res = t
262 return res
263
264 StrippedMol = namedtuple('StrippedMol', ['mol', 'deleted'])
265 deleted = []
266 if dontRemoveEverything and len(Chem.GetMolFrags(mol)) <= 1:
267 return StrippedMol(mol, deleted)
268 modified = False
269 natoms = mol.GetNumAtoms()
270 for salt in self.salts:
271 mol = _applyPattern(mol, salt, dontRemoveEverything)
272 if natoms != mol.GetNumAtoms():
273 natoms = mol.GetNumAtoms()
274 modified = True
275 deleted.append(salt)
276 if dontRemoveEverything and len(Chem.GetMolFrags(mol)) <= 1:
277 break
278 if modified and mol.GetNumAtoms() > 0:
279 Chem.SanitizeMol(mol)
280 return StrippedMol(mol, deleted)
281
282 - def __call__(self, mol, dontRemoveEverything=False):
283 """
284
285 >>> remover = SaltRemover(defnData="[Cl,Br]")
286 >>> len(remover.salts)
287 1
288 >>> Chem.MolToSmiles(remover.salts[0])
289 'Cl'
290
291 >>> mol = Chem.MolFromSmiles('CN(C)C.Cl')
292 >>> res = remover(mol)
293 >>> res is not None
294 True
295 >>> res.GetNumAtoms()
296 4
297
298 """
299 return self.StripMol(mol, dontRemoveEverything=dontRemoveEverything)
300
301
302
303
304
305
307 import sys
308 import doctest
309 failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose)
310 sys.exit(failed)
311
312
313 if __name__ == '__main__':
314 _runDoctests()
315