Package rdkit :: Package Chem :: Module SaltRemover
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.SaltRemover

  1  # 
  2  #  Copyright (c) 2010, Novartis Institutes for BioMedical Research Inc. 
  3  #  All rights reserved. 
  4  # 
  5  # Redistribution and use in source and binary forms, with or without 
  6  # modification, are permitted provided that the following conditions are 
  7  # met: 
  8  # 
  9  #     * Redistributions of source code must retain the above copyright 
 10  #       notice, this list of conditions and the following disclaimer. 
 11  #     * Redistributions in binary form must reproduce the above 
 12  #       copyright notice, this list of conditions and the following 
 13  #       disclaimer in the documentation and/or other materials provided 
 14  #       with the distribution. 
 15  #     * Neither the name of Novartis Institutes for BioMedical Research Inc. 
 16  #       nor the names of its contributors may be used to endorse or promote 
 17  #       products derived from this software without specific prior written permission. 
 18  # 
 19  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 20  # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 21  # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 22  # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 23  # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 24  # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 25  # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 26  # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 27  # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 28  # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 29  # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 30  # 
 31  # Created by Greg Landrum, Dec 2006 
 32  # 
 33  import os 
 34  import re 
 35  from collections import namedtuple 
 36  from contextlib import closing 
 37   
 38  from rdkit import Chem, RDConfig 
 39  from rdkit.Chem.rdmolfiles import SDMolSupplier, SmilesMolSupplier 
 40   
class InputFormat:
    """Names of the salt-definition formats that SaltRemover accepts."""
    # One constant per supported format; the values are the strings compared
    # against SaltRemover.defnFormat.
    SMARTS, MOL, SMILES = 'smarts', 'mol', 'smiles'
45
def _smartsFromSmartsLine(line):
    """
    Build a query molecule from one line of a SMARTS salt definition.

    Everything from the first '//' onwards is treated as a comment and
    dropped; the first tab/space-delimited token of what remains is handed
    to 'Chem.MolFromSmarts'.

    Returns the parsed molecule, or None when the line carries no SMARTS at
    all (blank or comment-only).  Raises ValueError, carrying the stripped
    and comment-free text, when 'Chem.MolFromSmarts' returns None.
    """
    # The salt definition files use the rather unusual '//' comment marker.
    text = line.strip().partition('//')[0]
    if not text:
        return None
    # Only the leading token is the pattern; anything after the first run of
    # tabs/spaces is ignored.
    pattern = re.split(r'[\t ]+', text, maxsplit=1)[0]
    query = Chem.MolFromSmarts(pattern)
    if query is None:
        raise ValueError(text)
    return query
61
def _getSmartsSaltsFromStream(stream):
    """
    Lazily yield the salts parsed from each line of the given stream.

    Every line is passed through '_smartsFromSmartsLine'; lines that produce
    no molecule (a falsy result) are skipped.  The stream is closed once the
    generator finishes, fails, or is itself closed.
    """
    with closing(stream) as source:
        parsed = (_smartsFromSmartsLine(entry) for entry in source)
        for salt in parsed:
            if salt:
                yield salt
71
def _getSmartsSaltsFromFile(filename):
    """
    Extracts SMARTS salts from the file at the given path.

    The file is opened for reading immediately and handed to
    '_getSmartsSaltsFromStream', whose generator is returned unchanged.
    """
    handle = open(filename, 'r')
    return _getSmartsSaltsFromStream(handle)
77
class SaltRemover(object):
    """
    Removes salt patterns from molecules.

    The salt patterns are loaded once, at construction time, either from the
    text passed as 'defnData' or from the file named by 'defnFilename', and
    are kept in 'self.salts'.

    Attributes:
      - defnFilename: path of the definition file (class-level default is
        'Salts.txt' inside RDConfig.RDDataDir)
      - defnData: inline definition text, or None
      - defnFormat: one of the InputFormat constants
      - salts: list of pattern molecules
    """
    defnFilename = os.path.join(RDConfig.RDDataDir, 'Salts.txt')

    # Result type of _StripMol / StripMolWithDeleted.  Created once here
    # rather than on every call, so all results share a single class.
    _StrippedMol = namedtuple('StrippedMol', ['mol', 'deleted'])

    def __init__(self, defnFilename=None, defnData=None, defnFormat=InputFormat.SMARTS):
        """
        Arguments:
          - defnFilename: optional path overriding the class-level default
          - defnData: optional definition text; when non-empty it is used
            instead of any file
          - defnFormat: InputFormat constant describing the definitions

        Raises ValueError for an unsupported format or an unparsable
        definition (see _initPatterns).
        """
        if defnFilename:
            self.defnFilename = defnFilename
        self.defnData = defnData
        self.salts = None
        self.defnFormat = defnFormat
        self._initPatterns()

    def _initPatterns(self):
        """
        Populates self.salts from self.defnData or self.defnFilename.

        >>> remover = SaltRemover()
        >>> len(remover.salts)>0
        True

        Default input format is SMARTS
        >>> remover = SaltRemover(defnData="[Cl,Br]")
        >>> len(remover.salts)
        1

        >>> remover = SaltRemover(defnData="[Na+]\\nCC(=O)O", defnFormat=InputFormat.SMILES)
        >>> len(remover.salts)
        2

        Blank lines and '//' comment lines in the data are skipped, just as
        they are when reading a SMARTS file:
        >>> remover = SaltRemover(defnData="// halides\\n[Cl,Br]\\n\\n")
        >>> len(remover.salts)
        1

        >>> from rdkit import RDLogger
        >>> RDLogger.DisableLog('rdApp.error')
        >>> remover = SaltRemover(defnData="[Cl,fail]")
        Traceback (most recent call last):
          ...
        ValueError: [Cl,fail]

        >>> RDLogger.EnableLog('rdApp.error')
        """
        if self.defnData:
            # Inline data is only defined for the line-oriented text formats.
            if self.defnFormat not in (InputFormat.SMARTS, InputFormat.SMILES):
                raise ValueError('Unsupported format for supplier.')
            self.salts = []
            for line in self.defnData.splitlines():
                if not line.strip():
                    # Nothing to parse on an empty line.
                    continue
                if self.defnFormat == InputFormat.SMARTS:
                    # Raises ValueError itself on bad SMARTS; None means the
                    # line held only a '//' comment.
                    salt = _smartsFromSmartsLine(line)
                    if salt is None:
                        continue
                else:
                    salt = Chem.MolFromSmiles(line)
                    if salt is None:
                        raise ValueError(line)
                self.salts.append(salt)
        else:
            if self.defnFormat == InputFormat.SMARTS:
                self.salts = [mol for mol in _getSmartsSaltsFromFile(self.defnFilename)]
            elif self.defnFormat == InputFormat.MOL:
                self.salts = [mol for mol in SDMolSupplier(self.defnFilename)]
            elif self.defnFormat == InputFormat.SMILES:
                self.salts = [mol for mol in SmilesMolSupplier(self.defnFilename)]
            else:
                raise ValueError('Unsupported format for supplier.')

    def StripMol(self, mol, dontRemoveEverything=False):
        """
        Returns the molecule with the salts removed.

        >>> remover = SaltRemover(defnData="[Cl,Br]")
        >>> len(remover.salts)
        1

        >>> mol = Chem.MolFromSmiles('CN(C)C.Cl')
        >>> res = remover.StripMol(mol)
        >>> res is not None
        True
        >>> res.GetNumAtoms()
        4

        Notice that all salts are removed:
        >>> mol = Chem.MolFromSmiles('CN(C)C.Cl.Cl.Br')
        >>> res = remover.StripMol(mol)
        >>> res.GetNumAtoms()
        4

        Matching (e.g. "salt-like") atoms in the molecule are unchanged:
        >>> mol = Chem.MolFromSmiles('CN(Br)Cl')
        >>> res = remover.StripMol(mol)
        >>> res.GetNumAtoms()
        4

        >>> mol = Chem.MolFromSmiles('CN(Br)Cl.Cl')
        >>> res = remover.StripMol(mol)
        >>> res.GetNumAtoms()
        4

        Charged salts are handled reasonably:
        >>> mol = Chem.MolFromSmiles('C[NH+](C)(C).[Cl-]')
        >>> res = remover.StripMol(mol)
        >>> res.GetNumAtoms()
        4


        Watch out for this case (everything removed):
        >>> remover = SaltRemover()
        >>> len(remover.salts)>1
        True
        >>> mol = Chem.MolFromSmiles('CC(=O)O.[Na]')
        >>> res = remover.StripMol(mol)
        >>> res.GetNumAtoms()
        0

        dontRemoveEverything helps with this by leaving the last salt:
        >>> res = remover.StripMol(mol,dontRemoveEverything=True)
        >>> res.GetNumAtoms()
        4

        but in cases where the last salts are the same, it can't choose
        between them, so it returns all of them:
        >>> mol = Chem.MolFromSmiles('Cl.Cl')
        >>> res = remover.StripMol(mol,dontRemoveEverything=True)
        >>> res.GetNumAtoms()
        2

        """
        strippedMol = self._StripMol(mol, dontRemoveEverything)
        return strippedMol.mol

    def StripMolWithDeleted(self, mol, dontRemoveEverything=False):
        """
        Strips given molecule and returns it together with the salt patterns
        that removed something, as a ('mol', 'deleted') named tuple.  Each
        pattern appears at most once in 'deleted', however many fragments it
        matched.

        >>> remover = SaltRemover(defnData="[Cl,Br]")
        >>> len(remover.salts)
        1

        >>> mol = Chem.MolFromSmiles('CN(C)C.Cl.Br')
        >>> res, deleted = remover.StripMolWithDeleted(mol)
        >>> Chem.MolToSmiles(res)
        'CN(C)C'
        >>> [Chem.MolToSmarts(m) for m in deleted]
        ['[Cl,Br]']

        >>> mol = Chem.MolFromSmiles('CN(C)C.Cl')
        >>> res, deleted = remover.StripMolWithDeleted(mol)
        >>> res.GetNumAtoms()
        4
        >>> len(deleted)
        1
        >>> deleted[0].GetNumAtoms()
        1
        >>> Chem.MolToSmiles(deleted[0])
        'Cl'

        Multiple occurrences of 'Cl' and without tuple destructuring
        >>> mol = Chem.MolFromSmiles('CN(C)C.Cl.Cl')
        >>> tup = remover.StripMolWithDeleted(mol)

        >>> tup.mol.GetNumAtoms()
        4
        >>> len(tup.deleted)
        1
        >>> tup.deleted[0].GetNumAtoms()
        1
        >>> Chem.MolToSmiles(tup.deleted[0])
        'Cl'
        """
        return self._StripMol(mol, dontRemoveEverything)

    def _StripMol(self, mol, dontRemoveEverything=False):
        """
        Applies every pattern in self.salts to 'mol' in order.

        Returns a ('mol', 'deleted') named tuple: the stripped molecule and
        the list of patterns whose application changed the atom count.  With
        dontRemoveEverything set, a molecule with at most one fragment is
        returned untouched, stripping stops once at most one fragment is
        left, and a deletion that would empty the molecule is not applied.
        """

        def _applyPattern(m, salt, notEverything):
            # Repeatedly delete fragments matching 'salt' until the atom
            # count stops shrinking (or the molecule is empty).
            nAts = m.GetNumAtoms()
            if not nAts:
                return m
            res = m

            t = Chem.DeleteSubstructs(res, salt, True)
            if not t or (notEverything and t.GetNumAtoms() == 0):
                return res
            res = t
            while res.GetNumAtoms() and nAts > res.GetNumAtoms():
                nAts = res.GetNumAtoms()
                t = Chem.DeleteSubstructs(res, salt, True)
                if notEverything and t.GetNumAtoms() == 0:
                    # Keep the last non-empty result instead of emptying it.
                    break
                res = t
            return res

        StrippedMol = self._StrippedMol
        deleted = []
        if dontRemoveEverything and len(Chem.GetMolFrags(mol)) <= 1:
            return StrippedMol(mol, deleted)
        modified = False
        natoms = mol.GetNumAtoms()
        for salt in self.salts:
            mol = _applyPattern(mol, salt, dontRemoveEverything)
            if natoms != mol.GetNumAtoms():
                natoms = mol.GetNumAtoms()
                modified = True
                deleted.append(salt)
                if dontRemoveEverything and len(Chem.GetMolFrags(mol)) <= 1:
                    break
        # Only re-sanitize when something was removed and atoms remain.
        if modified and mol.GetNumAtoms() > 0:
            Chem.SanitizeMol(mol)
        return StrippedMol(mol, deleted)

    def __call__(self, mol, dontRemoveEverything=False):
        """
        Shorthand for StripMol().

        >>> remover = SaltRemover(defnData="[Cl,Br]")
        >>> len(remover.salts)
        1
        >>> Chem.MolToSmiles(remover.salts[0])
        'Cl'

        >>> mol = Chem.MolFromSmiles('CN(C)C.Cl')
        >>> res = remover(mol)
        >>> res is not None
        True
        >>> res.GetNumAtoms()
        4

        """
        return self.StripMol(mol, dontRemoveEverything=dontRemoveEverything)
# ------------------------------------
#
#  doctest boilerplate
#
def _runDoctests(verbose=None):  # pragma: nocover
    """Run this module's doctests and exit with the failure count as status."""
    import doctest
    import sys
    outcome = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose)
    # testmod() returns (failed, attempted); only the failures matter here.
    sys.exit(outcome[0])


if __name__ == '__main__':  # pragma: nocover
    _runDoctests()