Package rdkit :: Package Chem :: Package SimpleEnum :: Module Enumerator
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.SimpleEnum.Enumerator

  1  # 
  2  #  Copyright (c) 2014, Novartis Institutes for BioMedical Research Inc. 
  3  #  All rights reserved. 
  4  # 
  5  # Redistribution and use in source and binary forms, with or without 
  6  # modification, are permitted provided that the following conditions are 
  7  # met: 
  8  # 
  9  #     * Redistributions of source code must retain the above copyright 
 10  #       notice, this list of conditions and the following disclaimer. 
 11  #     * Redistributions in binary form must reproduce the above 
 12  #       copyright notice, this list of conditions and the following 
 13  #       disclaimer in the documentation and/or other materials provided 
 14  #       with the distribution. 
 15  #     * Neither the name of Novartis Institutes for BioMedical Research Inc. 
 16  #       nor the names of its contributors may be used to endorse or promote 
 17  #       products derived from this software without specific prior written permission. 
 18  # 
 19  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 20  # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 21  # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 22  # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 23  # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 24  # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 25  # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 26  # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 27  # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 28  # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 29  # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 30  # 
 31  # Created by Greg Landrum, May 2009 
 32  from __future__ import print_function 
 33   
 34  import os 
 35   
 36  from rdkit import Chem 
 37  from rdkit import RDConfig 
 38  from rdkit.Chem import AllChem 
 39  from rdkit.Chem import rdChemReactions 
 40   
 41   
def PreprocessReaction(reaction, funcGroupFilename=None, propName='molFileValue'):
    """Preprocess a reaction via ``rdChemReactions.PreprocessReaction``.

    When ``funcGroupFilename`` is given, the functional group queries are
    parsed from that file with ``Chem.ParseMolQueryDefFile`` and handed to the
    underlying call; otherwise the underlying call is made without an explicit
    query dictionary. ``propName`` is forwarded in both cases.

    Raises IOError('cannot open', funcGroupFilename) if the query definition
    file cannot be parsed.

    As the examples below show, the result unpacks into
    (nWarn, nError, nReacts, nProds, reactantLabels):

    >>> from rdkit.Chem import AllChem
    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','boronic1.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> rxn.Initialize()
    >>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
    >>> nWarn
    0
    >>> nError
    0
    >>> nReacts
    2
    >>> nProds
    1
    >>> reactantLabels
    (((0, 'halogen.bromine.aromatic'),), ((1, 'boronicacid'),))

    If there are functional group labels in the input reaction (via atoms with molFileValue
    properties), the corresponding atoms will have queries added to them so that they only
    match such things. We can see this here:
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> rxn.Initialize()
    >>> r1 = rxn.GetReactantTemplate(0)
    >>> m1 = Chem.MolFromSmiles('CCBr')
    >>> m2 = Chem.MolFromSmiles('c1ccccc1Br')

    These both match because the reaction file itself just has R1-Br:
    >>> m1.HasSubstructMatch(r1)
    True
    >>> m2.HasSubstructMatch(r1)
    True

    After preprocessing, we only match the aromatic Br:
    >>> d = PreprocessReaction(rxn)
    >>> m1.HasSubstructMatch(r1)
    False
    >>> m2.HasSubstructMatch(r1)
    True

    We also support or queries in the values field (separated by commas):
    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','azide_reaction.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> rxn.Initialize()
    >>> reactantLabels = PreprocessReaction(rxn)[-1]
    >>> reactantLabels
    (((1, 'azide'),), ((1, 'carboxylicacid,acidchloride'),))
    >>> m1 = Chem.MolFromSmiles('CC(=O)O')
    >>> m2 = Chem.MolFromSmiles('CC(=O)Cl')
    >>> m3 = Chem.MolFromSmiles('CC(=O)N')
    >>> r2 = rxn.GetReactantTemplate(1)
    >>> m1.HasSubstructMatch(r2)
    True
    >>> m2.HasSubstructMatch(r2)
    True
    >>> m3.HasSubstructMatch(r2)
    False

    Unrecognized functional group types cause a RuntimeError:
    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','bad_value1.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> rxn.Initialize()
    >>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
    Traceback (most recent call last):
      File "/usr/prog/python/2.6.6_gnu/lib/python2.6/doctest.py", line 1253, in __run
        compileflags, 1) in test.globs
      File "<doctest __main__.PreprocessReaction[36]>", line 1, in <module>
        nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
      File "Enumerator.py", line 105, in PreprocessReaction
        reactantLabels = reaction.AddRecursiveQueriesToReaction(queryDict, propName='molFileValue', getLabels=True)
    RuntimeError: KeyErrorException

    One unrecognized group type in a comma-separated list makes the whole thing fail:
    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','bad_value2.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> rxn.Initialize()
    >>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
    Traceback (most recent call last):
      File "/usr/prog/python/2.6.6_gnu/lib/python2.6/doctest.py", line 1253, in __run
        compileflags, 1) in test.globs
      File "<doctest __main__.PreprocessReaction[36]>", line 1, in <module>
        nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
      File "Enumerator.py", line 105, in PreprocessReaction
        reactantLabels = reaction.AddRecursiveQueriesToReaction(queryDict, propName='molFileValue', getLabels=True)
    RuntimeError: KeyErrorException
    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','bad_value3.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> rxn.Initialize()
    >>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
    Traceback (most recent call last):
      File "/usr/prog/python/2.6.6_gnu/lib/python2.6/doctest.py", line 1253, in __run
        compileflags, 1) in test.globs
      File "<doctest __main__.PreprocessReaction[36]>", line 1, in <module>
        nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
      File "Enumerator.py", line 105, in PreprocessReaction
        reactantLabels = reaction.AddRecursiveQueriesToReaction(queryDict, propName='molFileValue', getLabels=True)
    RuntimeError: KeyErrorException
    >>> rxn = rdChemReactions.ChemicalReaction()
    >>> rxn.Initialize()
    >>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
    >>> reactantLabels
    ()
    >>> reactantLabels == ()
    True
    """
    # No custom definitions requested: delegate directly, forwarding only the
    # property name.
    if not funcGroupFilename:
        return rdChemReactions.PreprocessReaction(reaction, propName=propName)

    # Any failure while reading/parsing the definitions file is reported to
    # the caller as an IOError carrying the offending filename.
    try:
        groupQueries = Chem.ParseMolQueryDefFile(funcGroupFilename)
    except Exception:
        raise IOError('cannot open', funcGroupFilename)

    return rdChemReactions.PreprocessReaction(reaction, groupQueries, propName)
156 157
def EnumerateReaction(
        reaction, bbLists, uniqueProductsOnly=False,
        funcGroupFilename=os.path.join(RDConfig.RDDataDir, 'Functional_Group_Hierarchy.txt'),
        propName='molFileValue'):
    """Enumerate the products of ``reaction`` over lists of building blocks.

    The reaction is first run through ``PreprocessReaction`` using
    ``funcGroupFilename`` and ``propName``, then the library is enumerated
    with ``AllChem.EnumerateLibraryFromReaction(reaction, bbLists)``.

    Arguments:
      - reaction: the reaction to apply (it is preprocessed in place)
      - bbLists: one sequence of building blocks per reactant of the reaction
      - uniqueProductsOnly: if true, skip empty product entries and entries
        whose sorted, '.'-joined product SMILES has already been produced
      - funcGroupFilename: functional group definition file passed on to
        ``PreprocessReaction``
      - propName: atom property name passed on to ``PreprocessReaction``

    Returns the iterable produced by ``AllChem.EnumerateLibraryFromReaction``,
    or a generator filtering it when ``uniqueProductsOnly`` is set.

    Raises:
      - ValueError: if preprocessing reports errors, or if the number of
        building block lists differs from the number of reactants
      - IOError: if ``funcGroupFilename`` cannot be parsed (raised by
        ``PreprocessReaction``)

    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','boronic1.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> rxn.Initialize()
    >>> reacts1=['Brc1ccccc1','Brc1ncccc1','Brc1cnccc1']
    >>> reacts1=[Chem.MolFromSmiles(x) for x in reacts1]
    >>> reacts2=['CCB(O)O','CCCB(O)O']
    >>> reacts2=[Chem.MolFromSmiles(x) for x in reacts2]

    >>> prods = EnumerateReaction(rxn,(reacts1,reacts2))
    >>> prods = list(prods)

    This is a bit nasty because of the symmetry of the boronic acid:
    >>> len(prods)
    12

    >>> smis = list(set([Chem.MolToSmiles(x[0]) for x in prods]))
    >>> smis.sort()
    >>> len(smis)
    6
    >>> print(smis)
    ['CCCc1ccccc1', 'CCCc1ccccn1', 'CCCc1cccnc1', 'CCc1ccccc1', 'CCc1ccccn1', 'CCc1cccnc1']

    The nastiness can be avoided at the cost of some memory by asking for only unique products:
    >>> prods = EnumerateReaction(rxn,(reacts1,reacts2),uniqueProductsOnly=True)
    >>> prods = list(prods)
    >>> len(prods)
    6
    >>> print(sorted([Chem.MolToSmiles(x[0]) for x in prods]))
    ['CCCc1ccccc1', 'CCCc1ccccn1', 'CCCc1cccnc1', 'CCc1ccccc1', 'CCc1ccccn1', 'CCc1cccnc1']


    """
    # Forward the caller's functional group file and property name; they were
    # previously accepted by this function but never used.
    _, nError, nReacts, _, _ = PreprocessReaction(
        reaction, funcGroupFilename=funcGroupFilename, propName=propName)
    if nError:
        raise ValueError('bad reaction')
    if len(bbLists) != nReacts:
        raise ValueError('%d reactants in reaction, %d bb lists supplied' % (nReacts, len(bbLists)))

    def _uniqueOnly(lst):
        # A set gives constant-time membership checks, so de-duplication stays
        # linear in the number of enumerated product entries.
        seen = set()
        for entry in lst:
            if not entry:
                continue
            # Sorting makes the key independent of the order of the products
            # within one entry.
            smi = '.'.join(sorted(Chem.MolToSmiles(x, True) for x in entry))
            if smi not in seen:
                seen.add(smi)
                yield entry

    ps = AllChem.EnumerateLibraryFromReaction(reaction, bbLists)
    if not uniqueProductsOnly:
        return ps
    return _uniqueOnly(ps)


# ------------------------------------
#
#  doctest boilerplate
#
def _runDoctests(verbose=None):  # pragma: nocover
    """Run ``doctest.testmod`` with ELLIPSIS enabled and exit the process.

    The number of failed examples reported by ``doctest.testmod`` is used as
    the process exit status.
    """
    import doctest
    import sys

    results = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose)
    # The first element of the result is the failure count.
    sys.exit(results[0])


if __name__ == '__main__':  # pragma: nocover
    _runDoctests()