Package rdkit :: Package Chem :: Module FunctionalGroups
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.FunctionalGroups

  1  # 
  2  #  Copyright (c) 2010, Novartis Institutes for BioMedical Research Inc. 
  3  #  All rights reserved. 
  4  # 
  5  # Redistribution and use in source and binary forms, with or without 
  6  # modification, are permitted provided that the following conditions are 
  7  # met: 
  8  # 
  9  #     * Redistributions of source code must retain the above copyright 
 10  #       notice, this list of conditions and the following disclaimer. 
 11  #     * Redistributions in binary form must reproduce the above 
 12  #       copyright notice, this list of conditions and the following 
 13  #       disclaimer in the documentation and/or other materials provided 
 14  #       with the distribution. 
 15  #     * Neither the name of Novartis Institutes for BioMedical Research Inc. 
 16  #       nor the names of its contributors may be used to endorse or promote 
 17  #       products derived from this software without specific prior written permission. 
 18  # 
 19  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 20  # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 21  # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 22  # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 23  # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 24  # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 25  # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 26  # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 27  # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 28  # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 29  # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 30  # 
 31  # Created by Greg Landrum, October 2006 
 32  # 
 33  import os 
 34  import re 
 35  import weakref 
 36   
 37  from rdkit import Chem 
 38  from rdkit import RDConfig 
 39  from rdkit.six.moves import cStringIO as StringIO 
 40   
 41   
class FGHierarchyNode(object):
    """A single node of the functional-group hierarchy tree.

    Each node records the group's name, its label, the SMARTS string and the
    query pattern object built from it, plus an optional reaction SMARTS.
    Sub-groups are stored in ``children``.  When a parent is supplied it is
    kept as a weak reference (call ``node.parent()`` to obtain the parent
    node) -- presumably to avoid a parent/child reference cycle.
    """

    # Class-level defaults; __init__ replaces most of them per instance.
    # removalReaction is never assigned in this class.
    name = ""
    label = ""
    smarts = ""
    rxnSmarts = ""
    pattern = None
    parent = None
    children = None
    removalReaction = None

    def __init__(self, name, patt, smarts="", label="", rxnSmarts="", parent=None):
        """Store the node's data and start with an empty list of children.

        The new node is NOT appended to ``parent.children`` here; the caller
        is responsible for linking it into the tree.
        """
        self.children = []
        self.name = name
        self.label = label
        self.pattern = patt
        self.smarts = smarts
        self.rxnSmarts = rxnSmarts
        if parent:
            self.parent = weakref.ref(parent)

    def __len__(self):
        """Return the number of nodes in the subtree rooted at this node."""
        return 1 + sum(len(child) for child in self.children)
67 68
class FuncGroupFileParseError(ValueError):
    """Raised when a functional-group definition line cannot be parsed."""


# Module-level state shared with BuildFuncGroupHierarchy():
#   groupDefns   -- maps each label to its node for the most recent parse
#   hierarchy    -- top-level nodes of the most recent parse (None until built)
#   lastData     -- the `data` argument seen by the most recent parse
#   lastFilename -- the file name used by the most recent file-based parse
groupDefns = {}
hierarchy = lastData = lastFilename = None


def _ParseFuncGroupLines(lines):
    """Parse functional-group definition lines into a node hierarchy.

    Each non-empty line (after removing anything following ``//``) must hold
    at least three tab-separated fields: label, SMARTS, name; an optional
    fourth field is the reaction SMARTS.  A dotted label such as ``A.B.C``
    makes the node a child of ``A.B``, and every ancestor label must already
    have been defined on an earlier line.

    Returns a ``(roots, defns)`` tuple: the list of top-level nodes and a
    dict mapping every label to its node.

    Raises FuncGroupFileParseError for short lines, duplicate labels,
    missing ancestors and SMARTS strings that cannot be parsed.
    """
    splitter = re.compile('\t+')
    defns = {}
    roots = []
    for lineNo, line in enumerate(lines, 1):
        # Trim whitespace first, then cut off a trailing '//' comment.
        line = line.strip().split('//')[0]
        if not line:
            continue
        splitL = splitter.split(line)
        if len(splitL) < 3:
            raise FuncGroupFileParseError("Input line %d (%s) is not long enough." %
                                          (lineNo, repr(line)))
        label = splitL[0].strip()
        if label in defns:
            raise FuncGroupFileParseError("Duplicate label on line %d." % lineNo)

        # Report the first (outermost) ancestor that has not been defined yet.
        labelHierarchy = label.split('.')
        for depth in range(1, len(labelHierarchy)):
            ancestor = '.'.join(labelHierarchy[:depth])
            if ancestor not in defns:
                raise FuncGroupFileParseError("Hierarchy member %s (line %d) not found." %
                                              (ancestor, lineNo))
        if len(labelHierarchy) > 1:
            parent = defns['.'.join(labelHierarchy[:-1])]
        else:
            parent = None

        smarts = splitL[1]
        patt = Chem.MolFromSmarts(smarts)
        if not patt:
            raise FuncGroupFileParseError('Smarts "%s" (line %d) could not be parsed.' %
                                          (smarts, lineNo))

        name = splitL[2].strip()
        rxnSmarts = splitL[3] if len(splitL) > 3 else ''

        node = FGHierarchyNode(name, patt, smarts=smarts, label=label, parent=parent,
                               rxnSmarts=rxnSmarts)
        if parent is not None:
            parent.children.append(node)
        else:
            roots.append(node)
        defns[label] = node
    return roots, defns


def BuildFuncGroupHierarchy(fileNm=None, data=None, force=False):
    """Build (or fetch from the module cache) the functional-group hierarchy.

    Arguments:
      - fileNm: path of a definition file.  When neither ``fileNm`` nor
        ``data`` is given, 'Functional_Group_Hierarchy.txt' from
        ``RDConfig.RDDataDir`` is used.
      - data: the definitions as a string; only used when ``fileNm`` is empty.
      - force: when true, always re-parse instead of using the cached result.

    Returns the list of top-level FGHierarchyNode objects.  A cached result
    is returned as a shallow copy of the stored list.

    Side effects: on success the module globals ``groupDefns``,
    ``hierarchy``, ``lastData`` and (for file input) ``lastFilename`` are
    replaced.  They are only touched after parsing has succeeded, so a
    failed call leaves the previous cache intact and cannot cause a later
    call with the same bad input to return a stale hierarchy.

    Raises FuncGroupFileParseError if the definitions are malformed; errors
    from opening the file propagate unchanged.
    """
    global groupDefns, hierarchy, lastData, lastFilename
    if (not force and hierarchy and (not data or data == lastData) and
            (not fileNm or fileNm == lastFilename)):
        return hierarchy[:]

    if not fileNm and not data:
        fileNm = os.path.join(RDConfig.RDDataDir, 'Functional_Group_Hierarchy.txt')

    if fileNm:
        # The context manager guarantees the file handle is released.
        with open(fileNm, 'r') as inF:
            lines = inF.readlines()
    else:
        # fileNm is empty here, so data must be non-empty (see default above).
        lines = StringIO(data).readlines()

    res, defns = _ParseFuncGroupLines(lines)

    # Commit the new state only now that parsing has succeeded.
    groupDefns = defns
    lastData = data
    if fileNm:
        lastFilename = fileNm
    hierarchy = res[:]
    return res
140 141
142 -def _SetNodeBits(mol, node, res, idx):
143 ms = mol.GetSubstructMatches(node.pattern) 144 count = 0 145 seen = {} 146 for m in ms: 147 if m[0] not in seen: 148 count += 1 149 seen[m[0]] = 1 150 if count: 151 res[idx] = count 152 idx += 1 153 for child in node.children: 154 idx = _SetNodeBits(mol, child, res, idx) 155 else: 156 idx += len(node) 157 return idx
158 159
def CreateMolFingerprint(mol, hierarchy):
    """Return a list of per-group match counts for ``mol``.

    The list has one slot per node in ``hierarchy`` (the sum of ``len()``
    over the top-level entries), initialised to zero and then filled in by
    _SetNodeBits() for each top-level entry in turn.
    """
    fingerprint = [0] * sum(len(entry) for entry in hierarchy)
    position = 0
    for entry in hierarchy:
        position = _SetNodeBits(mol, entry, fingerprint, position)
    return fingerprint
169