Package rdkit :: Package Chem :: Package MolKey :: Module InchiInfo
[hide private]
[frames | no frames]

Source Code for Module rdkit.Chem.MolKey.InchiInfo

  1  # 
  2  #  Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc. 
  3  #  All rights reserved. 
  4  #  
  5  # Redistribution and use in source and binary forms, with or without 
  6  # modification, are permitted provided that the following conditions are 
  7  # met:  
  8  # 
  9  #     * Redistributions of source code must retain the above copyright  
 10  #       notice, this list of conditions and the following disclaimer. 
 11  #     * Redistributions in binary form must reproduce the above 
 12  #       copyright notice, this list of conditions and the following  
 13  #       disclaimer in the documentation and/or other materials provided  
 14  #       with the distribution. 
 15  #     * Neither the name of Novartis Institutes for BioMedical Research Inc.  
 16  #       nor the names of its contributors may be used to endorse or promote  
 17  #       products derived from this software without specific prior written permission. 
 18  # 
 19  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 20  # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 21  # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 22  # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 23  # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 24  # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 25  # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 26  # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 27  # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 28  # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 29  # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 30  # 
 31  # Retrieve stereo and tautomer information from the InChI string 
 32  # Created on Sep 23, 2010 
 33  # Original author: Thomas Muellerk muelleth 
 34  import logging 
 35  import re 
 36  import unittest 
 37  from rdkit import Chem 
 38   
 39  from rdkit.Chem import inchi 
 40  if not inchi.INCHI_AVAILABLE: 
 41    raise ImportError("This code requires the RDKit to be built with InChI suport") 
 42   
 43   
def _is_achiral_by_symmetry(INCHI):
  """Tell whether RDKit perceives no chiral centre in the given InChI.

  ``INCHI`` is first parsed as-is.  If that yields no molecule, it is parsed
  again with an ``InChI=1/`` prefix, which lets callers pass the layer text
  that follows the version header.

  Returns ``True`` when ``Chem.FindMolChiralCenters`` reports an empty
  result, and ``False`` when it reports at least one centre or raises.
  """
  parsed = Chem.MolFromInchi(INCHI)
  if not parsed:
    # Retry, treating the input as the part that follows the version prefix.
    parsed = Chem.MolFromInchi('InChI=1/{0}'.format(INCHI))

  try:
    # NOTE(review): the two positional flags are presumably ``force`` and
    # ``includeUnassigned`` -- confirm against the RDKit API documentation.
    centres = Chem.FindMolChiralCenters(parsed, True, True)
  except Exception:
    # Best effort: any failure during perception (including a molecule that
    # could not be parsed at all) is reported as "not achiral".
    return False
  else:
    # is there any real chiral centre?
    return not centres
# NOTE(review): this handler is created but not attached to any logger in the
# code visible here -- confirm whether another part of the file uses it.
console = logging.StreamHandler()
UPD_APP = logging.getLogger('inchiinfo.application')  # application runtime information

# All patterns are raw strings: sequences such as ``\/`` or ``\d`` are not
# valid escapes in an ordinary string literal and make newer Python versions
# emit a warning when the module is compiled.  The compiled patterns are
# unchanged.

# Splitters: group 1 is the text before the marker, group 2 the text after it.
version_re = re.compile(r'(.*?)/(.*)')  # get version
reconnected_re = re.compile(r'(.*?)/r(.*)')  # reconnected layer?
fixed_h_re = re.compile(r'(.*?)/f(.*)')  # fixed-H layer?
isotope_re = re.compile(r'(.*?)/i(.*)')  # isotope layer?

# "/t" text that is followed by at least one more "/" section.
stereo_re = re.compile(r'.*\/t(.*?)\/.*')
# "/t" text up to the next "/" or the end of the string.
stereo_all_re = re.compile(r'.*\/t([^\/]+)')
# Entries inside the "/t" text: a number followed by "?", "+" or "-".
undef_stereo_re = re.compile(r'(\d+)\?')
all_stereo_re = re.compile(r'(\d+)[?+-]')
defined_stereo_re = re.compile(r'(\d+)[+-]')
# Everything after "/h"; the capture is greedy and runs to the end of the line.
h_layer_re = re.compile(r'.*\/h(.*)\/?')
# Parenthesised groups starting with "(H", and the numbers that follow a comma.
mobile_h_group_re = re.compile(r'(\(H.+?\))')
mobile_h_atoms_re = re.compile(r',(\d+)')
class InchiInfo(object):
  """Split an InChI string into layers and report stereo / mobile-H details.

  After construction ``parsed_inchi`` is a three-level dict::

      parsed_inchi[connection][fixed_h][isotopic] -> layer text

  * connection: ``'id'``, or ``'id_disconnected'`` and ``'id_reconnected'``
    when the string contains a ``/r`` marker
  * fixed_h: ``'main'`` plus ``'fixed_h'`` when a ``/f`` marker is present
  * isotopic: ``'non-isotopic'`` plus ``'isotopic'`` when a ``/i`` marker is
    present
  """

  def __init__(self, inchi_str):
    """Parse ``inchi_str`` into ``self.parsed_inchi``.

    Raises ``AttributeError`` when the version pattern does not match, for
    example when ``inchi_str`` contains no ``/``.
    """
    # Only the text after the first '/' is analysed; the version is dropped.
    _, rest = version_re.match(inchi_str).groups()

    reconn_match = reconnected_re.match(rest)
    if reconn_match:
      disconnected, reconnected = reconn_match.groups()
      connection_layers = {'id_disconnected': disconnected, 'id_reconnected': reconnected}
    else:
      connection_layers = {'id': rest}

    parsed = {}
    for conn_key, conn_text in connection_layers.items():
      fixed_h_layers = self._split_layer(fixed_h_re, conn_text, 'main', 'fixed_h')
      parsed[conn_key] = {}
      for fixed_key, fixed_text in fixed_h_layers.items():
        parsed[conn_key][fixed_key] = self._split_layer(isotope_re, fixed_text,
                                                        'non-isotopic', 'isotopic')

    self.parsed_inchi = parsed

  @staticmethod
  def _split_layer(pattern, text, head_key, tail_key):
    """Split ``text`` with a two-group ``pattern``; keep it whole if no match."""
    match = pattern.match(text)
    if not match:
      return {head_key: text}
    head, tail = match.groups()
    return {head_key: head, tail_key: tail}

  @staticmethod
  def _sp3_stereo_for_layer(layer_text):
    """Build the ``get_sp3_stereo`` result tuple for a single layer string."""
    stereo = ''
    is_meso = False
    # match patterns with defined and undefined stereo
    stereo_match = stereo_re.match(layer_text)
    if stereo_match:
      stereo = stereo_match.group(1)
    else:
      # match patterns with only undefined stereo or for the MESO case
      stereo_all_match = stereo_all_re.match(layer_text)
      if stereo_all_match:
        stereo = stereo_all_match.group(1)
        is_meso = len(defined_stereo_re.findall(stereo)) > 1

    # Number of ALL stereo centres, and of those marked '?'
    num_stereo = len(all_stereo_re.findall(stereo))
    num_undef_stereo = len(undef_stereo_re.findall(stereo))
    # Meso centres: several undefined centres on a structure without any
    # chiral centre also count as meso.
    is_meso = is_meso or (num_undef_stereo > 1 and _is_achiral_by_symmetry(layer_text))
    return (num_stereo, num_undef_stereo, is_meso, stereo)

  def get_sp3_stereo(self):
    """Retrieve sp3 stereo information.

    Returns a nested dict ``result[fixed_h][isotopic]`` (same keys as the two
    inner levels of ``parsed_inchi``) whose values are 4-item tuples:

    1) Number of stereocenters found in the ``/t`` text (0 when there is none)
    2) Number of undefined (``?``) stereocenters; never larger than 1)
    3) True if the layer is flagged as a meso form: either the ``/t`` text is
       not followed by a further ``/`` section and holds more than one
       defined centre, or it holds more than one undefined centre and the
       structure has no chiral centre
    4) The raw ``/t`` text (``''`` when there is none)

    The result is not keyed by connection layer: when ``parsed_inchi`` holds
    two connection layers, the entries of the one iterated last replace those
    of the first.
    """
    sp3_stereo = {}
    for fixed_layers in self.parsed_inchi.values():
      for fixed_key, iso_layers in fixed_layers.items():
        sp3_stereo[fixed_key] = {}
        for iso_key, layer_text in iso_layers.items():
          sp3_stereo[fixed_key][iso_key] = self._sp3_stereo_for_layer(layer_text)
    return sp3_stereo

  def get_mobile_h(self):
    """Retrieve mobile H (tautomer) information.

    Returns a nested dict ``result[fixed_h][isotopic]`` (same keys as the two
    inner levels of ``parsed_inchi``) whose values are 2-item tuples:

    1) Number of mobile hydrogen groups detected
    2) The groups joined with ``','`` into one string (``''`` when there are
       none); each group is kept as its parenthesised ``(H...)`` text

    The result is not keyed by connection layer: when ``parsed_inchi`` holds
    two connection layers, the entries of the one iterated last replace those
    of the first.
    """
    mobile_h = {}
    for fixed_layers in self.parsed_inchi.values():
      for fixed_key, iso_layers in fixed_layers.items():
        mobile_h[fixed_key] = {}
        for iso_key, layer_text in iso_layers.items():
          h_layer_match = h_layer_re.match(layer_text)
          groups = mobile_h_group_re.findall(h_layer_match.group(1)) if h_layer_match else []
          mobile_h[fixed_key][iso_key] = (len(groups), ','.join(groups))
    return mobile_h
174