Package rdkit :: Package Chem :: Module SATIS
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.SATIS

 1  # 
 2  # Copyright (C) 2001-2006 greg Landrum and Rational Discovery LLC 
 3  # 
 4  #   @@ All Rights Reserved @@ 
 5  #  This file is part of the RDKit. 
 6  #  The contents are covered by the terms of the BSD license 
 7  #  which is included in the file license.txt, found at the root 
 8  #  of the RDKit source tree. 
 9  # 
10  """  Functionality for SATIS typing atoms 
11   
12  """ 
13  from __future__ import print_function 
14   
15  import itertools 
16   
17  from rdkit import Chem 
18   
19  #  These are SMARTS patterns for the special cases used in SATIS typing. 
20  aldehydePatt = Chem.MolFromSmarts('[CD2]=[OD1]') 
21  ketonePatt = Chem.MolFromSmarts('[CD3]=[OD1]') 
22  amidePatt = Chem.MolFromSmarts('[CD3](=[OD1])-[#7]') 
23  esterPatt = Chem.MolFromSmarts('C(=[OD1])-O-[#6]') 
24  carboxylatePatt = Chem.MolFromSmarts('C(=[OD1])-[OX1]') 
25  carboxylPatt = Chem.MolFromSmarts('C(=[OD1])-[OX2]') 
26   
27  specialCases = ((carboxylatePatt, 97), (esterPatt, 96), (carboxylPatt, 98), (amidePatt, 95), 
28                  (ketonePatt, 94), (aldehydePatt, 93)) 
29   
30   
31 -def SATISTypes(mol, neighborsToInclude=4):
32 """ returns SATIS codes for all atoms in a molecule 33 34 The SATIS definition used is from: 35 J. Chem. Inf. Comput. Sci. _39_ 751-757 (1999) 36 37 each SATIS code is a string consisting of _neighborsToInclude_ + 1 38 2 digit numbers 39 40 **Arguments** 41 42 - mol: a molecule 43 44 - neighborsToInclude (optional): the number of neighbors to include 45 in the SATIS codes 46 47 **Returns** 48 49 a list of strings nAtoms long 50 51 """ 52 nAtoms = mol.GetNumAtoms() 53 atoms = mol.GetAtoms() 54 atomicNums = [atom.GetAtomicNum() for atom in atoms] 55 56 # Collect all atom indices that match one of the special patterns 57 specialCaseMatches = [] 58 for patt, specialCaseIdx in specialCases: 59 matches = mol.GetSubstructMatches(patt) 60 if matches: 61 matches = set(itertools.chain(*matches)) 62 specialCaseMatches.append((specialCaseIdx, matches)) 63 64 codes = [None] * nAtoms 65 for i, atom in enumerate(atoms): 66 code = [99] * (neighborsToInclude + 1) 67 68 # Atom 69 code[0] = min(atom.GetAtomicNum(), 99) 70 71 # Get atomic numbers of connected neighbours and use for code 72 otherIndices = [x.GetIdx() for x in atom.GetNeighbors()] 73 otherNums = sorted([atomicNums[x] for x in otherIndices] + [1] * atom.GetTotalNumHs()) 74 if len(otherNums) > neighborsToInclude: 75 # Get the last neighborsToInclude elements from otherNums 76 otherNums = otherNums[-neighborsToInclude:] 77 for j, otherNum in enumerate(otherNums, 1): 78 code[j] = min(otherNum, 99) 79 80 # Handle special cases where we have less than neighborsToInclude neighbors 81 if len(otherNums) < neighborsToInclude and code[0] in [6, 8]: 82 atomIdx = atom.GetIdx() 83 for specialCaseIdx, matches in specialCaseMatches: 84 if atomIdx in matches: 85 code[-1] = specialCaseIdx 86 break 87 88 codes[i] = ''.join('%02d' % (x) for x in code) 89 return codes
90