1
2
3
4
5
6
7
8
9
10 """ Functionality for SATIS typing atoms
11
12 """
13 from __future__ import print_function
14
15 import itertools
16
17 from rdkit import Chem
18
19
20 aldehydePatt = Chem.MolFromSmarts('[CD2]=[OD1]')
21 ketonePatt = Chem.MolFromSmarts('[CD3]=[OD1]')
22 amidePatt = Chem.MolFromSmarts('[CD3](=[OD1])-[#7]')
23 esterPatt = Chem.MolFromSmarts('C(=[OD1])-O-[#6]')
24 carboxylatePatt = Chem.MolFromSmarts('C(=[OD1])-[OX1]')
25 carboxylPatt = Chem.MolFromSmarts('C(=[OD1])-[OX2]')
26
27 specialCases = ((carboxylatePatt, 97), (esterPatt, 96), (carboxylPatt, 98), (amidePatt, 95),
28 (ketonePatt, 94), (aldehydePatt, 93))
29
30
32 """ returns SATIS codes for all atoms in a molecule
33
34 The SATIS definition used is from:
35 J. Chem. Inf. Comput. Sci. _39_ 751-757 (1999)
36
37 each SATIS code is a string consisting of _neighborsToInclude_ + 1
38 2 digit numbers
39
40 **Arguments**
41
42 - mol: a molecule
43
44 - neighborsToInclude (optional): the number of neighbors to include
45 in the SATIS codes
46
47 **Returns**
48
49 a list of strings nAtoms long
50
51 """
52 nAtoms = mol.GetNumAtoms()
53 atoms = mol.GetAtoms()
54 atomicNums = [atom.GetAtomicNum() for atom in atoms]
55
56
57 specialCaseMatches = []
58 for patt, specialCaseIdx in specialCases:
59 matches = mol.GetSubstructMatches(patt)
60 if matches:
61 matches = set(itertools.chain(*matches))
62 specialCaseMatches.append((specialCaseIdx, matches))
63
64 codes = [None] * nAtoms
65 for i, atom in enumerate(atoms):
66 code = [99] * (neighborsToInclude + 1)
67
68
69 code[0] = min(atom.GetAtomicNum(), 99)
70
71
72 otherIndices = [x.GetIdx() for x in atom.GetNeighbors()]
73 otherNums = sorted([atomicNums[x] for x in otherIndices] + [1] * atom.GetTotalNumHs())
74 if len(otherNums) > neighborsToInclude:
75
76 otherNums = otherNums[-neighborsToInclude:]
77 for j, otherNum in enumerate(otherNums, 1):
78 code[j] = min(otherNum, 99)
79
80
81 if len(otherNums) < neighborsToInclude and code[0] in [6, 8]:
82 atomIdx = atom.GetIdx()
83 for specialCaseIdx, matches in specialCaseMatches:
84 if atomIdx in matches:
85 code[-1] = specialCaseIdx
86 break
87
88 codes[i] = ''.join('%02d' % (x) for x in code)
89 return codes
90