1
2
3
4
5
6
7
8
9
10 """ Contains an implementation of Physicochemical property fingerprints, as
11 described in:
12 Kearsley, S. K. et al.
13 "Chemical Similarity Using Physiochemical Property Descriptors."
14 J. Chem.Inf. Model. 36, 118-127 (1996)
15
16 The fingerprints can be accessed through the following functions:
17 - GetBPFingerprint
18 - GetBTFingerprint
19
20 """
21 import os.path
22 import re
23
24 from rdkit import Chem
25 from rdkit import RDConfig
26 from rdkit.Chem import rdMolDescriptors
27 from rdkit.Chem.rdMolDescriptors import GetAtomPairFingerprint, GetTopologicalTorsionFingerprint
28
# Bit-width taken directly from RDKit's atom-pair parameters.
numPathBits = rdMolDescriptors.AtomPairsParameters.numPathBits
# Largest unsigned value that fits in numPathBits bits.
_maxPathLen = (1 << numPathBits) - 1
# numPathBits plus twice the atom-pair codeSize
# (presumably one path field and two atom codes -- confirm against RDKit docs).
numFpBits = numPathBits + 2 * rdMolDescriptors.AtomPairsParameters.codeSize
# 2 ** numFpBits
fpLen = 1 << numFpBits
33
34
def _readPattyDefs(fname=os.path.join(RDConfig.RDDataDir, 'SmartsLib', 'patty_rules.txt')):
    """ Load atom-typing rules from a rules file.

    Every line is stripped, truncated at the first '# ' and stripped again;
    lines that end up empty are ignored. What remains is split on runs of
    spaces: the first field is parsed as SMARTS and the second field is the
    type name. Lines with fewer than two fields, or whose SMARTS cannot be
    parsed (Chem.MolFromSmarts returns None), are skipped.

    Arguments:
      - fname: path of the rules file to read

    Returns: a list of (query molecule, type name) tuples in file order.

    """
    matchers = []
    with open(fname, 'r') as rulesFile:
        for rawLine in rulesFile:
            rule = rawLine.strip().split('# ')[0].strip()
            if rule == '':
                continue
            fields = re.split('[ ]+', rule)
            if len(fields) < 2:
                continue
            query = Chem.MolFromSmarts(fields[0])
            if query is not None:
                matchers.append((query, fields[1]))
    return matchers
48
49
# Cache for the default typing rules; AssignPattyTypes fills it from
# _readPattyDefs() the first time it is called without explicit defns.
_pattyDefs = None
51
52
# NOTE(review): the `def` line is missing from this listing. The header below is
# reconstructed from the doctest call and the `mol` / `defns` names used in the
# body -- confirm against the upstream file.
def AssignPattyTypes(mol, defns=None):
    """ Assign a type name to every atom of a molecule.

    Arguments:
      - mol: the molecule whose atoms are to be typed
      - defns: optional iterable of (query molecule, type name) pairs; when
        None, the default rules are loaded once with _readPattyDefs() and
        cached in the module-level _pattyDefs

    Returns: a list with one entry per atom, indexed by atom index. Atoms that
    no rule matches keep the empty string.

    >>> from rdkit import Chem
    >>> AssignPattyTypes(Chem.MolFromSmiles('OCC(=O)O'))
    ['POL', 'HYD', 'OTH', 'ANI', 'ANI']

    """
    global _pattyDefs
    if defns is None:
        # Lazily load and cache the default rule set.
        if _pattyDefs is None:
            _pattyDefs = _readPattyDefs()
        defns = _pattyDefs
    res = [''] * mol.GetNumAtoms()
    for matcher, nm in defns:
        # Only the first atom of each match is labelled, and rules are applied
        # in order, so a later rule overwrites an earlier rule's label.
        for match in mol.GetSubstructMatches(matcher, uniquify=False):
            res[match[0]] = nm
    return res
72
73
# Integer code for each atom type name; GetBPFingerprint uses these codes as
# the atom invariants handed to the fingerprint function.
# (Names presumably abbreviate cation, anion, polar, donor, acceptor,
# hydrophobic and other -- confirm against the rules file.)
typMap = {
    'CAT': 1,
    'ANI': 2,
    'POL': 3,
    'DON': 4,
    'ACC': 5,
    'HYD': 6,
    'OTH': 7,
}
75
76
# NOTE(review): the `def` line is missing from this listing. The header below is
# reconstructed from the doctest call, the `mol` / `fpfn` names used in the body
# and the file-level import of GetAtomPairFingerprint -- confirm against the
# upstream file.
def GetBPFingerprint(mol, fpfn=GetAtomPairFingerprint):
    """ Fingerprint a molecule using its atom types as atom invariants.

    Each atom's type name from AssignPattyTypes is translated to its integer
    code through typMap, and the resulting list is passed to `fpfn` as
    `atomInvariants`.

    Arguments:
      - mol: the molecule to fingerprint
      - fpfn: callable invoked as fpfn(mol, atomInvariants=...)

    Returns: whatever `fpfn` returns.

    Raises: KeyError if an atom's type name is not a key of typMap.

    >>> from rdkit import Chem
    >>> fp = GetBPFingerprint(Chem.MolFromSmiles('OCC(=O)O'))
    >>> fp.GetTotalVal()
    10
    >>> nze=fp.GetNonzeroElements()
    >>> sorted([(k,v) for k,v in nze.items()])
    [(32834, 1), (49219, 2), (98370, 2), (98401, 1), (114753, 2), (114786, 1), (114881, 1)]

    """
    typs = [typMap[x] for x in AssignPattyTypes(mol)]
    return fpfn(mol, atomInvariants=typs)
91
92
# NOTE(review): the `def` line is missing from this listing. The header below is
# reconstructed from the doctest call, the `mol` / `fpfn` names used in the body
# and the file-level import of GetTopologicalTorsionFingerprint -- confirm
# against the upstream file.
def GetBTFingerprint(mol, fpfn=GetTopologicalTorsionFingerprint):
    """ Fingerprint a molecule via GetBPFingerprint with a different `fpfn`.

    Arguments:
      - mol: the molecule to fingerprint
      - fpfn: callable forwarded unchanged to GetBPFingerprint

    Returns: the result of GetBPFingerprint(mol, fpfn=fpfn).

    >>> from rdkit import Chem
    >>> mol = Chem.MolFromSmiles('OCC(N)O')
    >>> AssignPattyTypes(mol)
    ['POL', 'HYD', 'HYD', 'CAT', 'POL']
    >>> fp = GetBTFingerprint(mol)
    >>> fp.GetTotalVal()
    2
    >>> nze=fp.GetNonzeroElements()
    >>> sorted([(k,v) for k,v in nze.items()])
    [(538446850..., 1), (538446852..., 1)]

    """
    return GetBPFingerprint(mol, fpfn=fpfn)
108
109
110
111
112
113
115 import sys
116 import doctest
117 failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose)
118 sys.exit(failed)
119
120
# When executed as a script, run the doctests; the process exit status is the
# number of failures.
if __name__ == '__main__':
    _runDoctests()
123