1
2
3
4
5
6
7
8
9
10 """
11 Supplies a class for working with molecules from databases
12 """
13 import sys
14
15 from rdkit import Chem
16 from rdkit.Chem.Suppliers.MolSupplier import MolSupplier
17
18
21
22
24 """
25 new molecules come back with all additional fields from the
26 database set in a "_fieldsFromDb" data member
27
28 """
29
30 - def __init__(self, dbResults, molColumnFormats={'SMILES': 'SMI',
31 'SMI': 'SMI',
32 'MOLPKL': 'PKL'}, nameCol='', transformFunc=None,
33 **kwargs):
34 """
35
36 DbResults should be a subclass of Dbase.DbResultSet.DbResultBase
37
38 """
39 self._data = dbResults
40 self._colNames = [x.upper() for x in self._data.GetColumnNames()]
41 nameCol = nameCol.upper()
42 self.molCol = -1
43 self.transformFunc = transformFunc
44 try:
45 self.nameCol = self._colNames.index(nameCol)
46 except ValueError:
47 self.nameCol = -1
48 for name in molColumnFormats:
49 name = name.upper()
50 try:
51 idx = self._colNames.index(name)
52 except ValueError:
53 pass
54 else:
55 self.molCol = idx
56 self.molFmt = molColumnFormats[name]
57 break
58 if self.molCol < 0:
59 raise ValueError('DbResultSet has no recognizable molecule column')
60 del self._colNames[self.molCol]
61 self._colNames = tuple(self._colNames)
62 self._numProcessed = 0
63
66
68 data = list(data)
69 molD = data[self.molCol]
70 del data[self.molCol]
71 self._numProcessed += 1
72 try:
73 if self.molFmt == 'SMI':
74 newM = Chem.MolFromSmiles(str(molD))
75 if not newM:
76 warning('Problems processing mol %d, smiles: %s\n' % (self._numProcessed, molD))
77 elif self.molFmt == 'PKL':
78 newM = Chem.Mol(str(molD))
79 except Exception:
80 import traceback
81 traceback.print_exc()
82 newM = None
83 else:
84 if newM and self.transformFunc:
85 try:
86 newM = self.transformFunc(newM, data)
87 except Exception:
88 import traceback
89 traceback.print_exc()
90 newM = None
91 if newM:
92 newM._fieldsFromDb = data
93 nFields = len(data)
94 for i in range(nFields):
95 newM.SetProp(self._colNames[i], str(data[i]))
96 if self.nameCol >= 0:
97 newM.SetProp('_Name', str(data[self.nameCol]))
98 return newM
99
100
102 """ DbMol supplier supporting only forward iteration
103
104
105 new molecules come back with all additional fields from the
106 database set in a "_fieldsFromDb" data member
107
108 """
109
110 - def __init__(self, dbResults, **kwargs):
111 """
112
113 DbResults should be an iterator for Dbase.DbResultSet.DbResultBase
114
115 """
116 DbMolSupplier.__init__(self, dbResults, **kwargs)
117 self.Reset()
118
120 self._dataIter = iter(self._data)
121
123 """
124
125 NOTE: this has side effects
126
127 """
128 try:
129 d = self._dataIter.next()
130 except StopIteration:
131 d = None
132 if d is not None:
133 newM = self._BuildMol(d)
134 else:
135 newM = None
136
137 return newM
138
139
141
142 - def __init__(self, dbResults, **kwargs):
143 """
144
145 DbResults should be a Dbase.DbResultSet.RandomAccessDbResultSet
146
147 """
148 DbMolSupplier.__init__(self, dbResults, **kwargs)
149 self._pos = -1
150
152 return len(self._data)
153
155 newD = self._data[idx]
156 return self._BuildMol(newD)
157
160
162 self._pos += 1
163 res = None
164 if self._pos < len(self):
165 res = self[self._pos]
166 return res
167