Package rdkit :: Package VLib :: Package NodeLib :: Module DbPickleSupplier
[hide private]
[frames] | no frames]

Source Code for Module rdkit.VLib.NodeLib.DbPickleSupplier

  1  #  $Id$ 
  2  # 
  3  #  Copyright (C) 2004 Rational Discovery LLC 
  4  #     All Rights Reserved 
  5  # 
  6  from __future__ import print_function 
  7  from rdkit import RDConfig 
  8  import sys, os.path 
  9  from rdkit.VLib.Supply import SupplyNode 
 10  from rdkit.six.moves import cPickle 
 11   
 12  if RDConfig.usePgSQL: 
 13    from pyPgSQL import PgSQL as sql 
 14   
class _lazyDataSeq:
  """Lazily iterate over the rows of a PgSQL query that carry pickled objects.

  These classes are used to speed up (a lot) the process of pulling pickled
  objects from PostgreSQL databases.  Instead of having to use all of PgSQL's
  typechecking, we make a lot of assumptions about what is coming out of the
  Db and its layout.  The results can lead to drastic improvements in
  performance.

  Calling ``iter()`` on an instance executes ``cmd`` on ``cursor``; each call
  to ``next()`` then produces one row.  The first row is read with
  ``cursor.fetchone()``, later rows with a raw ``fetch 1 from "<cursor name>"``
  query on the underlying connection.

  Arguments:
    - cursor: the PgSQL cursor used to run the query
    - cmd: the SQL command to execute
    - pickleCol: index of the result column holding the pickled data
    - depickle: if true, rows are turned into objects (see ``next()``);
      otherwise each row is returned as a plain list
    - klass: optional callable applied to the raw column value in place of
      ``cPickle.loads``
  """

  def __init__(self, cursor, cmd, pickleCol=1, depickle=1, klass=None):
    self.cursor = cursor
    self.cmd = cmd
    # true only between __iter__() and the first next(): that row must be
    # read through the cursor itself rather than with a raw fetch
    self._first = 0
    self._pickleCol = pickleCol
    self._depickle = depickle
    self._klass = klass

  def _validate(self):
    """Check that the cursor is usable and the pickle column is binary.

    Raises:
      ValueError: the cursor is unusable or the result has fewer than 2 fields
      TypeError: the pickle column is not of type ``sql.BINARY``
    """
    curs = self.cursor
    if not curs or \
       curs.closed or \
       curs.conn is None or \
       (curs.res.resultType != sql.RESULT_DQL and curs.closed is None):
      raise ValueError('bad cursor')
    if curs.res.nfields and curs.res.nfields < 2:
      raise ValueError('invalid number of results returned (%d), must be at least 2' %
                       curs.res.nfields)
    # Each description entry is a DB-API row (name, type_code, ...): the type
    # always lives at index 1, whichever column is being inspected.  Indexing
    # the entry with the column number only worked for pickleCol == 1.
    colDescription = curs.description[self._pickleCol]
    ftv = colDescription[1].value
    if ftv != sql.BINARY:
      raise TypeError('pickle column (%d) of bad type' % self._pickleCol)

  def __iter__(self):
    """Execute the command, validate the result layout and return self."""
    try:
      self.cursor.execute(self.cmd)
    except Exception:
      # report the failing command before letting the error propagate
      import traceback
      traceback.print_exc()
      print('COMMAND:', self.cmd)
      raise
    self._first = 1
    self._validate()
    return self

  def next(self):
    """Return the object built from the next row.

    With ``depickle`` set, the pickle column is converted (``klass(value)`` if
    a klass was given, ``cPickle.loads(value)`` otherwise) and the remaining
    columns are attached to the result as ``_fieldsFromDb``.  Without it the
    row is returned as a list.

    Raises:
      StopIteration: the cursor is no longer usable or there are no more rows
      ValueError: a fetched row has fewer than 2 fields
    """
    curs = self.cursor
    if not curs or \
       curs.closed or \
       curs.conn is None or \
       curs.res is None or \
       (curs.res.resultType != sql.RESULT_DQL and curs.closed is None):
      raise StopIteration
    if not self._first:
      res = curs.conn.conn.query('fetch 1 from "%s"' % self.cursor.name)
      if res.ntuples == 0:
        raise StopIteration
      if res.nfields < 2:
        raise ValueError('bad result: %s' % str(res))
      t = [res.getvalue(0, x) for x in range(res.nfields)]
      val = t[self._pickleCol]
    else:
      t = curs.fetchone()
      self._first = 0
      if t is None:
        # the query produced no rows at all: end the iteration cleanly
        # instead of failing on None[...]
        raise StopIteration
      val = str(t[self._pickleCol])
    if self._depickle:
      if not self._klass:
        # NOTE: unpickling trusts the database contents completely
        fp = cPickle.loads(val)
      else:
        fp = self._klass(val)
      fields = list(t)
      del fields[self._pickleCol]
      fp._fieldsFromDb = fields
    else:
      fp = list(t)
    return fp

  # Python 3 spelling of the iterator protocol
  __next__ = next
93
class _dataSeq(_lazyDataSeq):
  """Random-access variant of the lazy sequence.

  The command is executed once; the first row is read with
  ``cursor.fetchone()`` and every remaining row is pulled in a single raw
  ``fetch all from "<cursor name>"`` query.  Rows can then be read by index,
  by iteration, and counted with ``len()``.

  Arguments:
    - cursor: the PgSQL cursor used to run the query
    - cmd: the SQL command to execute
    - pickleCol: index of the result column holding the pickled data
    - depickle: if true, rows are unpickled with ``cPickle.loads``; otherwise
      each row is returned as a plain list
  """

  def __init__(self, cursor, cmd, pickleCol=1, depickle=1):
    self.cursor = cursor
    self.cmd = cmd
    self.res = None  # raw result holding every row after the first
    self.rowCount = -1  # unknown until the query has been run
    self.idx = 0  # position of the iterator
    self._first = None  # first row, as returned by cursor.fetchone()
    self._pickleCol = pickleCol
    self._depickle = depickle

  def _load(self):
    """Run the command and cache the complete result set on the instance.

    Raises:
      ValueError: the result has fewer than 2 fields (or the cursor is bad)
      TypeError: the pickle column is not binary
    """
    self.cursor.execute(self.cmd)
    self._first = self.cursor.fetchone()
    self._validate()
    self.res = self.cursor.conn.conn.query('fetch all from "%s"' % self.cursor.name)
    # the row taken by fetchone() is not part of self.res; only count it when
    # the query actually produced one
    self.rowCount = self.res.ntuples + (0 if self._first is None else 1)
    self.idx = 0
    if self.res.nfields < 2:
      raise ValueError('bad query result: %s' % str(self.res))

  def __iter__(self):
    """(Re)run the query and return self positioned at the first row."""
    self._load()
    return self

  def next(self):
    """Return the row at the iterator position and advance it."""
    if self.idx >= self.rowCount:
      raise StopIteration

    fp = self[self.idx]
    self.idx += 1

    return fp

  # Python 3 spelling of the iterator protocol
  __next__ = next

  def __len__(self):
    """Return the number of rows (-1 until the query has been run)."""
    return self.rowCount

  def __getitem__(self, idx):
    """Return the object for row ``idx``; negative indices count from the end.

    The query is run on first access.  With ``depickle`` set, the pickle
    column is unpickled and the other columns are attached to the result as
    ``_fieldsFromDb``; otherwise the row is returned as a list.

    Raises:
      IndexError: ``idx`` is outside the result set
    """
    if self.res is None:
      self._load()

    if idx < 0:
      idx = self.rowCount + idx
    if idx < 0 or idx >= self.rowCount:
      raise IndexError('row index out of range')
    if idx == 0:
      val = str(self._first[self._pickleCol])
      t = list(self._first)
    else:
      # Row 0 lives in self._first, so row idx is entry idx - 1 of self.res.
      # Use the requested index here, not the iterator position self.idx.
      row = idx - 1
      val = self.res.getvalue(row, self._pickleCol)
      t = [self.res.getvalue(row, x) for x in range(self.res.nfields)]
    if self._depickle:
      try:
        # NOTE: unpickling trusts the database contents completely
        fp = cPickle.loads(val)
      except Exception:
        import logging
        del t[self._pickleCol]
        logging.exception('Depickling failure in row: %s' % str(t))
        raise
      del t[self._pickleCol]
      fp._fieldsFromDb = t
    else:
      fp = t
    return fp
163 else: 164 _dataSeq = None 165 166
class DbPickleSupplyNode(SupplyNode):
  """ Supplies pickled objects from a db result set:

  Arguments:
    - cursor: the database cursor used to run the query
    - cmd: the SQL command producing the rows
    - binaryCol: index of the result column holding the pickled objects
    - any other keyword arguments are passed on to SupplyNode

  NOTE(review): the previous constructor referenced the undefined names
  ``dbResults`` and ``DbMolSupplier`` and so could never be instantiated.
  The node is now backed by ``_dataSeq``, whose signature matches these
  arguments; confirm this is the intended data source.

  Sample Usage:
    >>> from rdkit.Dbase.DbConnection import DbConnect

  """

  def __init__(self, cursor, cmd, binaryCol, **kwargs):
    SupplyNode.__init__(self, **kwargs)
    if _dataSeq is None:
      # _dataSeq is only defined when RDConfig.usePgSQL is set
      raise RuntimeError('DbPickleSupplyNode requires PgSQL support')
    self._dbResults = _dataSeq(cursor, cmd, pickleCol=binaryCol)
    # created on the first next() so that building the node does not run
    # the query
    self._supplier = None

  def reset(self):
    """Reset the node; the query is run again on the next call to next()."""
    SupplyNode.reset(self)
    self._supplier = None

  def next(self):
    """Return the next object from the result set.

    Raises StopIteration once the rows are exhausted.
    """
    if self._supplier is None:
      self._supplier = iter(self._dbResults)
    return self._supplier.next()

  # Python 3 spelling of the iterator protocol
  __next__ = next
189 190
def GetNode(dbName, tableName):
  """Return a supply node over the data of ``tableName`` in ``dbName``.

  A DbConnect is opened for the table and the result of its ``GetData()``
  call is wrapped in a DbMolSupplyNode.

  NOTE(review): DbMolSupplyNode was never defined or imported in this module,
  so this function always raised NameError.  It is now imported from the
  sibling DbMolSupply module; confirm that a molecule supply node (rather
  than a DbPickleSupplyNode) is what callers expect from here.
  """
  from rdkit.Dbase.DbConnection import DbConnect
  from rdkit.VLib.NodeLib.DbMolSupply import DbMolSupplyNode
  conn = DbConnect(dbName, tableName)
  return DbMolSupplyNode(conn.GetData())
195 196 197 #------------------------------------ 198 # 199 # doctest boilerplate 200 #
def _test():
  """Run the doctests of the ``__main__`` module and return doctest's result."""
  import doctest
  import sys
  main_module = sys.modules["__main__"]
  return doctest.testmod(main_module)
if __name__ == '__main__':
  import sys
  # the doctest result unpacks to two values; the first one (the failure
  # count) becomes the process exit status
  num_failed, _num_tried = _test()
  sys.exit(num_failed)