Package rdkit :: Package Chem :: Package Fingerprints :: Module DbFpSupplier
[hide private]
[frames | no frames]

Source Code for Module rdkit.Chem.Fingerprints.DbFpSupplier

  1  # 
  2  # Copyright (C) 2003-2006 greg Landrum and Rational Discovery LLC 
  3  # 
  4  #   @@ All Rights Reserved @@ 
  5  #  This file is part of the RDKit. 
  6  #  The contents are covered by the terms of the BSD license 
  7  #  which is included in the file license.txt, found at the root 
  8  #  of the RDKit source tree. 
  9  # 
 10  """ Supplies a class for working with fingerprints from databases 
 11  #DOC 
 12   
 13  """ 
 14  from rdkit import DataStructs 
 15  from rdkit import six 
 16  from rdkit.VLib.Node import VLibNode 
 17  from rdkit.six.moves import cPickle 
 18   
 19   
class DbFpSupplier(VLibNode):
    """Base class for nodes that supply fingerprints read from a database.

    Each row of the wrapped result set is converted into a fingerprint by
    ``_BuildFp``.  New fps come back with all additional fields from the
    database set in a "_fieldsFromDb" data member.

    ``next()`` relies on a ``NextItem()`` method that this class does not
    define itself.
    """

    def __init__(self, dbResults, fpColName='AutoFragmentFp', usePickles=True):
        """Wrap a database result set.

        DbResults should be a subclass of Dbase.DbResultSet.DbResultBase

        Arguments:
          - dbResults: the result set; it must provide ``GetColumnNames()``.
          - fpColName: name of the column holding the fingerprint.  The
            lookup is case-insensitive (all names are upper-cased).
          - usePickles: if true the column content is unpickled, otherwise
            it is handed to ``DataStructs.ExplicitBitVect``.

        Raises:
          ValueError: if ``fpColName`` is not a column of the result set.
        """
        VLibNode.__init__(self)
        self._usePickles = usePickles
        self._data = dbResults
        self._fpColName = fpColName.upper()
        self._colNames = [x.upper() for x in self._data.GetColumnNames()]
        if self._fpColName not in self._colNames:
            raise ValueError('fp column name "%s" not found in result set: %s' %
                             (self._fpColName, str(self._colNames)))
        self.fpCol = self._colNames.index(self._fpColName)
        # The fingerprint column is not reported as a regular data column.
        del self._colNames[self.fpCol]
        self._colNames = tuple(self._colNames)
        self._numProcessed = 0

    def GetColumnNames(self):
        """Return the upper-cased column names, minus the fingerprint column,
        as a tuple."""
        return self._colNames

    def _BuildFp(self, data):
        """Build a fingerprint from one row of the result set.

        Arguments:
          - data: a sequence of column values; the entry at ``self.fpCol``
            holds the serialized fingerprint.

        Returns:
          the fingerprint with the remaining column values attached as
          ``_fieldsFromDb``, or None if the fingerprint could not be
          constructed (the traceback is printed in that case).
        """
        fields = list(data)
        raw = fields.pop(self.fpCol)
        if six.PY3:
            if isinstance(raw, (bytes, bytearray, memoryview)):
                # Already binary: bytes(x, encoding=...) would raise a
                # TypeError for these, so convert them directly.
                pkl = bytes(raw)
            else:
                pkl = bytes(raw, encoding='Latin1')
        else:
            pkl = str(raw)
        # Counted whether or not the fingerprint can be built.
        self._numProcessed += 1
        try:
            if self._usePickles:
                # SECURITY: unpickling can execute arbitrary code; only use
                # this with databases whose content is trusted.
                if six.PY3:
                    newFp = cPickle.loads(pkl, encoding='bytes')
                else:
                    # The ``encoding`` keyword is a Python 3 addition.
                    newFp = cPickle.loads(pkl)
            else:
                newFp = DataStructs.ExplicitBitVect(pkl)
        except Exception:
            # Best effort: report the problem and signal failure with None.
            import traceback
            traceback.print_exc()
            newFp = None
        # Explicit None test so that a valid fingerprint that happens to
        # evaluate as false still receives its database fields.
        if newFp is not None:
            newFp._fieldsFromDb = fields
        return newFp

    def next(self):
        """Return the next fingerprint from ``NextItem()``.

        Raises:
          StopIteration: when ``NextItem()`` returns None.
        """
        itm = self.NextItem()
        if itm is None:
            raise StopIteration
        return itm

    __next__ = next  # py3
77 78
class ForwardDbFpSupplier(DbFpSupplier):
    """ DbFp supplier supporting only forward iteration

    >>> from rdkit import RDConfig
    >>> from rdkit.Dbase.DbConnection import DbConnect
    >>> fName = RDConfig.RDTestDatabase
    >>> conn = DbConnect(fName,'simple_combined')
    >>> suppl = ForwardDbFpSupplier(conn.GetData())

    we can loop over the supplied fingerprints:
    >>> fps = []
    >>> for fp in suppl:
    ...     fps.append(fp)
    >>> len(fps)
    12

    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to DbFpSupplier, then prime the row iterator."""
        DbFpSupplier.__init__(self, *args, **kwargs)
        self.reset()

    def reset(self):
        """Reset the base class and start a fresh iterator over the data."""
        DbFpSupplier.reset(self)
        self._dataIter = iter(self._data)

    def NextItem(self):
        """Return the fingerprint for the next row, or None when exhausted.

        NOTE: this has side effects (the underlying iterator is advanced)

        """
        row = next(self._dataIter, None)
        if row is None:
            return None
        return self._BuildFp(row)
120 121
class RandomAccessDbFpSupplier(DbFpSupplier):
    """ DbFp supplier supporting random access:
    >>> import os.path
    >>> from rdkit import RDConfig
    >>> from rdkit.Dbase.DbConnection import DbConnect
    >>> fName = RDConfig.RDTestDatabase
    >>> conn = DbConnect(fName,'simple_combined')
    >>> suppl = RandomAccessDbFpSupplier(conn.GetData())
    >>> len(suppl)
    12

    we can pull individual fingerprints:
    >>> fp = suppl[5]
    >>> fp.GetNumBits()
    128
    >>> fp.GetNumOnBits()
    54

    a standard loop over the fingerprints:
    >>> fps = []
    >>> for fp in suppl:
    ...     fps.append(fp)
    >>> len(fps)
    12

    or we can use an indexed loop:
    >>> fps = [None]*len(suppl)
    >>> for i in range(len(suppl)):
    ...     fps[i] = suppl[i]
    >>> len(fps)
    12

    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to DbFpSupplier, then rewind the cursor."""
        DbFpSupplier.__init__(self, *args, **kwargs)
        self.reset()

    def __len__(self):
        """Return the number of rows in the wrapped result set."""
        return len(self._data)

    def __getitem__(self, idx):
        """Return the fingerprint built from row ``idx`` of the result set."""
        return self._BuildFp(self._data[idx])

    def reset(self):
        """Move the cursor to just before the first row."""
        # NOTE(review): unlike ForwardDbFpSupplier.reset, the base-class
        # reset is not invoked here - confirm that this is intentional.
        self._pos = -1

    def NextItem(self):
        """Advance the cursor and return that row's fingerprint.

        Returns None once the cursor has moved past the last row.
        """
        self._pos += 1
        if self._pos >= len(self):
            return None
        return self[self._pos]
# ------------------------------------
#
#  doctest boilerplate
#
def _runDoctests(verbose=None):  # pragma: nocover
    """Run doctest.testmod() and exit with the number of failed examples.

    ``verbose`` is passed straight through to ``doctest.testmod``.
    """
    import doctest
    import sys
    outcome = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose)
    # The failure count becomes the process exit status (0 means success).
    sys.exit(outcome.failed)


if __name__ == '__main__':  # pragma: nocover
    _runDoctests()