Package rdkit :: Package DataStructs :: Module VectCollection
[hide private]
[frames | no frames]

Source Code for Module rdkit.DataStructs.VectCollection

  1  # $Id$ 
  2  # 
  3  #  Copyright (C) 2005-2006 greg landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  from __future__ import print_function 
 12  import copy 
 13  import struct 
 14  from rdkit.six import iterkeys 
 15  from rdkit import six 
 16  from rdkit import DataStructs 
 17   
 18   
class VectCollection(object):
    """A keyed collection of bit vectors queried through their bitwise OR.

    Vectors are stored under a key; bit queries (``vc[i]``, ``GetBit``,
    ``GetOnBits``, ``len``) are answered from a cached vector that is the
    OR of all stored vectors and is rebuilt lazily after a modification.

    >>> vc = VectCollection()
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((1,3,5))
    >>> vc.AddVect(1,bv1)
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((6,8))
    >>> vc.AddVect(2,bv1)
    >>> len(vc)
    10
    >>> vc.GetNumBits()
    10
    >>> vc[0]
    0
    >>> vc[1]
    1
    >>> vc[9]
    0
    >>> vc[6]
    1
    >>> vc.GetBit(6)
    1
    >>> list(vc.GetOnBits())
    [1, 3, 5, 6, 8]

    keys must be unique, so adding a duplicate replaces the
    previous values:
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((7,9))
    >>> vc.AddVect(1,bv1)
    >>> len(vc)
    10
    >>> vc[1]
    0
    >>> vc[9]
    1
    >>> vc[6]
    1

    we can also query the children:
    >>> vc.NumChildren()
    2
    >>> cs = vc.GetChildren()
    >>> id,fp = cs[0]
    >>> id
    1
    >>> list(fp.GetOnBits())
    [7, 9]
    >>> id,fp = cs[1]
    >>> id
    2
    >>> list(fp.GetOnBits())
    [6, 8]

    attach/detach operations:
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((5,6))
    >>> vc.AddVect(3,bv1)
    >>> vc.NumChildren()
    3
    >>> list(vc.GetOnBits())
    [5, 6, 7, 8, 9]
    >>> vc.DetachVectsNotMatchingBit(6)
    >>> vc.NumChildren()
    2
    >>> list(vc.GetOnBits())
    [5, 6, 8]


    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((7,9))
    >>> vc.AddVect(1,bv1)
    >>> vc.NumChildren()
    3
    >>> list(vc.GetOnBits())
    [5, 6, 7, 8, 9]
    >>> vc.DetachVectsMatchingBit(6)
    >>> vc.NumChildren()
    1
    >>> list(vc.GetOnBits())
    [7, 9]


    to copy VectCollections, use the copy module:
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((5,6))
    >>> vc.AddVect(3,bv1)
    >>> list(vc.GetOnBits())
    [5, 6, 7, 9]
    >>> vc2 = copy.copy(vc)
    >>> vc.DetachVectsNotMatchingBit(6)
    >>> list(vc.GetOnBits())
    [5, 6]
    >>> list(vc2.GetOnBits())
    [5, 6, 7, 9]

    The Uniquify() method can be used to remove duplicate vectors:
    >>> vc = VectCollection()
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((7,9))
    >>> vc.AddVect(1,bv1)
    >>> vc.AddVect(2,bv1)
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((2,3,5))
    >>> vc.AddVect(3,bv1)
    >>> vc.NumChildren()
    3
    >>> vc.Uniquify()
    >>> vc.NumChildren()
    2


    """

    def __init__(self):
        """Create an empty collection with no cached OR vector."""
        self.__vects = {}
        self.__orVect = None
        self.__numBits = -1
        self.__needReset = True

    def GetOrVect(self):
        """Return the cached OR of all stored vectors, rebuilding it if stale.

        The result is None when the collection holds no vectors.
        """
        if self.__needReset:
            self.Reset()
        return self.__orVect

    orVect = property(GetOrVect)

    def AddVect(self, idx, vect):
        """Store `vect` under key `idx`, replacing any vector already there."""
        self.__vects[idx] = vect
        self.__needReset = True

    def Reset(self):
        """Rebuild the cached OR vector and bit count if they are stale."""
        if not self.__needReset:
            return
        self.__orVect = None
        if not self.__vects:
            return
        vects = iter(self.__vects.values())
        # Copy the first vector so that OR-ing in place never modifies a child.
        self.__orVect = copy.copy(next(vects))
        self.__numBits = self.__orVect.GetNumBits()
        for vect in vects:
            self.__orVect |= vect
        self.__needReset = False

    def NumChildren(self):
        """Return the number of stored vectors."""
        return len(self.__vects)

    def GetChildren(self):
        """Return the stored vectors as a tuple of (key, vector) pairs."""
        return tuple(self.__vects.items())

    def __getitem__(self, idx):
        """Return bit `idx` of the OR of all stored vectors."""
        if self.__needReset:
            self.Reset()
        return self.__orVect.GetBit(idx)

    GetBit = __getitem__

    def __len__(self):
        """Return the bit count recorded when the OR vector was last built."""
        if self.__needReset:
            self.Reset()
        return self.__numBits

    GetNumBits = __len__

    def GetOnBits(self):
        """Return the on bits of the OR of all stored vectors."""
        if self.__needReset:
            self.Reset()
        return self.__orVect.GetOnBits()

    def DetachVectsNotMatchingBit(self, bit):
        """Remove every stored vector that does not have `bit` set."""
        # Iterate over a snapshot: entries are deleted while looping.
        for key, vect in list(self.__vects.items()):
            if not vect.GetBit(bit):
                del self.__vects[key]
                self.__needReset = True

    def DetachVectsMatchingBit(self, bit):
        """Remove every stored vector that has `bit` set."""
        # Iterate over a snapshot: entries are deleted while looping.
        for key, vect in list(self.__vects.items()):
            if vect.GetBit(bit):
                del self.__vects[key]
                self.__needReset = True

    def Uniquify(self, verbose=False):
        """Drop vectors whose on bits duplicate those of an earlier vector.

        For each distinct set of on bits, the first key (in the collection's
        iteration order) is kept.  When `verbose` is true the before/after
        counts are printed.
        """
        seen = set()
        unique = {}
        for key, vect in self.__vects.items():
            signature = tuple(vect.GetOnBits())
            if signature not in seen:
                seen.add(signature)
                unique[key] = vect

        if verbose:
            print('uniquify:', len(self.__vects), '->', len(unique))
        self.__vects = unique
        # Mark the cached OR vector stale (this used to set a misspelled,
        # unused attribute instead of the real flag).
        self.__needReset = True

    #
    # set up our support for pickling:
    #
    def __getstate__(self):
        """Serialize the collection to bytes.

        Layout (little-endian unsigned ints): the vector count, then for each
        vector its key, the length of its ToBinary() payload and the payload.
        """
        chunks = [struct.pack('<I', len(self.__vects))]
        for key, vect in self.__vects.items():
            chunks.append(struct.pack('<I', key))
            payload = vect.ToBinary()
            size = len(payload)
            chunks.append(struct.pack('<I', size))
            chunks.append(struct.pack('%ds' % size, payload))
        return b''.join(chunks)

    def __setstate__(self, pkl):
        """Restore the collection from the bytes produced by __getstate__."""
        if six.PY3 and isinstance(pkl, str):
            pkl = bytes(pkl, encoding='Latin1')

        self.__vects = {}
        self.__orVect = None
        self.__numBits = -1
        self.__needReset = True
        # Size the integer fields with the same '<I' format used to read them.
        szI = struct.calcsize('<I')
        offset = 0
        nToRead = struct.unpack_from('<I', pkl, offset)[0]
        offset += szI
        for _ in range(nToRead):
            key = struct.unpack_from('<I', pkl, offset)[0]
            offset += szI
            size = struct.unpack_from('<I', pkl, offset)[0]
            offset += szI
            payload = struct.unpack_from('%ds' % size, pkl, offset)[0]
            offset += size
            self.AddVect(key, DataStructs.ExplicitBitVect(payload))
266 267 268 # ------------------------------------ 269 # 270 # doctest boilerplate 271 #
def _runDoctests(verbose=None):  # pragma: nocover
    """Run the module doctests and exit with the failure count as status."""
    import doctest
    import sys
    results = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose)
    # results is (failed, attempted); only the failure count is reported.
    sys.exit(results[0])


if __name__ == '__main__':  # pragma: nocover
    _runDoctests()