Package rdkit :: Package Chem :: Package Fingerprints :: Module SimilarityScreener
[hide private]
[frames | no frames]

Source Code for Module rdkit.Chem.Fingerprints.SimilarityScreener

  1  # $Id$ 
  2  # 
  3  # Copyright (C) 2003-2006 Greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  """ class definitions for similarity screening 
 12   
 13  See _SimilarityScreener_ for overview of required API 
 14   
 15  """ 
 16  from rdkit import DataStructs 
 17  from rdkit import six 
 18  from rdkit.DataStructs import TopNContainer 
 19   
 20   
class SimilarityScreener(object):
    """ base class for the similarity screeners

      important attributes:
        probe: the probe fingerprint against which we screen.

        metric: a function that takes two arguments and returns a similarity
                measure between them

        dataSource: the source pool from which to draw, needs to support
                a next() method

        fingerprinter: a function that takes a molecule and returns a
               fingerprint of the appropriate format


      **Notes**
         subclasses must support either an iterator interface
         or __len__ and __getitem__
    """

    def __init__(self, probe=None, metric=None, dataSource=None, fingerprinter=None):
        """ stores the screening configuration on the instance

          every argument is optional and defaults to None; nothing is
          validated or computed here.
        """
        self.probe = probe
        self.metric = metric
        self.dataSource = dataSource
        self.fingerprinter = fingerprinter

    def Reset(self):
        """ hook used to reset screeners that behave as iterators

          the base class keeps no iteration state, so this does nothing.
        """
        return None

    # FIX: add setters/getters for attributes
    def SetProbe(self, probeFingerprint):
        """ replaces the probe fingerprint that we screen against """
        self.probe = probeFingerprint

    def GetSingleFingerprint(self, probe):
        """ returns the fingerprint of a single probe object

          The object is handed to our fingerprinter and the result is
          returned unchanged.  This is potentially useful in initializing
          our internal probe object.

        """
        makeFingerprint = self.fingerprinter
        return makeFingerprint(probe)
65 66
class ThresholdScreener(SimilarityScreener):
    """ Used to return all compounds that have a similarity
      to the probe beyond a threshold value

     **Notes**:

       - This is as lazy as possible, so the data source isn't
         queried until the client asks for a hit.

       - In addition to being lazy, this class is as thin as possible.
         (Who'd have thought it was possible!)
         Hits are *not* stored locally, so if a client resets
         the iteration and starts over, the same amount of work must
         be done to retrieve the hits.

       - The thinness and laziness forces us to support only forward
         iteration (not random access)

       - Each hit is returned as a (similarity, object) tuple.

    """

    def __init__(self, threshold, **kwargs):
        """ threshold: hits must have a similarity >= this value

          all other keyword arguments are passed on to
          SimilarityScreener.__init__
        """
        SimilarityScreener.__init__(self, **kwargs)
        self.threshold = threshold
        self.dataIter = iter(self.dataSource)

    # FIX: add setters/getters for attributes

    def _nextMatch(self):
        """ *Internal use only*

          Pulls objects from the data iterator until one reaches the
          threshold and returns it as (similarity, object).
        """
        while True:
            # when the data source iterator is exhausted this raises
            # StopIteration, which we deliberately let propagate:
            # that's how we stop when no match is found
            obj = six.next(self.dataIter)
            fp = self.fingerprinter(obj)
            sim = DataStructs.FingerprintSimilarity(fp, self.probe, self.metric)
            if sim >= self.threshold:
                return sim, obj

    def Reset(self):
        """ used to reset our internal state so that iteration
          starts again from the beginning

          The data source's own reset() is called when it has one; plain
          iterables (lists, tuples, ...) do not, and for those asking for
          a fresh iterator is all that can be done.
        """
        reset = getattr(self.dataSource, 'reset', None)
        if reset is not None:
            reset()
        self.dataIter = iter(self.dataSource)

    def __iter__(self):
        """ returns an iterator for this screener

          Note that this restarts the screen from the beginning.
        """
        self.Reset()
        return self

    def next(self):
        """ required part of iterator interface """
        return self._nextMatch()

    __next__ = next
127 128
class TopNScreener(SimilarityScreener):
    """ A screener that only returns the top N hits found

      **Notes**

        - supports forward iteration and getitem

        - the data source is not scanned until the hits are first needed
          (iteration, next(), len() or indexing); the results are then
          kept in self.topN and reused.

    """

    def __init__(self, num, **kwargs):
        """ num: the number of hits to keep

          all other keyword arguments are passed on to
          SimilarityScreener.__init__
        """
        SimilarityScreener.__init__(self, **kwargs)
        self.numToGet = num
        self.topN = None  # filled in lazily by _initTopN()
        self._pos = 0

    def Reset(self):
        """ moves the iteration position back to the first hit

          the stored hits are kept, the data source is not rescanned.
        """
        self._pos = 0

    def __iter__(self):
        """ returns an iterator for this screener, starting at the first hit """
        if self.topN is None:
            self._initTopN()
        self.Reset()
        return self

    def next(self):
        """ required part of iterator interface

          Returns the entry of self.topN at the current position and
          advances; raises StopIteration once numToGet entries have been
          returned.
        """
        if self._pos >= self.numToGet:
            raise StopIteration
        # next() may legitimately be called without a prior iter();
        # build the hit list on demand, as __len__ and __getitem__ do
        if self.topN is None:
            self._initTopN()
        res = self.topN[self._pos]
        self._pos += 1
        return res

    __next__ = next

    def _initTopN(self):
        """ *Internal use only*

          Scans the whole data source, inserting each object together
          with its similarity to the probe into a fresh TopNContainer.
        """
        self.topN = TopNContainer.TopNContainer(self.numToGet)
        for obj in self.dataSource:
            fp = self.fingerprinter(obj)
            sim = DataStructs.FingerprintSimilarity(fp, self.probe, self.metric)
            self.topN.Insert(sim, obj)

    def __len__(self):
        """ returns the number of hits requested (numToGet) """
        if self.topN is None:
            self._initTopN()
        return self.numToGet

    def __getitem__(self, idx):
        """ returns the entry of self.topN at position idx """
        if self.topN is None:
            self._initTopN()
        return self.topN[idx]
179