1
2
3
4
5
6
7
8
9
10
11 """ class definitions for similarity screening
12
13 See _SimilarityScreener_ for overview of required API
14
15 """
16 from rdkit import DataStructs
17 from rdkit import six
18 from rdkit.DataStructs import TopNContainer
19
20
22 """ base class
23
24 important attributes:
25 probe: the probe fingerprint against which we screen.
26
27 metric: a function that takes two arguments and returns a similarity
28 measure between them
29
30 dataSource: the source pool from which to draw, needs to support
31 a next() method
32
33 fingerprinter: a function that takes a molecule and returns a
34 fingerprint of the appropriate format
35
36
37 **Notes**
38 subclasses must support either an iterator interface
39 or __len__ and __getitem__
40 """
41
def __init__(self, probe=None, metric=None, dataSource=None, fingerprinter=None):
    """ stores the screening configuration on the instance

    **Arguments**

      - probe: the probe fingerprint against which we screen

      - metric: a function that takes two arguments and returns a
        similarity measure between them

      - dataSource: the source pool from which to draw

      - fingerprinter: a function that takes a molecule and returns a
        fingerprint of the appropriate format

    Every argument is optional and defaults to None; nothing is
    validated here, the values are stored as given.

    """
    # Targets are bound left to right, so the attributes are set in the
    # order: metric, dataSource, fingerprinter, probe.
    self.metric, self.dataSource = metric, dataSource
    self.fingerprinter, self.probe = fingerprinter, probe
47
49 """ used to reset screeners that behave as iterators """
50 pass
51
52
54 """ sets our probe fingerprint """
55 self.probe = probeFingerprint
56
58 """ returns a fingerprint for a single probe object
59
60 This is potentially useful in initializing our internal
61 probe object.
62
63 """
64 return self.fingerprinter(probe)
65
66
68 """ Used to return all compounds that have a similarity
69 to the probe beyond a threshold value
70
71 **Notes**:
72
73 - This is as lazy as possible, so the data source isn't
74 queried until the client asks for a hit.
75
76 - In addition to being lazy, this class is as thin as possible.
77 (Who'd have thought it was possible!)
78 Hits are *not* stored locally, so if a client resets
79 the iteration and starts over, the same amount of work must
80 be done to retrieve the hits.
81
82 - The thinness and laziness force us to support only forward
83 iteration (not random access)
84
85 """
86
87 - def __init__(self, threshold, **kwargs):
91
92
94 """ *Internal use only* """
95 done = 0
96 res = None
97 sim = 0
98 while not done:
99
100
101 obj = six.next(self.dataIter)
102 fp = self.fingerprinter(obj)
103 sim = DataStructs.FingerprintSimilarity(fp, self.probe, self.metric)
104 if sim >= self.threshold:
105 res = obj
106 done = 1
107 return sim, res
108
110 """ used to reset our internal state so that iteration
111 starts again from the beginning
112 """
113 self.dataSource.reset()
114 self.dataIter = iter(self.dataSource)
115
117 """ returns an iterator for this screener
118 """
119 self.Reset()
120 return self
121
123 """ required part of iterator interface """
124 return self._nextMatch()
125
126 __next__ = next
127
128
130 """ A screener that only returns the top N hits found
131
132 **Notes**
133
134 - supports forward iteration and getitem
135
136 """
137
143
146
152
154 if self._pos >= self.numToGet:
155 raise StopIteration
156 else:
157 res = self.topN[self._pos]
158 self._pos += 1
159 return res
160
161 __next__ = next
162
169
171 if self.topN is None:
172 self._initTopN()
173 return self.numToGet
174
179