11 #ifndef RD_MAXMINPICKER_H 12 #define RD_MAXMINPICKER_H 21 #include <boost/random.hpp> 26 class distmatFunctor {
28 distmatFunctor(
const double *distMat) : dp_distMat(distMat){};
29 double operator()(
unsigned int i,
unsigned int j) {
34 const double *dp_distMat;
72 unsigned int pickSize)
const;
76 unsigned int pickSize,
82 unsigned int pickSize,
84 double &threshold)
const;
122 int seed = -1)
const {
125 if (poolSize < pickSize)
127 distmatFunctor functor(distMat);
128 return this->lazyPick(functor, poolSize, pickSize, firstPicks, seed);
133 unsigned int pickSize)
const {
135 return pick(distMat, poolSize, pickSize, iv);
147 template <
typename T>
149 unsigned int pickSize,
151 int seed,
double &threshold)
const {
154 if (poolSize < pickSize)
159 unsigned int memsize = (
unsigned int)(poolSize *
sizeof(
MaxMinPickInfo));
165 memset(pinfo, 0, memsize);
167 picks.reserve(pickSize);
168 unsigned int picked = 0;
172 if (firstPicks.empty()) {
175 typedef boost::uniform_int<> distrib_type;
176 typedef boost::variate_generator<rng_type &, distrib_type> source_type;
177 rng_type generator(42u);
178 distrib_type dist(0, poolSize - 1);
179 source_type randomSource(generator, dist);
180 if (seed > 0) generator.seed(static_cast<rng_type::result_type>(seed));
182 pick = randomSource();
184 picks.push_back(pick);
186 pinfo[pick].
picks = 1;
190 for (RDKit::INT_VECT::const_iterator pIdx = firstPicks.begin();
191 pIdx != firstPicks.end(); ++pIdx) {
192 pick =
static_cast<unsigned int>(*pIdx);
193 if (pick >= poolSize) {
197 picks.push_back(pick);
198 pinfo[pick].
picks = 1;
203 if (picked >= pickSize) {
209 unsigned int pool_list = 0;
210 unsigned int *prev = &pool_list;
212 for (
unsigned int i = 0; i < poolSize; i++)
213 if (pinfo[i].picks == 0) {
215 prev = &pinfo[i].
next;
219 unsigned int poolIdx;
220 unsigned int pickIdx;
227 pinfo[poolIdx].
dist_bound = func(poolIdx, pickIdx);
228 pinfo[poolIdx].
picks = 1;
229 prev = &pinfo[poolIdx].
next;
230 }
while (*prev != 0);
233 double maxOFmin = -1.0;
234 double tmpThreshold = -1.0;
235 while (picked < pickSize) {
236 unsigned int *pick_prev = 0;
242 if (minTOi > maxOFmin) {
243 unsigned int pi = pinfo[poolIdx].
picks;
244 while (pi < picked) {
245 unsigned int picki = picks[pi];
247 double dist = func(poolIdx, picki);
249 if (dist <= minTOi) {
251 if (minTOi <= maxOFmin)
break;
255 pinfo[poolIdx].
picks = pi;
256 if (minTOi > maxOFmin) {
262 prev = &pinfo[poolIdx].
next;
263 }
while (*prev != 0);
266 if (threshold >= 0.0 && maxOFmin < threshold)
break;
267 tmpThreshold = maxOFmin;
269 *pick_prev = pinfo[pick].
next;
270 picks.push_back(pick);
274 threshold = tmpThreshold;
279 template <
typename T>
281 unsigned int pickSize,
284 double threshold = -1.0;
289 template <
typename T>
291 unsigned int pickSize)
const {
293 double threshold = -1.0;
std::list< int > INT_LIST
boost::minstd_rand rng_type
#define CHECK_INVARIANT(expr, mess)
double getDistFromLTM(const double *distMat, unsigned int i, unsigned int j)
Function to look up a distance from a 1D lower-triangular distance matrix.
RDKit::INT_VECT lazyPick(T &func, unsigned int poolSize, unsigned int pickSize) const
Contains the implementation for a lazy MaxMin diversity picker.
std::vector< int > INT_VECT
RDKit::INT_VECT pick(const double *distMat, unsigned int poolSize, unsigned int pickSize) const
Implements the MaxMin algorithm for picking a subset of items from a pool.
RDKit::INT_VECT pick(const double *distMat, unsigned int poolSize, unsigned int pickSize, RDKit::INT_VECT firstPicks, int seed=-1) const
Contains the implementation for the MaxMin diversity picker.
MaxMinPicker()
Default Constructor.
Class to allow us to throw a ValueError from C++ and have it make it back to Python.
Abstract base class to perform item picking (typically molecules) using a distance matrix...