RDKit
Open-source cheminformatics and machine learning.
EnumerationStrategyBase.h
Go to the documentation of this file.
1 //
2 // Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following
13 // disclaimer in the documentation and/or other materials provided
14 // with the distribution.
15 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
16 // nor the names of its contributors may be used to endorse or promote
17 // products derived from this software without specific prior written
18 // permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 #include <RDGeneral/export.h>
33 #ifndef ENUMERATION_STRATEGY_H
34 #define ENUMERATION_STRATEGY_H
35 
36 #include "EnumerateTypes.h"
37 #include "../Reaction.h"
38 #include <vector>
40 #include <cstdint>
41 #ifdef RDK_USE_BOOST_SERIALIZATION
42 #include <boost/serialization/assume_abstract.hpp>
43 #include <boost/serialization/vector.hpp>
44 // the next two includes need to be there for boost 1.56
45 #include <boost/serialization/singleton.hpp>
46 #include <boost/serialization/extended_type_info.hpp>
47 #include <boost/serialization/shared_ptr.hpp>
48 #endif
50 
51 #include <GraphMol/RDKitBase.h>
52 
53 namespace RDKit {
54 
55 //! class for flagging enumeration strategy errors
57  : public std::exception {
58  public:
    //! construct from a C string; the text is copied into _msg
59  EnumerationStrategyException(const char *msg) : _msg(msg){};
    //! construct from a std::string; the text is copied into _msg
60  EnumerationStrategyException(const std::string &msg) : _msg(msg){};
    //! returns the stored message; the pointer refers to _msg's buffer
61  const char *what() const noexcept override { return _msg.c_str(); };
    //! convenience alias, forwards to what()
62  const char *message() const noexcept { return what(); };
64 
65  private:
    //! owned copy of the message text handed to the constructor
66  std::string _msg;
67 };
68 
69 //! Return the number of elements per input vector
70 /*! \param bbs vector<vector<T> >
71 
72  \result vector<uint64_t> number of elements in each vector
73  */
74 template <class T>
76  const std::vector<std::vector<T>> &bbs) {
78  for (size_t i = 0; i < bbs.size(); ++i) sizes.push_back(bbs[i].size());
79  return sizes;
80 }
81 
82 //! getSizesFromReactants
83 //! Helper function for enumeration, bbs are stored in a
84 //! std::vector< std::vector< boost::shared_ptr<ROMol> > >
85 //
87  const std::vector<MOL_SPTR_VECT> &bbs);
88 
89 //! getReactantsFromRGroups
90 //! Helper function for enumeration, bbs are stored in a
91 //! std::vector< std::vector< boost::shared_ptr<ROMol> > >
92 //
94 getReactantsFromRGroups(const std::vector<MOL_SPTR_VECT> &bbs,
95  const EnumerationTypes::RGROUPS &rgroups);
96 
97 //! computeNumProducts
98 //! Returns the number of possible product combinations from
99 //! the given numbers of building blocks for each rgroup,
100 //! or EnumerationStrategyBase::EnumerationOverflow if the
101 //! number will not fit into the machine's integer type.
102 //! n.b. An overflow simply means there are a lot of products,
103 //! not that they cannot be enumerated.
105  const EnumerationTypes::RGROUPS &sizes);
106 
107 //! Base Class for enumeration strategies
108 //! Usage:
109 //! EnumerationStrategyBase must be initialized with both a reaction
110 //! and the building block (molecule) vector to be sampled.
111 //!
112 //! \verbatim
113 //! EnumerationStrategyBase &eb = ...
114 //! if(eb) { // can we get another entry
115 //! const std::vector<int> &v = eb.next();
116 //! v[0] // RGroup 0 position
117 //! v[1] // RGroup 1 position...
118 //! }
119 //! \endverbatim
120 
122  protected:
123  EnumerationTypes::RGROUPS m_permutation; // current position: one index per reactant set
125  m_permutationSizes; // number of building blocks in each reactant set
126  boost::uint64_t
127  m_numPermutations; // total number of permutations for this group;
128  // EnumerationOverflow if the count cannot be represented
129  public:
     //! sentinel value (all bits set, i.e. -1 cast to boost::uint64_t) used to
     //! flag a permutation count that cannot be represented
130  static const boost::uint64_t EnumerationOverflow =
131  static_cast<boost::uint64_t>(-1);
     // default state: empty vectors and a permutation count of zero
133  : m_permutation(), m_permutationSizes(), m_numPermutations() {}
134 
136 
     //! Returns the string "EnumerationStrategyBase"; virtual, so derived
     //! classes can override it.
137  virtual const char *type() const { return "EnumerationStrategyBase"; }
138 
139  //! Initialize the enumerator based on the reaction and the
140  //! supplied building blocks.
141  //! This is the standard API point.
142  //! This calls the derived class's initializeStrategy method, which must be implemented.
     //! The base-class state (m_permutationSizes, m_permutation,
     //! m_numPermutations) is set up before that call is made.
143  void initialize(const ChemicalReaction &reaction,
144  const EnumerationTypes::BBS &building_blocks) {
145  // default initialization, may be overridden (sets the # reactants
146  // and computes the default # of permutations)
147  m_permutationSizes = getSizesFromBBs(building_blocks);
148  m_permutation.resize(m_permutationSizes.size());
149 
150  m_numPermutations = computeNumProducts(m_permutationSizes);
     // start at index 0 in every reactant set
151  std::fill(m_permutation.begin(), m_permutation.end(), 0);
152 
     // hand over to the derived class once the shared state is ready
153  initializeStrategy(reaction, building_blocks);
154  }
155 
156  //! Initialize derived class. Pure virtual: every concrete strategy must implement it.
157  //! When called from initialize(), the EnumerationStrategyBase structures are already set:
158  //! m_permutationSizes - [ length of building blocks for each reactant set ]
159  //! m_numPermutations - number of possible permutations ( EnumerationOverflow, i.e. -1, if not computable )
160  //! m_permutation - the first permutation (all zeros), always the first supplied reactants
161  virtual void initializeStrategy(
162  const ChemicalReaction &reaction,
163  const EnumerationTypes::BBS &building_blocks) = 0;
164 
165  //! returns true if there are more permutations left
166  //! random enumerators may always return true...
167  virtual operator bool() const = 0;
168 
169  //! The current permutation {r1, r2, ...}
170  virtual const EnumerationTypes::RGROUPS &next() = 0;
171 
172  //! copy the enumeration strategy complete with current state
     //! NOTE(review): returns a raw pointer; presumably the caller takes
     //! ownership of the returned object - confirm against the implementations.
173  virtual EnumerationStrategyBase *copy() const = 0;
174 
175  //! The current position in the enumeration
176  const EnumerationTypes::RGROUPS &getPosition() const { return m_permutation; }
177 
178  //! Returns the stored total number of permutations (m_numPermutations).
179  //! A result of EnumerationOverflow indicates that the number of
180  //! permutations does not fit in the boost::uint64_t used to hold it.
181  boost::uint64_t getNumPermutations() const { return m_numPermutations; }
182 
183  //! Returns how many permutations have been processed by this strategy
184  virtual boost::uint64_t getPermutationIdx() const = 0;
185 
186  //! Skip the specified number of permutations (useful for
187  //! resetting state to a known position).
     //! Implemented by calling next() skipCount times and discarding the results.
     //! \param skipCount number of next() calls to make
     //! \return always true in this implementation
     //! NOTE(review): operator bool() is not consulted between calls - confirm
     //! that derived next() implementations tolerate being called when no
     //! permutations are left.
188  bool skip(boost::uint64_t skipCount) {
189  for (boost::uint64_t i = 0; i < skipCount; ++i) next();
190  return true;
191  }
192 
193  protected:
194  //! Initialize the internal data structures directly from per-group sizes,
195  //! e.g. RGROUPS = {10,40,50};
     //! Stores the sizes, recomputes the number of permutations and resets the
     //! current position to all zeros. Does not call initializeStrategy().
197  m_permutation.resize(rgroups.size());
198  m_permutationSizes = rgroups;
199  m_numPermutations = computeNumProducts(m_permutationSizes);
200  std::fill(m_permutation.begin(), m_permutation.end(), 0);
201  }
202 
203  private:
     // Serialization hook: archives the three base-class members in a fixed
     // order (position, sizes, permutation count).
     // NOTE(review): the boost serialization includes and the BOOST_* macros in
     // this header are wrapped in RDK_USE_BOOST_SERIALIZATION, but this friend
     // declaration and serialize() are not - confirm that boost::serialization
     // is declared when that macro is undefined.
204  friend class boost::serialization::access;
205  template <class Archive>
206  void serialize(Archive &ar, const unsigned int /*version*/) {
207  ar &m_permutation;
208  ar &m_permutationSizes;
209  ar &m_numPermutations;
210  }
211 };
212 #ifdef RDK_USE_BOOST_SERIALIZATION
213 BOOST_SERIALIZATION_ASSUME_ABSTRACT(EnumerationStrategyBase)
214 #endif
215 } // namespace RDKit
216 
217 #ifdef RDK_USE_BOOST_SERIALIZATION
218 BOOST_CLASS_VERSION(RDKit::EnumerationStrategyBase, 1)
219 #endif
220 
221 #endif
RDKit::EnumerationStrategyException
class for flagging enumeration strategy errors
Definition: EnumerationStrategyBase.h:57
RDKit::EnumerationStrategyBase::initializeStrategy
virtual void initializeStrategy(const ChemicalReaction &reaction, const EnumerationTypes::BBS &building_blocks)=0
RDKit::EnumerationStrategyBase::m_numPermutations
boost::uint64_t m_numPermutations
Definition: EnumerationStrategyBase.h:127
RDKit::EnumerationStrategyBase::getPermutationIdx
virtual boost::uint64_t getPermutationIdx() const =0
Returns how many permutations have been processed by this strategy.
RDKit::EnumerationStrategyException::~EnumerationStrategyException
~EnumerationStrategyException() noexcept
Definition: EnumerationStrategyBase.h:63
RDKit::EnumerationStrategyException::what
const char * what() const noexcept override
Definition: EnumerationStrategyBase.h:61
BoostStartInclude.h
RDKit::EnumerationStrategyBase::next
virtual const EnumerationTypes::RGROUPS & next()=0
The current permutation {r1, r2, ...}.
RDKit::EnumerationStrategyBase::getPosition
const EnumerationTypes::RGROUPS & getPosition() const
The current position in the enumeration.
Definition: EnumerationStrategyBase.h:176
RDKit::EnumerationStrategyBase::getNumPermutations
boost::uint64_t getNumPermutations() const
Definition: EnumerationStrategyBase.h:181
RDKit::getReactantsFromRGroups
RDKIT_CHEMREACTIONS_EXPORT MOL_SPTR_VECT getReactantsFromRGroups(const std::vector< MOL_SPTR_VECT > &bbs, const EnumerationTypes::RGROUPS &rgroups)
RDKit::EnumerationStrategyBase::internalInitialize
void internalInitialize(const EnumerationTypes::RGROUPS &rgroups)
Definition: EnumerationStrategyBase.h:196
RDKit::EnumerationStrategyException::EnumerationStrategyException
EnumerationStrategyException(const char *msg)
Definition: EnumerationStrategyBase.h:59
BoostEndInclude.h
RDKit::EnumerationStrategyBase::skip
bool skip(boost::uint64_t skipCount)
Definition: EnumerationStrategyBase.h:188
RDKit::EnumerationStrategyBase::m_permutationSizes
EnumerationTypes::RGROUPS m_permutationSizes
Definition: EnumerationStrategyBase.h:125
RDKitBase.h
pulls in the core RDKit functionality
RDKit::MOL_SPTR_VECT
std::vector< boost::shared_ptr< ROMol > > MOL_SPTR_VECT
Definition: FragCatParams.h:20
RDKit::EnumerationTypes::BBS
std::vector< MOL_SPTR_VECT > BBS
Definition: EnumerateTypes.h:42
RDKit::EnumerationStrategyBase::EnumerationStrategyBase
EnumerationStrategyBase()
Definition: EnumerationStrategyBase.h:132
RDKit::ChemicalReaction
This is a class for storing and applying general chemical reactions.
Definition: Reaction.h:120
RDKit::EnumerationStrategyException::EnumerationStrategyException
EnumerationStrategyException(const std::string &msg)
Definition: EnumerationStrategyBase.h:60
RDKit::EnumerationStrategyBase::type
virtual const char * type() const
Definition: EnumerationStrategyBase.h:137
RDKit::EnumerationStrategyBase
Definition: EnumerationStrategyBase.h:121
RDKit::getSizesFromReactants
RDKIT_CHEMREACTIONS_EXPORT EnumerationTypes::RGROUPS getSizesFromReactants(const std::vector< MOL_SPTR_VECT > &bbs)
RDKit::EnumerationStrategyBase::copy
virtual EnumerationStrategyBase * copy() const =0
copy the enumeration strategy complete with current state
RDKit::EnumerationTypes::RGROUPS
std::vector< boost::uint64_t > RGROUPS
Definition: EnumerateTypes.h:56
RDKit
Std stuff.
Definition: Atom.h:30
RDKit::computeNumProducts
RDKIT_CHEMREACTIONS_EXPORT boost::uint64_t computeNumProducts(const EnumerationTypes::RGROUPS &sizes)
EnumerateTypes.h
RDKit::EnumerationStrategyBase::~EnumerationStrategyBase
virtual ~EnumerationStrategyBase()
Definition: EnumerationStrategyBase.h:135
RDKIT_CHEMREACTIONS_EXPORT
#define RDKIT_CHEMREACTIONS_EXPORT
Definition: export.h:60
RDKit::getSizesFromBBs
EnumerationTypes::RGROUPS getSizesFromBBs(const std::vector< std::vector< T >> &bbs)
Return the number of elements per input vector.
Definition: EnumerationStrategyBase.h:75
RDKit::EnumerationStrategyBase::m_permutation
EnumerationTypes::RGROUPS m_permutation
Definition: EnumerationStrategyBase.h:123
RDKit::EnumerationStrategyException::message
const char * message() const noexcept
Definition: EnumerationStrategyBase.h:62
export.h