RDKit
Open-source cheminformatics and machine learning.
MolStandardize/Tautomer.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2018 Susan H. Leung
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef RD_TAUTOMER_H
12 #define RD_TAUTOMER_H
13 
14 #include <boost/function.hpp>
15 #include <string>
16 #include <Catalogs/Catalog.h>
19 #include <boost/dynamic_bitset.hpp>
20 
21 namespace RDKit {
22 class ROMol;
23 class RWMol;
24 
25 namespace MolStandardize {
26 
27 typedef RDCatalog::HierarchCatalog<TautomerCatalogEntry, TautomerCatalogParams,
28  int>
30 
31 namespace TautomerScoringFunctions {
32 const std::string tautomerScoringVersion = "1.0.0";
33 
37 
38 inline int scoreTautomer(const ROMol &mol) {
39  return scoreRings(mol) + scoreSubstructs(mol) + scoreHeteroHs(mol);
40 }
41 } // namespace TautomerScoringFunctions
42 
44  public:
45  TautomerEnumerator() = delete;
46  TautomerEnumerator(TautomerCatalog *tautCat) : dp_catalog(tautCat){};
48  : dp_catalog(other.dp_catalog){};
50  if (this == &other) return *this;
51  dp_catalog = other.dp_catalog;
52  return *this;
53  }
54 
55  //! returns all tautomers for the input molecule
56  /*!
57  The enumeration rules are inspired by the publication:
58  M. Sitzmann et al., “Tautomerism in Large Databases.”, JCAMD 24:521 (2010)
59  https://doi.org/10.1007/s10822-010-9346-4
60 
61  \param mol: the molecule to be enumerated
62  \param modifiedAtoms: if provided this is used to return which atoms are
63  modified during the tautomerization
64  \param modifiedBonds: if provided this is used to return which bonds are
65  modified during the tautomerization
66 
67  Note: the definitions used here are that the atoms modified during
68  tautomerization are the atoms at the beginning and end of each tautomer
69  transform (the H "donor" and H "acceptor" in the transform) and the bonds
70  modified during transformation are any bonds whose order is changed during
71  the tautomer transform (these are the bonds between the "donor" and the
72  "acceptor")
73 
74  */
75  std::vector<ROMOL_SPTR> enumerate(
76  const ROMol &mol, boost::dynamic_bitset<> *modifiedAtoms = nullptr,
77  boost::dynamic_bitset<> *modifiedBonds = nullptr) const;
78 
79  //! returns the canonical tautomer from a set of possible tautomers
80  /*!
81  Note that the canonical tautomer is very likely not the most stable tautomer
82  for any given conditions. The default scoring rules are designed to produce
83  "reasonable" tautomers, but the primary concern is that the results are
84  canonical: you always get the same canonical tautomer for a molecule
85  regardless of what the input tautomer or atom ordering were.
86 
87  The default scoring scheme is inspired by the publication:
88  M. Sitzmann et al., “Tautomerism in Large Databases.”, JCAMD 24:521 (2010)
89  https://doi.org/10.1007/s10822-010-9346-4
90 
91  */
92  ROMol *pickCanonical(const std::vector<ROMOL_SPTR> &tautomers,
93  boost::function<int(const ROMol &mol)> scoreFunc =
95 
96  //! returns the canonical tautomer for a molecule
97  /*!
98  Note that the canonical tautomer is very likely not the most stable tautomer
99  for any given conditions. The default scoring rules are designed to produce
100  "reasonable" tautomers, but the primary concern is that the results are
101  canonical: you always get the same canonical tautomer for a molecule
102  regardless of what the input tautomer or atom ordering were.
103 
104  The default scoring scheme is inspired by the publication:
105  M. Sitzmann et al., “Tautomerism in Large Databases.”, JCAMD 24:521 (2010)
106  https://doi.org/10.1007/s10822-010-9346-4
107 
108  */
109  ROMol *canonicalize(const ROMol &mol,
110  boost::function<int(const ROMol &mol)> scoreFunc =
112  auto tautomers = enumerate(mol);
113  if (!tautomers.size()) {
115  << "no tautomers found, returning input molecule" << std::endl;
116  return new ROMol(mol);
117  }
118  return pickCanonical(tautomers, scoreFunc);
119  };
120 
121  private:
122  std::shared_ptr<TautomerCatalog> dp_catalog;
123 }; // TautomerEnumerator class
124 
125 } // namespace MolStandardize
126 } // namespace RDKit
127 
128 #endif
BOOST_LOG
#define BOOST_LOG(__arg__)
Definition: RDLog.h:88
RDKit::MolStandardize::TautomerScoringFunctions::scoreHeteroHs
RDKIT_MOLSTANDARDIZE_EXPORT int scoreHeteroHs(const ROMol &mol)
TautomerCatalogEntry.h
RDKit::MolStandardize::TautomerEnumerator::TautomerEnumerator
TautomerEnumerator(TautomerCatalog *tautCat)
Definition: MolStandardize/Tautomer.h:46
RDKit::MolStandardize::TautomerEnumerator::operator=
TautomerEnumerator & operator=(const TautomerEnumerator &other)
Definition: MolStandardize/Tautomer.h:49
RDKit::MolStandardize::TautomerEnumerator::TautomerEnumerator
TautomerEnumerator(const TautomerEnumerator &other)
Definition: MolStandardize/Tautomer.h:47
RDKit::MolStandardize::TautomerEnumerator::TautomerEnumerator
TautomerEnumerator()=delete
RDKit::MolStandardize::TautomerScoringFunctions::scoreTautomer
int scoreTautomer(const ROMol &mol)
Definition: MolStandardize/Tautomer.h:38
RDKit::MolStandardize::TautomerEnumerator::canonicalize
ROMol * canonicalize(const ROMol &mol, boost::function< int(const ROMol &mol)> scoreFunc=TautomerScoringFunctions::scoreTautomer) const
returns the canonical tautomer for a molecule
Definition: MolStandardize/Tautomer.h:109
RDCatalog::HierarchCatalog
A Catalog with a hierarchical structure.
Definition: Catalog.h:135
Catalog.h
TautomerCatalogParams.h
RDKit::ROMol
Definition: ROMol.h:171
RDKit::MolStandardize::TautomerScoringFunctions::scoreSubstructs
RDKIT_MOLSTANDARDIZE_EXPORT int scoreSubstructs(const ROMol &mol)
RDKit::MolStandardize::TautomerCatalog
RDCatalog::HierarchCatalog< TautomerCatalogEntry, TautomerCatalogParams, int > TautomerCatalog
Definition: MolStandardize/Tautomer.h:29
RDKit
Std stuff.
Definition: Atom.h:30
RDKit::MolStandardize::TautomerScoringFunctions::tautomerScoringVersion
const std::string tautomerScoringVersion
Definition: MolStandardize/Tautomer.h:32
RDKit::MolStandardize::TautomerEnumerator::enumerate
std::vector< ROMOL_SPTR > enumerate(const ROMol &mol, boost::dynamic_bitset<> *modifiedAtoms=nullptr, boost::dynamic_bitset<> *modifiedBonds=nullptr) const
returns all tautomers for the input molecule
RDKit::MolStandardize::TautomerScoringFunctions::scoreRings
RDKIT_MOLSTANDARDIZE_EXPORT int scoreRings(const ROMol &mol)
rdWarningLog
RDKIT_RDGENERAL_EXPORT std::shared_ptr< boost::logging::rdLogger > rdWarningLog
RDKIT_MOLSTANDARDIZE_EXPORT
#define RDKIT_MOLSTANDARDIZE_EXPORT
Definition: export.h:437
RDKit::MolStandardize::TautomerEnumerator
Definition: MolStandardize/Tautomer.h:43
RDKit::MolStandardize::TautomerEnumerator::pickCanonical
ROMol * pickCanonical(const std::vector< ROMOL_SPTR > &tautomers, boost::function< int(const ROMol &mol)> scoreFunc=TautomerScoringFunctions::scoreTautomer) const
returns the canonical tautomer from a set of possible tautomers
export.h