RDKit
Open-source cheminformatics and machine learning.
SmilesParse.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2001-2020 Greg Landrum and Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef RD_SMILESPARSE_H
12 #define RD_SMILESPARSE_H
13 
14 #include <GraphMol/RWMol.h>
16 #include <string>
17 #include <exception>
18 #include <map>
19 
20 namespace RDKit {
21 
//! Options that control how a SMILES string is turned into a molecule.
/*!
  Every field has an in-class default, so a default-constructed instance is
  ready to be passed to SmilesToMol(const std::string &,
  const SmilesParserParams &).
 */
struct RDKIT_SMILESPARSE_EXPORT SmilesParserParams {
  int debugParse = 0;   /**< enable debugging in the SMILES parser*/
  bool sanitize = true; /**< sanitize the molecule after building it */
  std::map<std::string, std::string> *replacements =
      nullptr;               /**< allows SMILES "macros" */
  bool allowCXSMILES = true; /**< recognize and parse CXSMILES*/
  bool strictCXSMILES =
      true; /**< throw an exception if the CXSMILES parsing fails */
  bool parseName = false; /**< parse (and set) the molecule name as well */
  bool removeHs = true;   /**< remove Hs after constructing the molecule */
};
//! Construct a molecule from a SMILES string using the supplied parser options
/*!
  \param smi     the SMILES to convert
  \param params  the SmilesParserParams instance holding the parser options

  \return a pointer to the new molecule. The convenience overload in this
  header returns this pointer unchanged and documents that the caller is
  responsible for freeing it.

  NOTE(review): the _smiles literal operator in this header catches
  MolSanitizeException around a call that ends up here, so this function
  presumably throws it when sanitization fails - confirm against the
  implementation.
 */
33 RDKIT_SMILESPARSE_EXPORT RWMol *SmilesToMol(const std::string &smi,
34  const SmilesParserParams &params);
35 
// NOTE(review): judging by the names, these presumably parse a SMILES fragment
// describing a single atom / a single bond. Ownership of the returned pointer
// and the behaviour on invalid input are not visible in this header - confirm
// against the implementation before relying on either.
36 RDKIT_SMILESPARSE_EXPORT Atom *SmilesToAtom(const std::string &smi);
37 RDKIT_SMILESPARSE_EXPORT Bond *SmilesToBond(const std::string &smi);
38 
39 //! Construct a molecule from a SMILES string
40 /*!
41  \param smi the SMILES to convert
42  \param debugParse toggles verbose debugging information from the parser
43  \param sanitize toggles H removal and sanitization of the molecule
44  \param replacements a string->string map of replacement strings. See below
45  for more information about replacements.
46 
47  \return a pointer to the new molecule; the caller is responsible for free'ing
48  this.
49 
50  The optional replacements map can be used to do string substitution of
51  abbreviations
52  in the input SMILES. The set of substitutions is repeatedly looped through
53  until
54  the string no longer changes. It is the responsibility of the caller to make
55  sure
56  that substitutions results in legal and sensible SMILES.
57 
58  Examples of substitutions:
59  \code
60  CC{Q}C with {"{Q}":"OCCO"} -> CCOCCOC
61  C{A}C{Q}C with {"{Q}":"OCCO", "{A}":"C1(CC1)"} -> CC1(CC1)COCCOC
62  C{A}C{Q}C with {"{Q}":"{X}CC{X}", "{A}":"C1CC1", "{X}":"N"} -> CC1CC1CCNCCNC
63  \endcode
64 
65  */
67  const std::string &smi, int debugParse = 0, bool sanitize = true,
68  std::map<std::string, std::string> *replacements = 0) {
69  SmilesParserParams params;
70  params.debugParse = debugParse;
71  params.replacements = replacements;
72  if (sanitize) {
73  params.sanitize = true;
74  params.removeHs = true;
75  } else {
76  params.sanitize = false;
77  params.removeHs = false;
78  }
79  return SmilesToMol(smi, params);
80 };
81 
82 //! Construct a molecule from a SMARTS string
83 /*!
84  \param sma the SMARTS to convert
85  \param debugParse toggles verbose debugging information from the parser
86  \param mergeHs toggles merging H atoms in the SMARTS into neighboring
87  atoms
88  \param replacements a string->string map of replacement strings.
89  \see SmilesToMol for more information about replacements
90 
91  \return a pointer to the new molecule; the caller is responsible for free'ing
92  this.
93  */
95  const std::string &sma, int debugParse = 0, bool mergeHs = false,
96  std::map<std::string, std::string> *replacements = 0);
97 
// NOTE(review): judging by the names, these presumably parse a SMARTS fragment
// describing a single atom / a single bond. Ownership of the returned pointer
// and the behaviour on invalid input are not visible in this header - confirm
// against the implementation before relying on either.
98 RDKIT_SMILESPARSE_EXPORT Atom *SmartsToAtom(const std::string &sma);
99 RDKIT_SMILESPARSE_EXPORT Bond *SmartsToBond(const std::string &sma);
100 
//! Exception type that carries a message string.
/*!
  Named for SMILES parse failures; the throw sites are outside this header.
  The message is stored by value, so the pointer returned by what() and
  message() stays valid for as long as the exception object itself is alive.
 */
class RDKIT_SMILESPARSE_EXPORT SmilesParseException : public std::exception {
 public:
  //! construct from a C string; a null pointer yields an empty message
  //! (building a std::string from a null pointer is undefined behaviour)
  SmilesParseException(const char *msg) : _msg(msg ? msg : "") {}
  //! construct from a std::string; the text is copied into the exception
  SmilesParseException(const std::string &msg) : _msg(msg) {}
  //! \return the stored message as a null-terminated string
  const char *what() const noexcept override { return _msg.c_str(); }
  //! \return the same text as what()
  const char *message() const noexcept { return what(); }
  ~SmilesParseException() noexcept override = default;

 private:
  std::string _msg;
};
112 
113 inline std::unique_ptr<RDKit::RWMol> operator"" _smiles(const char *text,
114  size_t len) {
115  std::string smi(text, len);
116  RWMol *ptr = nullptr;
117  try {
118  ptr = SmilesToMol(smi);
119  } catch (const RDKit::MolSanitizeException &) {
120  ptr = nullptr;
121  }
122  return std::unique_ptr<RWMol>(ptr);
123 }
124 inline std::unique_ptr<RDKit::RWMol> operator"" _smarts(const char *text,
125  size_t len) {
126  std::string smi(text, len);
127  // no need for exception handling here: SmartsToMol() doesn't do
128  // sanitization
129  RWMol *ptr = SmartsToMol(smi);
130  return std::unique_ptr<RWMol>(ptr);
131 }
132 
133 } // namespace RDKit
134 
135 #endif
RDKit::SmilesParserParams::debugParse
int debugParse
Definition: SmilesParse.h:23
RDKit::SmilesParseException::what
const char * what() const noexcept override
Definition: SmilesParse.h:105
RDKit::SmilesParserParams::sanitize
bool sanitize
Definition: SmilesParse.h:24
RDKit::Bond
class for representing a bond
Definition: Bond.h:47
RDKit::SmilesToAtom
RDKIT_SMILESPARSE_EXPORT Atom * SmilesToAtom(const std::string &smi)
RDKit::RWMol
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:31
RDKit::SmilesParseException::message
const char * message() const noexcept
Definition: SmilesParse.h:106
RDKit::SmartsToMol
RDKIT_SMILESPARSE_EXPORT RWMol * SmartsToMol(const std::string &sma, int debugParse=0, bool mergeHs=false, std::map< std::string, std::string > *replacements=0)
Construct a molecule from a SMARTS string.
RDKit::Atom
The class for representing atoms.
Definition: Atom.h:69
RDKit::SmilesToMol
RDKIT_SMILESPARSE_EXPORT RWMol * SmilesToMol(const std::string &smi, const SmilesParserParams &params)
RDKit::SmilesToBond
RDKIT_SMILESPARSE_EXPORT Bond * SmilesToBond(const std::string &smi)
RDKit::MolSanitizeException
class for flagging sanitization errors
Definition: SanitException.h:27
RDKit::SmilesParseException
Definition: SmilesParse.h:101
RDKit::SmilesParserParams::replacements
std::map< std::string, std::string > * replacements
Definition: SmilesParse.h:25
RDKit::SmilesParserParams::removeHs
bool removeHs
Definition: SmilesParse.h:31
SanitException.h
RDKit::SmilesParseException::SmilesParseException
SmilesParseException(const char *msg)
Definition: SmilesParse.h:103
RDKit
Std stuff.
Definition: Atom.h:30
RDKit::SmilesParseException::SmilesParseException
SmilesParseException(const std::string msg)
Definition: SmilesParse.h:104
RDKIT_SMILESPARSE_EXPORT
#define RDKIT_SMILESPARSE_EXPORT
Definition: export.h:645
RDKit::SmilesParserParams
Definition: SmilesParse.h:22
RDKit::MolOps::removeHs
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
RDKit::SmilesParseException::~SmilesParseException
~SmilesParseException() noexcept
Definition: SmilesParse.h:107
RWMol.h
Defines the editable molecule class RWMol.
RDKit::SmartsToAtom
RDKIT_SMILESPARSE_EXPORT Atom * SmartsToAtom(const std::string &sma)
RDKit::SmartsToBond
RDKIT_SMILESPARSE_EXPORT Bond * SmartsToBond(const std::string &sma)
export.h