RDKit
Open-source cheminformatics and machine learning.
FileParsers.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2013 Greg Landrum, Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef _RD_FILEPARSERS_H
12 #define _RD_FILEPARSERS_H
13 
14 #include <RDGeneral/types.h>
15 #include <GraphMol/RDKitBase.h>
16 
17 #include <string>
18 #include <iostream>
19 #include <vector>
20 #include <exception>
21 
22 #include <boost/shared_ptr.hpp>
23 
24 namespace RDKit {
25 const int MOLFILE_MAXLINE = 256;
26 RDKIT_FILEPARSERS_EXPORT std::string strip(const std::string &orig);
27 
//! \brief exception type that carries a textual error message
/*!
 *  Judging by its name it is meant to flag mol-file features the parser
 *  does not handle (NOTE(review): confirm against the throwing code).
 *  The message is copied at construction and can be read back through
 *  what() or message().
 */
class MolFileUnhandledFeatureException : public std::exception {
 public:
  //! construct with an error message
  /*!
   *  \param msg - the message; a null pointer is stored as an empty
   *               message (building a std::string from a null pointer
   *               is undefined behaviour)
   */
  explicit MolFileUnhandledFeatureException(const char *msg)
      : _msg(msg ? msg : "") {}
  //! construct with an error message
  /*!
   *  \param msg - the message; taken by const reference so that only
   *               the copy into the member is made
   */
  explicit MolFileUnhandledFeatureException(const std::string &msg)
      : _msg(msg) {}
  //! get the error message
  const char *what() const noexcept override { return _msg.c_str(); }
  //! get the error message (forwards to what())
  const char *message() const noexcept { return what(); }
  ~MolFileUnhandledFeatureException() noexcept override = default;

 private:
  std::string _msg;  //!< copy of the message supplied at construction
};
43 
44 //-----
45 // mol files
46 //-----
47 typedef std::vector<RWMOL_SPTR> RWMOL_SPTR_VECT;
48 // \brief construct a molecule from MDL mol data in a stream
49 /*!
50  * \param inStream - stream containing the data
51  * \param line - current line number (used for error reporting)
52  * \param sanitize - toggles sanitization and stereochemistry
53  * perception of the molecule
54  * \param removeHs - toggles removal of Hs from the molecule. H removal
55  * is only done if the molecule is sanitized
56  * \param line - current line number (used for error reporting)
57  * \param strictParsing - if not set, the parser is more lax about correctness
58  * of the contents.
59  *
60  */
62  unsigned int &line,
63  bool sanitize = true,
64  bool removeHs = true,
65  bool strictParsing = true);
66 // \overload
68  unsigned int &line,
69  bool sanitize = true,
70  bool removeHs = true,
71  bool strictParsing = true);
72 // \brief construct a molecule from an MDL mol block
73 /*!
74  * \param molBlock - string containing the mol block
75  * \param sanitize - toggles sanitization and stereochemistry
76  * perception of the molecule
77  * \param removeHs - toggles removal of Hs from the molecule. H removal
78  * is only done if the molecule is sanitized
79 * \param strictParsing - if not set, the parser is more lax about correctness
80  * of the contents.
81  */
82 RDKIT_FILEPARSERS_EXPORT RWMol *MolBlockToMol(const std::string &molBlock,
83  bool sanitize = true,
84  bool removeHs = true,
85  bool strictParsing = true);
86 
87 // \brief construct a molecule from an MDL mol file
88 /*!
89  * \param fName - string containing the file name
90  * \param sanitize - toggles sanitization and stereochemistry
91  * perception of the molecule
92  * \param removeHs - toggles removal of Hs from the molecule. H removal
93  * is only done if the molecule is sanitized
94 * \param strictParsing - if not set, the parser is more lax about correctness
95  * of the contents.
96  */
97 RDKIT_FILEPARSERS_EXPORT RWMol *MolFileToMol(const std::string &fName,
98  bool sanitize = true,
99  bool removeHs = true,
100  bool strictParsing = true);
101 
102 // \brief generates an MDL mol block for a molecule
103 /*!
104  * \param mol - the molecule in question
105  * \param includeStereo - toggles inclusion of stereochemistry information
106  * \param confId - selects the conformer to be used
107  * \param kekulize - triggers kekulization of the molecule before it is
108  * written
109 * \param forceV3000 - force generation of a V3000 mol block (happens
110  * automatically with
111  * more than 999 atoms or bonds)
112  */
114  bool includeStereo = true,
115  int confId = -1,
116  bool kekulize = true,
117  bool forceV3000 = false);
118 
119 // \brief generates an MDL v3000 mol block for a molecule
120 /*!
121  * \param mol - the molecule in question
122  * \param includeStereo - toggles inclusion of stereochemistry information
123  * \param confId - selects the conformer to be used
124  * \param kekulize - triggers kekulization of the molecule before it is
125  * written
126  */
127 inline std::string MolToV3KMolBlock(const ROMol &mol, bool includeStereo = true,
128  int confId = -1, bool kekulize = true) {
129  return MolToMolBlock(mol, includeStereo, confId, kekulize, true);
130 }
131 
132 // \brief Writes a molecule to an MDL mol file
133 /*!
134  * \param mol - the molecule in question
135  * \param fName - the name of the file to use
136  * \param includeStereo - toggles inclusion of stereochemistry information
137  * \param confId - selects the conformer to be used
138  * \param kekulize - triggers kekulization of the molecule before it is
139  * written
140 * \param forceV3000 - force generation of a V3000 mol block (happens
141  * automatically with
142  * more than 999 atoms or bonds)
143  */
145  const ROMol &mol, const std::string &fName, bool includeStereo = true,
146  int confId = -1, bool kekulize = true, bool forceV3000 = false);
147 
148 // \brief Writes a molecule to an MDL V3000 mol file
149 /*!
150  * \param mol - the molecule in question
151  * \param fName - the name of the file to use
152  * \param includeStereo - toggles inclusion of stereochemistry information
153  * \param confId - selects the conformer to be used
154  * \param kekulize - triggers kekulization of the molecule before it is
155  * written
156  */
157 inline void MolToV3KMolFile(const ROMol &mol, const std::string &fName,
158  bool includeStereo = true, int confId = -1,
159  bool kekulize = true) {
160  MolToMolFile(mol, fName, includeStereo, confId, kekulize, true);
161 }
162 
164  int confId = -1);
165 
167  const std::string &fName,
168  int confId = -1);
169 
170 //-----
171 // TPL handling:
172 //-----
173 
174 //! \brief translate TPL data (BioCad format) into a multi-conf molecule
175 /*!
176  \param inStream: the stream from which to read
177  \param line: used to track the line number of errors
178  \param sanitize: toggles sanitization and stereochemistry
179  perception of the molecule
180  \param skipFirstConf: according to the TPL format description, the atomic
181  coords in the atom-information block describe the first
182 conformation and the first conf block describes the second
183  conformation. The CombiCode, on the other hand, writes
184  the first conformation data both to the atom-information
185  block and to the first conf block. We want to be able to
186  read CombiCode-style tpls, so we'll allow this
187  mis-feature
188  to be parsed when this flag is set.
189 */
191  unsigned int &line,
192  bool sanitize = true,
193  bool skipFirstConf = false);
194 
195 //! \brief construct a multi-conf molecule from a TPL (BioCad format) file
196 /*!
197  \param fName: the name of the file from which to read
198  \param sanitize: toggles sanitization and stereochemistry
199  perception of the molecule
200  \param skipFirstConf: according to the TPL format description, the atomic
201  coords in the atom-information block describe the first
202 conformation and the first conf block describes the second
203  conformation. The CombiCode, on the other hand, writes
204  the first conformation data both to the atom-information
205  block and to the first conf block. We want to be able to
206  read CombiCode-style tpls, so we'll allow this
207  mis-feature
208  to be parsed when this flag is set.
209 */
210 RDKIT_FILEPARSERS_EXPORT RWMol *TPLFileToMol(const std::string &fName,
211  bool sanitize = true,
212  bool skipFirstConf = false);
213 
215  const ROMol &mol, const std::string &partialChargeProp = "_GasteigerCharge",
216  bool writeFirstConfTwice = false);
218  const ROMol &mol, const std::string &fName,
219  const std::string &partialChargeProp = "_GasteigerCharge",
220  bool writeFirstConfTwice = false);
221 
222 //-----
223 // MOL2 handling
224 //-----
225 
226 typedef enum {
227  CORINA = 0 //! supports output from Corina and some dbtranslate output
229 
230 // \brief construct a molecule from a Tripos mol2 file
231 /*!
232  *
233  * \param fName - string containing the file name
234  * \param sanitize - toggles sanitization of the molecule
235  * \param removeHs - toggles removal of Hs from the molecule. H removal
236  * is only done if the molecule is sanitized
237  * \param variant - the atom type definitions to use
238  * \param cleanupSubstructures - toggles recognition and cleanup of common
239  * substructures
240  */
241 RDKIT_FILEPARSERS_EXPORT RWMol *Mol2FileToMol(const std::string &fName,
242  bool sanitize = true,
243  bool removeHs = true,
244  Mol2Type variant = CORINA,
245  bool cleanupSubstructures = true);
246 
247 // \brief construct a molecule from Tripos mol2 data in a stream
248 /*!
249  * \param inStream - stream containing the data
250  * \param sanitize - toggles sanitization of the molecule
251  * \param removeHs - toggles removal of Hs from the molecule. H removal
252  * is only done if the molecule is sanitized
253  * \param variant - the atom type definitions to use
254  * \param cleanupSubstructures - toggles recognition and cleanup of common
255  * substructures
256  */
258  std::istream *inStream, bool sanitize = true, bool removeHs = true,
259  Mol2Type variant = CORINA, bool cleanupSubstructures = true);
260 // \overload
262  std::istream &inStream, bool sanitize = true, bool removeHs = true,
263  Mol2Type variant = CORINA, bool cleanupSubstructures = true);
264 
265 // \brief construct a molecule from a Tripos mol2 block
266 /*!
267  * \param molBlock - string containing the mol block
268  * \param sanitize - toggles sanitization of the molecule
269  * \param removeHs - toggles removal of Hs from the molecule. H removal
270  * is only done if the molecule is sanitized
271  * \param variant - the atom type definitions to use
272  * \param cleanupSubstructures - toggles recognition and cleanup of common
273  * substructures
274  */
276  const std::string &molBlock, bool sanitize = true, bool removeHs = true,
277  Mol2Type variant = CORINA, bool cleanupSubstructures = true);
278 
280  bool sanitize = true,
281  bool removeHs = true,
282  unsigned int flavor = 0,
283  bool proximityBonding = true);
284 
286  bool sanitize = true,
287  bool removeHs = true,
288  unsigned int flavor = 0,
289  bool proximityBonding = true);
291  std::istream *inStream, bool sanitize = true, bool removeHs = true,
292  unsigned int flavor = 0, bool proximityBonding = true);
294  std::istream &inStream, bool sanitize = true, bool removeHs = true,
295  unsigned int flavor = 0, bool proximityBonding = true);
296 RDKIT_FILEPARSERS_EXPORT RWMol *PDBFileToMol(const std::string &fname,
297  bool sanitize = true,
298  bool removeHs = true,
299  unsigned int flavor = 0,
300  bool proximityBonding = true);
301 
302 // \brief generates a PDB block for a molecule
303 /*!
304  * \param mol - the molecule in question
305  * \param confId - selects the conformer to be used
306  * \param flavor - controls what gets written:
307  * flavor & 1 : Write MODEL/ENDMDL lines around each record
308  * flavor & 2 : Don't write any CONECT records
309  * flavor & 4 : Write CONECT records in both directions
310  * flavor & 8 : Don't use multiple CONECTs to encode bond order
311  * flavor & 16 : Write MASTER record
312  * flavor & 32 : Write TER record
313  */
315  int confId = -1,
316  unsigned int flavor = 0);
317 // \brief Writes a molecule to a PDB file
318 /*!
319  * \param mol - the molecule in question
320  * \param fName - the name of the file to use
321  * \param confId - selects the conformer to be used
322  * \param flavor - controls what gets written:
323  * flavor & 1 : Write MODEL/ENDMDL lines around each record
324  * flavor & 2 : Don't write any CONECT records
325  * flavor & 4 : Write CONECT records in both directions
326  * flavor & 8 : Don't use multiple CONECTs to encode bond order
327  * flavor & 16 : Write MASTER record
328  * flavor & 32 : Write TER record
329  */
331  const std::string &fname,
332  int confId = -1,
333  unsigned int flavor = 0);
334 
335 // \brief reads a molecule from the metadata in an RDKit-generated SVG file
336 /*!
337  * \param svg - string containing the SVG
338  * \param sanitize - toggles sanitization of the molecule
339  * \param removeHs - toggles removal of Hs from the molecule. H removal
340  * is only done if the molecule is sanitized
341  *
342  * **NOTE** This functionality should be considered beta.
343  */
345  bool sanitize = true,
346  bool removeHs = true);
347 /*! \overload
348  */
350  bool sanitize = true,
351  bool removeHs = true);
352 
353 inline std::unique_ptr<RDKit::RWMol> operator"" _ctab(const char *text,
354  size_t len) {
355  std::string data(text, len);
356  RWMol *ptr = nullptr;
357  try {
358  ptr = MolBlockToMol(data);
359  } catch (const RDKit::MolSanitizeException &) {
360  ptr = nullptr;
361  }
362  return std::unique_ptr<RWMol>(ptr);
363 }
364 inline std::unique_ptr<RDKit::RWMol> operator"" _mol2(const char *text,
365  size_t len) {
366  std::string data(text, len);
367  RWMol *ptr = nullptr;
368  try {
369  ptr = Mol2BlockToMol(data);
370  } catch (const RDKit::MolSanitizeException &) {
371  ptr = nullptr;
372  }
373  return std::unique_ptr<RWMol>(ptr);
374 }
375 
376 inline std::unique_ptr<RDKit::RWMol> operator"" _pdb(const char *text,
377  size_t len) {
378  std::string data(text, len);
379  RWMol *ptr = nullptr;
380  try {
381  ptr = PDBBlockToMol(data);
382  } catch (const RDKit::MolSanitizeException &) {
383  ptr = nullptr;
384  }
385  return std::unique_ptr<RWMol>(ptr);
386 }
387 
388 } // namespace RDKit
389 
390 #endif
RDKit::MolToPDBFile
RDKIT_FILEPARSERS_EXPORT void MolToPDBFile(const ROMol &mol, const std::string &fname, int confId=-1, unsigned int flavor=0)
RDKit::MolToTPLText
RDKIT_FILEPARSERS_EXPORT std::string MolToTPLText(const ROMol &mol, const std::string &partialChargeProp="_GasteigerCharge", bool writeFirstConfTwice=false)
RDKit::Mol2Type
Mol2Type
Definition: FileParsers.h:226
RDKit::MOLFILE_MAXLINE
const int MOLFILE_MAXLINE
Definition: FileParsers.h:25
RDKit::MolFileUnhandledFeatureException::MolFileUnhandledFeatureException
MolFileUnhandledFeatureException(const char *msg)
construct with an error message
Definition: FileParsers.h:31
RDKit::MolFileUnhandledFeatureException::message
const char * message() const noexcept
Definition: FileParsers.h:37
RDKit::MolToTPLFile
RDKIT_FILEPARSERS_EXPORT void MolToTPLFile(const ROMol &mol, const std::string &fName, const std::string &partialChargeProp="_GasteigerCharge", bool writeFirstConfTwice=false)
types.h
RDKit::CORINA
@ CORINA
Definition: FileParsers.h:227
RDKIT_FILEPARSERS_EXPORT
#define RDKIT_FILEPARSERS_EXPORT
Definition: export.h:216
RDKit::RWMol
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:31
RDKit::Mol2DataStreamToMol
RDKIT_FILEPARSERS_EXPORT RWMol * Mol2DataStreamToMol(std::istream *inStream, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA, bool cleanupSubstructures=true)
RDKit::MolToV3KMolFile
void MolToV3KMolFile(const ROMol &mol, const std::string &fName, bool includeStereo=true, int confId=-1, bool kekulize=true)
Definition: FileParsers.h:157
RDKit::strip
RDKIT_FILEPARSERS_EXPORT std::string strip(const std::string &orig)
RDKit::MolToXYZBlock
RDKIT_FILEPARSERS_EXPORT std::string MolToXYZBlock(const ROMol &mol, int confId=-1)
RDKit::TPLFileToMol
RDKIT_FILEPARSERS_EXPORT RWMol * TPLFileToMol(const std::string &fName, bool sanitize=true, bool skipFirstConf=false)
construct a multi-conf molecule from a TPL (BioCad format) file
RDKit::MolFileUnhandledFeatureException::~MolFileUnhandledFeatureException
~MolFileUnhandledFeatureException() noexcept override
Definition: FileParsers.h:38
RDKit::ROMol
Definition: ROMol.h:171
RDKitBase.h
pulls in the core RDKit functionality
RDKit::MolSanitizeException
class for flagging sanitization errors
Definition: SanitException.h:27
RDKit::MolToXYZFile
RDKIT_FILEPARSERS_EXPORT void MolToXYZFile(const ROMol &mol, const std::string &fName, int confId=-1)
RDKit::MolBlockToMol
RDKIT_FILEPARSERS_EXPORT RWMol * MolBlockToMol(const std::string &molBlock, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
RDKit::MolFileUnhandledFeatureException
Definition: FileParsers.h:28
RDKit::MolToMolFile
RDKIT_FILEPARSERS_EXPORT void MolToMolFile(const ROMol &mol, const std::string &fName, bool includeStereo=true, int confId=-1, bool kekulize=true, bool forceV3000=false)
RDKit::MolDataStreamToMol
RDKIT_FILEPARSERS_EXPORT RWMol * MolDataStreamToMol(std::istream *inStream, unsigned int &line, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
RDKit::MolToV3KMolBlock
std::string MolToV3KMolBlock(const ROMol &mol, bool includeStereo=true, int confId=-1, bool kekulize=true)
Definition: FileParsers.h:127
RDKit::TPLDataStreamToMol
RDKIT_FILEPARSERS_EXPORT RWMol * TPLDataStreamToMol(std::istream *inStream, unsigned int &line, bool sanitize=true, bool skipFirstConf=false)
translate TPL data (BioCad format) into a multi-conf molecule
RDKit::Mol2FileToMol
RDKIT_FILEPARSERS_EXPORT RWMol * Mol2FileToMol(const std::string &fName, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA, bool cleanupSubstructures=true)
RDKit::MolFileUnhandledFeatureException::what
const char * what() const noexcept override
get the error message
Definition: FileParsers.h:36
RDKit::RDKitSVGToMol
RDKIT_FILEPARSERS_EXPORT RWMol * RDKitSVGToMol(const std::string &svg, bool sanitize=true, bool removeHs=true)
RDKit
Std stuff.
Definition: Atom.h:30
RDKit::RWMOL_SPTR_VECT
std::vector< RWMOL_SPTR > RWMOL_SPTR_VECT
Definition: FileParsers.h:47
RDKit::PDBBlockToMol
RDKIT_FILEPARSERS_EXPORT RWMol * PDBBlockToMol(const char *str, bool sanitize=true, bool removeHs=true, unsigned int flavor=0, bool proximityBonding=true)
RDKit::MolOps::removeHs
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
RDKit::MolFileUnhandledFeatureException::MolFileUnhandledFeatureException
MolFileUnhandledFeatureException(const std::string msg)
construct with an error message
Definition: FileParsers.h:33
RDKit::MolToMolBlock
RDKIT_FILEPARSERS_EXPORT std::string MolToMolBlock(const ROMol &mol, bool includeStereo=true, int confId=-1, bool kekulize=true, bool forceV3000=false)
RDKit::PDBFileToMol
RDKIT_FILEPARSERS_EXPORT RWMol * PDBFileToMol(const std::string &fname, bool sanitize=true, bool removeHs=true, unsigned int flavor=0, bool proximityBonding=true)
RDKit::Mol2BlockToMol
RDKIT_FILEPARSERS_EXPORT RWMol * Mol2BlockToMol(const std::string &molBlock, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA, bool cleanupSubstructures=true)
RDKit::MolFileToMol
RDKIT_FILEPARSERS_EXPORT RWMol * MolFileToMol(const std::string &fName, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
RDKit::PDBDataStreamToMol
RDKIT_FILEPARSERS_EXPORT RWMol * PDBDataStreamToMol(std::istream *inStream, bool sanitize=true, bool removeHs=true, unsigned int flavor=0, bool proximityBonding=true)
RDKit::MolToPDBBlock
RDKIT_FILEPARSERS_EXPORT std::string MolToPDBBlock(const ROMol &mol, int confId=-1, unsigned int flavor=0)
export.h