BALL  1.5.0
GAFFCESParser.h
Go to the documentation of this file.
1 // -*- Mode: C++; tab-width: 2; -*-
2 // vi: set ts=2:
3 
4 #ifndef BALL_MOLMEC_AMBER_GAFFCESPARSER_H
5 #define BALL_MOLMEC_AMBER_GAFFCESPARSER_H
6 
7 #ifndef BALL_KERNEL_ATOM_H
8 #include <BALL/KERNEL/atom.h>
9 #endif
10 
11 #ifndef BALL_DATATYPE_STRING_H
12 #include <BALL/DATATYPE/string.h>
13 #endif
14 
15 #include <set>
16 #include <map>
17 #include <vector>
18 
19 namespace BALL
20 {
21 
23  {
24  public:
25 
26  //atomic property string
27  class APSMatcher
28  {
29  public:
30  //encode the atomic property types (ring membership, planarity, aromaticity, bond types)
31  enum APSType
32  {
69  };
70 
71  class APSTerm
72  {
73  public:
74  APSTerm(APSType new_type, int new_feature_number)
75  : type(new_type),
76  feature_number(new_feature_number)
77  {}
78 
80  //contains the number of occurrences of a given feature
82  };
83 
84  //check if current atom is in a ring
85  bool isRingAtom(Atom& atom);
86  //check if the current atom is in a ring with size n
87  //and return the number of occurrences
88  int isNRingAtom(Size size, Atom& atom);
89  //check if the current atom is not a ring atom or
90  //in a ten-membered or larger ring
91  bool isNonRingAtom(Atom& atom);
92 
93  bool checkGAFFProperties(Atom& atom, Atom& predecessor, APSTerm aps);
94 
95 
96  APSMatcher();
97  ~APSMatcher();
98 
99  String printAPS();
100 
101  // check if atom matches atomic property string
102  bool operator() (Atom& atom, Atom& predecessor);
103 
104  //store atomic property string
105  //external vector: all AND (",") types
106  //internal vector: all OR (".")types
107  std::vector < std::vector< APSTerm> > aps_terms;
108 
109  protected:
110  bool hasBond_(Atom* atom, Atom* predecessor, int bond_type, int feature_number);
111  };
112 
113  struct State
114  {
115  //not "thread-safe"
117 
120  };
121 
122 
123 
124  //chemical environment string
126  {
127  public:
128  //encode wildcard elements
130  {
131  XA, XB, XC, XD, XX
132  };
133 
135  : parent(0),
136  atom_to_test(0),
137  parser_(parser)
138  {
140  };
141 
142  virtual ~CESPredicate();
143 
144  //initialize stringToWildcard map
145  void initStringToWildcard();
146  std::map<String, CESwildcards> getStringToWildcard();
147 
148  //add a CESwildcardsConnectionPredicate to "predicate tree"
149  void addCESwildcardsConnectionPredicate(String wildcard, Size partners);
150  //add a CESwildcardsPredicate to "predicate tree"
151  void addCESwildcardsPredicate(String wildcard);
152  //add a CESelementPredicate to "predicate tree"
153  void addCESelementPredicate(String name);
154  //add a CESelementConnectionPredicate to "predicate tree"
155  void addCESelementConnectionPredicate(Size partners, String name);
156  // add a TruePredicate to "predicate tree"
157  void addTruePredicate() const;
158 
159  //check if atom and its environment match predicates
160  virtual bool operator () (Atom& atom);
161  //check if atom matches "predicates in predicate-tree"
162  virtual bool match(Atom&){return false;};
163  //delete children
164  void clear();
165 
166  // check whether this atom is contained on a path to the root
167  bool alreadySeenThisAtom(Atom* atom);
168 
169  //to expand aps_terms in the aps_matcher object
170  void addNewAND();
171  void addNewOR(APSMatcher::APSType aps, int feature_number);
172 
173  //store existing atomic property string
175 
176  //all CESPredicates for current_predicate ->children of current_predicate
177  std::vector<CESPredicate*> children;
178  //the CESPredicate that has current_predicate in its children vector
180 
181  //The atom we are trying to match to this predicate
183  protected:
184  //map to convert String into wildcard-element
185  std::map<String, CESwildcards > stringToWildcard_;
187  };
188 
189  //element-name of the partnerAtom and number of its connected atoms
191  {
192  public:
194  : CESPredicate(parser),
195  numberOfPartners_(0),
196  elementName_("NoName")
197  {};
198 
200 
201  void setNumberOfPartners(Size number);
202  void setElementName(String name);
205  //check if atom matches predicate
206  bool match(Atom& atom);
207  protected:
210  };
211 
212  //element-name of the partnerAtom
214  {
215  public:
217  : CESPredicate(parser),
218  elementName_("NoName")
219  {
220  };
221 
223 
224  void setElementName(String name);
226  //check if atom matches predicate
227  bool match(Atom& atom);
228  protected:
230  };
231 
232  //XA,XB,XC,XD,XX
234  {
235  public:
237  : CESPredicate(parser)
238  {};
239 
241 
242  void setWildcards(String new_wildcard);
244  //checks if atom matches the given wildcard-element (XA, XB, XC, XD, XX)
245  bool matchWildcards(Atom& atom);
246  //check if atom matches predicate
247  bool match(Atom& atom);
248  protected:
250  };
251 
252  //XA,XB,XC,XD,XX and number of partnerAtoms
254  {
255  public:
257  : CESPredicate(parser),
259  {};
261 
262  void setNumberOfPartners(Size number);
263  void setWildcards(String new_wildcard);
266  //checks if atom matches the given wildcard-element (XA, XB, XC, XD, XX)
267  bool matchWildcards(Atom& atom);
268  //check if atom matches predicate
269  bool match(Atom& atom);
270  protected:
273  };
274 
275  //string is "*" which means always true
277  {
278  public:
280  : CESPredicate(parser)
281  {};
283  //check if atom matches predicate (always true!)
284  bool match(Atom&) { return true; }
285  };
286 
287  //Parser-match-Function checking if atom's environment matches the "predicate tree"
288  bool match(Atom& atom) const;
289 
290  //initialize the set of element symbols
291  void initElementSymbols();
292  const std::set<String>& getElementSymbols();
293 
294  GAFFCESParser();
295  GAFFCESParser(const String& cesstring);
296  ~GAFFCESParser();
297 
298  //for lexer/parser
299  Size read(char* buf, Size max_size);
300 
301  static State state;
302 
305  //fixed root of the "predicate tree"
307  //parent-predicate of current_predicate
309  //"predicate-node" in the "predicate tree" we are currently considering
311 
312 
313  //parse chemical environment string
314  bool parse(const String& cesstring);
315  //check if any atom matches parsed ces_string
316  bool GAFFCESatomMatcher(Atom& atom, const String& cesstring);
317  //start filling the children vector for a current predicate
318  void startChildPredicates();
319  //finish filling the children vector for the current predicate
320  void endChildPredicates();
321 
322  protected:
323  //current chemical environment string
325  //set with all valid element symbols
326  std::set<String> element_symbols_;
327  //for Parser/Lexer function YYINPUT
329  };
330 
331 }
332 
333 #endif
BALL::GAFFCESParser::CESelementPredicate::CESelementPredicate
CESelementPredicate(GAFFCESParser *parser)
Definition: GAFFCESParser.h:216
BALL::GAFFCESParser::APSMatcher::PURE_SINGLE_BOND
Definition: GAFFCESParser.h:47
BALL::GAFFCESParser::APSMatcher::TRIPLE_BOND_TO_PARENT
Definition: GAFFCESParser.h:60
BALL::GAFFCESParser::initElementSymbols
void initElementSymbols()
BALL::GAFFCESParser::CESelementConnectionPredicate::setElementName
void setElementName(String name)
BALL::GAFFCESParser::CESwildcardsConnectionPredicate::match
bool match(Atom &atom)
BALL::GAFFCESParser::APSMatcher::PURE_DOUBLE_BOND
Definition: GAFFCESParser.h:53
BALL::GAFFCESParser::CESPredicate::initStringToWildcard
void initStringToWildcard()
BALL::GAFFCESParser::APSMatcher::NO_DOUBLE_BOND_TO_PARENT
Definition: GAFFCESParser.h:58
BALL::GAFFCESParser::CESwildcardsConnectionPredicate::~CESwildcardsConnectionPredicate
~CESwildcardsConnectionPredicate()
BALL::GAFFCESParser::CESPredicate::XX
Definition: GAFFCESParser.h:131
BALL::GAFFCESParser::CESwildcardsConnectionPredicate::numberOfPartners_
Size numberOfPartners_
Definition: GAFFCESParser.h:272
BALL::GAFFCESParser::APSMatcher::IS_6_RING_ATOM
Definition: GAFFCESParser.h:43
BALL::GAFFCESParser
Definition: GAFFCESParser.h:22
BALL::GAFFCESParser::APSMatcher::IS_RING_ATOM
Definition: GAFFCESParser.h:33
BALL::GAFFCESParser::CESwildcardsPredicate::CESwildcardsPredicate
CESwildcardsPredicate(GAFFCESParser *parser)
Definition: GAFFCESParser.h:236
BALL::GAFFCESParser::APSMatcher::APSTerm::feature_number
int feature_number
Definition: GAFFCESParser.h:81
BALL::GAFFCESParser::APSMatcher::DOUBLE_BOND_TO_PARENT
Definition: GAFFCESParser.h:57
BALL::GAFFCESParser::APSMatcher::~APSMatcher
~APSMatcher()
BALL::GAFFCESParser::CESPredicate::XD
Definition: GAFFCESParser.h:131
BALL::GAFFCESParser::APSMatcher::APSTerm::APSTerm
APSTerm(APSType new_type, int new_feature_number)
Definition: GAFFCESParser.h:74
BALL::GAFFCESParser::CESelementConnectionPredicate::~CESelementConnectionPredicate
~CESelementConnectionPredicate()
BALL::GAFFCESParser::CESPredicate::XB
Definition: GAFFCESParser.h:131
BALL::GAFFCESParser::APSMatcher::DELOCALIZED_BOND
Definition: GAFFCESParser.h:62
BALL::GAFFCESParser::APSMatcher::APSType
APSType
Definition: GAFFCESParser.h:31
BALL::GAFFCESParser::CESwildcardsConnectionPredicate::setNumberOfPartners
void setNumberOfPartners(Size number)
BALL::GAFFCESParser::CESelementConnectionPredicate::match
bool match(Atom &atom)
BALL::GAFFCESParser::CESwildcardsPredicate::wildcards_
CESwildcards wildcards_
Definition: GAFFCESParser.h:249
BALL::GAFFCESParser::CESwildcardsPredicate::match
bool match(Atom &atom)
BALL::GAFFCESParser::TruePredicate
Definition: GAFFCESParser.h:276
BALL::GAFFCESParser::APSMatcher::hasBond_
bool hasBond_(Atom *atom, Atom *predecessor, int bond_type, int feature_number)
BALL::GAFFCESParser::APSMatcher::IS_5_RING_ATOM
Definition: GAFFCESParser.h:42
BALL::GAFFCESParser::element_symbols_
std::set< String > element_symbols_
Definition: GAFFCESParser.h:326
BALL::GAFFCESParser::CESwildcardsPredicate::getWildcards
CESwildcards getWildcards()
BALL::GAFFCESParser::CESPredicate::clear
void clear()
BALL::GAFFCESParser::APSMatcher::IS_8_RING_ATOM
Definition: GAFFCESParser.h:45
BALL::GAFFCESParser::CESPredicate::parser_
GAFFCESParser * parser_
Definition: GAFFCESParser.h:186
BALL::GAFFCESParser::CESwildcardsConnectionPredicate::setWildcards
void setWildcards(String new_wildcard)
BALL::GAFFCESParser::APSMatcher::AROMATIC_BOND_TO_PARENT
Definition: GAFFCESParser.h:66
BALL::GAFFCESParser::CESPredicate::addNewOR
void addNewOR(APSMatcher::APSType aps, int feature_number)
BALL::GAFFCESParser::CESwildcardsPredicate
Definition: GAFFCESParser.h:233
BALL::GAFFCESParser::APSMatcher::APSTerm::type
APSType type
Definition: GAFFCESParser.h:79
BALL::GAFFCESParser::endChildPredicates
void endChildPredicates()
BALL::GAFFCESParser::APSMatcher::IS_NON_RING_ATOM
Definition: GAFFCESParser.h:34
BALL::GAFFCESParser::APSMatcher::NO_PURE_DOUBLE_BOND_TO_PARENT
Definition: GAFFCESParser.h:55
BALL::GAFFCESParser::APSMatcher::NO_AROMATIC_BOND_TO_PARENT
Definition: GAFFCESParser.h:67
BALL::GAFFCESParser::CESPredicate
Definition: GAFFCESParser.h:125
BALL_SIZE_TYPE
BALL::GAFFCESParser::APSMatcher::PURE_SINGLE_BOND_TO_PARENT
Definition: GAFFCESParser.h:48
BALL::GAFFCESParser::TruePredicate::match
bool match(Atom &)
Definition: GAFFCESParser.h:284
BALL::GAFFCESParser::CESPredicate::addNewAND
void addNewAND()
BALL::GAFFCESParser::parse
bool parse(const String &cesstring)
BALL::GAFFCESParser::current_root_predicate
CESPredicate * current_root_predicate
Definition: GAFFCESParser.h:308
BALL::GAFFCESParser::APSMatcher::APSMatcher
APSMatcher()
BALL::GAFFCESParser::state
static State state
Definition: GAFFCESParser.h:301
BALL::GAFFCESParser::CESPredicate::addTruePredicate
void addTruePredicate() const
BALL::GAFFCESParser::APSMatcher::operator()
bool operator()(Atom &atom, Atom &predecessor)
BALL::GAFFCESParser::startChildPredicates
void startChildPredicates()
BALL::GAFFCESParser::State::current_aps_type
APSMatcher::APSType current_aps_type
Definition: GAFFCESParser.h:118
BALL::GAFFCESParser::CESPredicate::CESPredicate
CESPredicate(GAFFCESParser *parser)
Definition: GAFFCESParser.h:134
BALL::GAFFCESParser::read_start_
Position read_start_
Definition: GAFFCESParser.h:328
BALL::GAFFCESParser::CESelementPredicate::~CESelementPredicate
~CESelementPredicate()
BALL::Atom
Definition: atom.h:87
BALL::GAFFCESParser::APSMatcher::APSTerm
Definition: GAFFCESParser.h:71
BALL::GAFFCESParser::CESelementConnectionPredicate::elementName_
String elementName_
Definition: GAFFCESParser.h:209
BALL::GAFFCESParser::CESelementConnectionPredicate::getNumberOfPartners
Size getNumberOfPartners()
BALL::GAFFCESParser::APSMatcher::DOUBLE_BOND
Definition: GAFFCESParser.h:56
BALL
Definition: constants.h:12
BALL::GAFFCESParser::CESelementPredicate::match
bool match(Atom &atom)
BALL::GAFFCESParser::CESPredicate::atom_to_test
Atom * atom_to_test
Definition: GAFFCESParser.h:182
BALL::GAFFCESParser::APSMatcher::TRIPLE_BOND
Definition: GAFFCESParser.h:59
BALL::GAFFCESParser::APSMatcher::isNRingAtom
int isNRingAtom(Size size, Atom &atom)
BALL::String
Definition: string.h:56
BALL::GAFFCESParser::APSMatcher::AROMATIC_BOND
Definition: GAFFCESParser.h:65
BALL::GAFFCESParser::APSMatcher::printAPS
String printAPS()
BALL::GAFFCESParser::CESwildcardsPredicate::setWildcards
void setWildcards(String new_wildcard)
BALL::GAFFCESParser::match
bool match(Atom &atom) const
BALL::GAFFCESParser::CESPredicate::XA
Definition: GAFFCESParser.h:131
BALL::GAFFCESParser::CESPredicate::aps_matcher
APSMatcher aps_matcher
Definition: GAFFCESParser.h:174
BALL::GAFFCESParser::APSMatcher::isNonRingAtom
bool isNonRingAtom(Atom &atom)
BALL::GAFFCESParser::CESwildcardsConnectionPredicate::getNumberOfPartners
Size getNumberOfPartners()
BALL::GAFFCESParser::APSMatcher::IS_3_RING_ATOM
Definition: GAFFCESParser.h:40
BALL::GAFFCESParser::GAFFCESatomMatcher
bool GAFFCESatomMatcher(Atom &atom, const String &cesstring)
BALL::GAFFCESParser::APSMatcher::DELOCALIZED_BOND_TO_PARENT
Definition: GAFFCESParser.h:63
BALL::GAFFCESParser::CESPredicate::alreadySeenThisAtom
bool alreadySeenThisAtom(Atom *atom)
BALL::GAFFCESParser::root
TruePredicate root
The root atom has no CESPredicate to test, so it's considered always true.
Definition: GAFFCESParser.h:304
BALL::GAFFCESParser::APSMatcher::IS_OTHER_RING
Definition: GAFFCESParser.h:39
BALL::GAFFCESParser::CESelementPredicate
Definition: GAFFCESParser.h:213
BALL::GAFFCESParser::CESwildcardsConnectionPredicate::wildcards_
CESwildcards wildcards_
Definition: GAFFCESParser.h:271
BALL::GAFFCESParser::CESwildcardsPredicate::matchWildcards
bool matchWildcards(Atom &atom)
BALL::GAFFCESParser::getElementSymbols
const std::set< String > & getElementSymbols()
BALL::GAFFCESParser::APSMatcher::SINGLE_BOND_TO_PARENT
Definition: GAFFCESParser.h:51
BALL::GAFFCESParser::read
Size read(char *buf, Size max_size)
BALL::GAFFCESParser::CESelementPredicate::getElementName
String getElementName()
BALL::GAFFCESParser::APSMatcher::IS_PURELY_AROMATIC
Definition: GAFFCESParser.h:37
BALL::GAFFCESParser::current_predicate
CESPredicate * current_predicate
Definition: GAFFCESParser.h:310
atom.h
BALL::GAFFCESParser::CESPredicate::children
std::vector< CESPredicate * > children
Definition: GAFFCESParser.h:177
BALL::GAFFCESParser::CESelementPredicate::elementName_
String elementName_
Definition: GAFFCESParser.h:229
BALL::GAFFCESParser::APSMatcher::SINGLE_BOND
Definition: GAFFCESParser.h:50
string.h
BALL::GAFFCESParser::CESwildcardsConnectionPredicate::matchWildcards
bool matchWildcards(Atom &atom)
BALL::GAFFCESParser::CESPredicate::addCESelementPredicate
void addCESelementPredicate(String name)
BALL::GAFFCESParser::CESwildcardsConnectionPredicate
Definition: GAFFCESParser.h:253
BALL::GAFFCESParser::APSMatcher::NO_SINGLE_BOND_TO_PARENT
Definition: GAFFCESParser.h:52
BALL::GAFFCESParser::APSMatcher::NO_PURE_SINGLE_BOND_TO_PARENT
Definition: GAFFCESParser.h:49
BALL::GAFFCESParser::APSMatcher::checkGAFFProperties
bool checkGAFFProperties(Atom &atom, Atom &predecessor, APSTerm aps)
BALL::GAFFCESParser::CESPredicate::operator()
virtual bool operator()(Atom &atom)
BALL::GAFFCESParser::CESelementConnectionPredicate
Definition: GAFFCESParser.h:190
BALL::GAFFCESParser::CESelementConnectionPredicate::numberOfPartners_
Size numberOfPartners_
Definition: GAFFCESParser.h:208
BALL::GAFFCESParser::CESPredicate::getStringToWildcard
std::map< String, CESwildcards > getStringToWildcard()
BALL::GAFFCESParser::CESPredicate::addCESelementConnectionPredicate
void addCESelementConnectionPredicate(Size partners, String name)
BALL::GAFFCESParser::APSMatcher::PURE_DOUBLE_BOND_TO_PARENT
Definition: GAFFCESParser.h:54
BALL::GAFFCESParser::CESPredicate::addCESwildcardsPredicate
void addCESwildcardsPredicate(String wildcard)
BALL::GAFFCESParser::State
Definition: GAFFCESParser.h:113
BALL::GAFFCESParser::APSMatcher::aps_terms
std::vector< std::vector< APSTerm > > aps_terms
Definition: GAFFCESParser.h:107
BALL::GAFFCESParser::CESelementConnectionPredicate::CESelementConnectionPredicate
CESelementConnectionPredicate(GAFFCESParser *parser)
Definition: GAFFCESParser.h:193
BALL::GAFFCESParser::APSMatcher::IS_9_RING_ATOM
Definition: GAFFCESParser.h:46
BALL::GAFFCESParser::cesstring_
String cesstring_
Definition: GAFFCESParser.h:324
BALL::GAFFCESParser::TruePredicate::~TruePredicate
~TruePredicate()
Definition: GAFFCESParser.h:282
BALL::GAFFCESParser::State::current_parser
GAFFCESParser * current_parser
Definition: GAFFCESParser.h:116
BALL::GAFFCESParser::CESPredicate::parent
CESPredicate * parent
Definition: GAFFCESParser.h:179
BALL::GAFFCESParser::APSMatcher::NO_TRIPLE_BOND_TO_PARENT
Definition: GAFFCESParser.h:61
BALL::GAFFCESParser::State::feature_number
int feature_number
Definition: GAFFCESParser.h:119
BALL::GAFFCESParser::CESPredicate::addCESwildcardsConnectionPredicate
void addCESwildcardsConnectionPredicate(String wildcard, Size partners)
BALL::GAFFCESParser::CESPredicate::stringToWildcard_
std::map< String, CESwildcards > stringToWildcard_
Definition: GAFFCESParser.h:185
BALL::GAFFCESParser::CESwildcardsConnectionPredicate::CESwildcardsConnectionPredicate
CESwildcardsConnectionPredicate(GAFFCESParser *parser)
Definition: GAFFCESParser.h:256
BALL::GAFFCESParser::APSMatcher::IS_PLANAR
Definition: GAFFCESParser.h:35
BALL::GAFFCESParser::CESelementPredicate::setElementName
void setElementName(String name)
BALL::GAFFCESParser::CESwildcardsConnectionPredicate::getWildcards
CESwildcards getWildcards()
BALL::GAFFCESParser::APSMatcher::APS_TRUE
Definition: GAFFCESParser.h:68
BALL::GAFFCESParser::APSMatcher::NO_DELOCALIZED_BOND_TO_PARENT
Definition: GAFFCESParser.h:64
BALL::GAFFCESParser::APSMatcher::isRingAtom
bool isRingAtom(Atom &atom)
BALL::GAFFCESParser::~GAFFCESParser
~GAFFCESParser()
BALL::GAFFCESParser::CESPredicate::match
virtual bool match(Atom &)
Definition: GAFFCESParser.h:162
BALL::GAFFCESParser::CESwildcardsPredicate::~CESwildcardsPredicate
~CESwildcardsPredicate()
BALL::GAFFCESParser::TruePredicate::TruePredicate
TruePredicate(GAFFCESParser *parser)
Definition: GAFFCESParser.h:279
BALL::GAFFCESParser::APSMatcher::IS_7_RING_ATOM
Definition: GAFFCESParser.h:44
BALL::GAFFCESParser::APSMatcher::IS_PLANAR_WITH_DB_TO_NR
Definition: GAFFCESParser.h:36
BALL::GAFFCESParser::CESPredicate::XC
Definition: GAFFCESParser.h:131
BALL::GAFFCESParser::APSMatcher
Definition: GAFFCESParser.h:27
BALL::GAFFCESParser::CESPredicate::CESwildcards
CESwildcards
Definition: GAFFCESParser.h:129
BALL::GAFFCESParser::APSMatcher::IS_4_RING_ATOM
Definition: GAFFCESParser.h:41
BALL::GAFFCESParser::APSMatcher::IS_PURELY_ALIPHATIC
Definition: GAFFCESParser.h:38
BALL::GAFFCESParser::CESelementConnectionPredicate::getElementName
String getElementName()
BALL::GAFFCESParser::CESPredicate::~CESPredicate
virtual ~CESPredicate()
BALL::GAFFCESParser::GAFFCESParser
GAFFCESParser()
BALL::GAFFCESParser::CESelementConnectionPredicate::setNumberOfPartners
void setNumberOfPartners(Size number)
BALL::GAFFCESParser::root_predicate
CESPredicate * root_predicate
Definition: GAFFCESParser.h:306