Lucene++ - a full-featured, c++ search engine
API Documentation


StandardTokenizerImpl.h
Go to the documentation of this file.
1 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
6 
7 #ifndef STANDARDTOKENIZERIMPL_H
8 #define STANDARDTOKENIZERIMPL_H
9 
10 #include "LuceneObject.h"
11 
12 namespace Lucene {
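13 
14 class StandardTokenizerImpl : public LuceneObject {
15 public:
18  StandardTokenizerImpl(const ReaderPtr& in);
19 
20  virtual ~StandardTokenizerImpl();
21 
22  LUCENE_CLASS(StandardTokenizerImpl);
23 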
24 protected:
26  static const int32_t ZZ_BUFFERSIZE;
27 
29  static CharArray _ZZ_CMAP;
30  static const wchar_t ZZ_CMAP_PACKED[];
31  static const int32_t ZZ_CMAP_LENGTH;
32  static const int32_t ZZ_CMAP_PACKED_LENGTH;
33 
35  static void ZZ_CMAP_INIT();
36  static const wchar_t* ZZ_CMAP();
37 
39  static IntArray _ZZ_ACTION;
40  static const wchar_t ZZ_ACTION_PACKED_0[];
41  static const int32_t ZZ_ACTION_LENGTH;
42  static const int32_t ZZ_ACTION_PACKED_LENGTH;
43 
45  static void ZZ_ACTION_INIT();
46  static const int32_t* ZZ_ACTION();
47 
49  static IntArray _ZZ_ROWMAP;
50  static const wchar_t ZZ_ROWMAP_PACKED_0[];
51  static const int32_t ZZ_ROWMAP_LENGTH;
52  static const int32_t ZZ_ROWMAP_PACKED_LENGTH;
53 
55  static void ZZ_ROWMAP_INIT();
56  static const int32_t* ZZ_ROWMAP();
57 
59  static IntArray _ZZ_TRANS;
60  static const wchar_t ZZ_TRANS_PACKED_0[];
61  static const int32_t ZZ_TRANS_LENGTH;
62  static const int32_t ZZ_TRANS_PACKED_LENGTH;
63 
65  static void ZZ_TRANS_INIT();
66  static const int32_t* ZZ_TRANS();
67 
68  // error codes
69  static const int32_t ZZ_UNKNOWN_ERROR;
70  static const int32_t ZZ_NO_MATCH;
71  static const int32_t ZZ_PUSHBACK_2BIG;
72 
73  static const wchar_t* ZZ_ERROR_MSG[];
74 
76  static IntArray _ZZ_ATTRIBUTE;
77  static const wchar_t ZZ_ATTRIBUTE_PACKED_0[];
78  static const int32_t ZZ_ATTRIBUTE_LENGTH;
79  static const int32_t ZZ_ATTRIBUTE_PACKED_LENGTH;
80 
82  static void ZZ_ATTRIBUTE_INIT();
83  static const int32_t* ZZ_ATTRIBUTE();
84 
86  ReaderPtr zzReader;
87 
89  int32_t zzState;
90 
92  int32_t zzLexicalState;
93 
95  CharArray zzBuffer;
96 
98  int32_t zzMarkedPos;
99 
101  int32_t zzPushbackPos;
102 
104  int32_t zzCurrentPos;
105 
107  int32_t zzStartRead;
108 
110  int32_t zzEndRead;
111 
113  int32_t yyline;
114 
116  int32_t _yychar;
117 
119  int32_t yycolumn;
120 
122  bool zzAtBOL;
123 
125  bool zzAtEOF;
126 
127 public:
129  static const int32_t YYEOF;
130 
132  static const int32_t YYINITIAL;
133 
134 public:
135  int32_t yychar();
136 
138  void reset(const ReaderPtr& r);
139 
141  void getText(const TokenPtr& t);
142 
144  void getText(const TermAttributePtr& t);
145 
147  void yyclose();
148 
155  void yyreset(const ReaderPtr& reader);
156 
158  int32_t yystate();
159 
162  void yybegin(int32_t newState);
163 
165  String yytext();
166 
172  wchar_t yycharat(int32_t pos);
173 
175  int32_t yylength();
176 
181  void yypushback(int32_t number);
182 
185  int32_t getNextToken();
186 
187 protected:
189  bool zzRefill();
190 
200  void zzScanError(int32_t errorCode);
201 };
202 
203 }
204 
205 #endif
Lucene::StandardTokenizerImpl::ZZ_ROWMAP
static const int32_t * ZZ_ROWMAP()
Lucene::StandardTokenizerImpl::getText
void getText(const TokenPtr &t)
Fills Lucene token with the current token text.
Lucene::StandardTokenizerImpl::ZZ_ROWMAP_LENGTH
static const int32_t ZZ_ROWMAP_LENGTH
Definition: StandardTokenizerImpl.h:51
Lucene::StandardTokenizerImpl
Definition: StandardTokenizerImpl.h:14
Lucene::StandardTokenizerImpl::ZZ_ACTION
static const int32_t * ZZ_ACTION()
Lucene::StandardTokenizerImpl::ZZ_ATTRIBUTE_PACKED_0
static const wchar_t ZZ_ATTRIBUTE_PACKED_0[]
Definition: StandardTokenizerImpl.h:77
LUCENE_CLASS
#define LUCENE_CLASS(Name)
Definition: LuceneObject.h:24
Lucene::StandardTokenizerImpl::ZZ_NO_MATCH
static const int32_t ZZ_NO_MATCH
Definition: StandardTokenizerImpl.h:70
Lucene::StandardTokenizerImpl::yylength
int32_t yylength()
Returns the length of the matched text region.
Lucene::StandardTokenizerImpl::yyclose
void yyclose()
Closes the input stream.
Lucene::StandardTokenizerImpl::yybegin
void yybegin(int32_t newState)
Enters a new lexical state.
Lucene::StandardTokenizerImpl::zzAtEOF
bool zzAtEOF
zzAtEOF == true if the scanner is at the EOF
Definition: StandardTokenizerImpl.h:125
Lucene::ReaderPtr
boost::shared_ptr< Reader > ReaderPtr
Definition: LuceneTypes.h:547
Lucene::StandardTokenizerImpl::zzState
int32_t zzState
The current state of the DFA.
Definition: StandardTokenizerImpl.h:89
Lucene::StandardTokenizerImpl::zzMarkedPos
int32_t zzMarkedPos
The text position at the last accepting state.
Definition: StandardTokenizerImpl.h:98
Lucene::StandardTokenizerImpl::ZZ_ROWMAP_PACKED_LENGTH
static const int32_t ZZ_ROWMAP_PACKED_LENGTH
Definition: StandardTokenizerImpl.h:52
Lucene::StandardTokenizerImpl::ZZ_ERROR_MSG
static const wchar_t * ZZ_ERROR_MSG[]
Definition: StandardTokenizerImpl.h:73
Lucene::StandardTokenizerImpl::zzEndRead
int32_t zzEndRead
EndRead marks the last character in the buffer, that has been read from input.
Definition: StandardTokenizerImpl.h:110
Lucene::StandardTokenizerImpl::ZZ_CMAP_INIT
static void ZZ_CMAP_INIT()
Translates characters to character classes.
Lucene::StandardTokenizerImpl::yystate
int32_t yystate()
Returns the current lexical state.
Lucene::StandardTokenizerImpl::zzPushbackPos
int32_t zzPushbackPos
The text position at the last state to be included in yytext.
Definition: StandardTokenizerImpl.h:101
Lucene::StandardTokenizerImpl::yycolumn
int32_t yycolumn
The number of characters from the last newline up to the start of the matched text.
Definition: StandardTokenizerImpl.h:119
Lucene::StandardTokenizerImpl::ZZ_ACTION_LENGTH
static const int32_t ZZ_ACTION_LENGTH
Definition: StandardTokenizerImpl.h:41
Lucene::StandardTokenizerImpl::zzBuffer
CharArray zzBuffer
This buffer contains the current text to be matched and is the source of the yytext() string.
Definition: StandardTokenizerImpl.h:95
Lucene::StandardTokenizerImpl::ZZ_TRANS
static const int32_t * ZZ_TRANS()
Lucene::StandardTokenizerImpl::ZZ_CMAP
static const wchar_t * ZZ_CMAP()
Lucene::StandardTokenizerImpl::ZZ_ROWMAP_INIT
static void ZZ_ROWMAP_INIT()
Translates a state to a row index in the transition table.
Lucene::StandardTokenizerImpl::_ZZ_CMAP
static CharArray _ZZ_CMAP
Translates characters to character classes.
Definition: StandardTokenizerImpl.h:29
Lucene::StandardTokenizerImpl::zzLexicalState
int32_t zzLexicalState
The current lexical state.
Definition: StandardTokenizerImpl.h:92
Lucene::TokenPtr
boost::shared_ptr< Token > TokenPtr
Definition: LuceneTypes.h:59
Lucene::StandardTokenizerImpl::zzCurrentPos
int32_t zzCurrentPos
The current text position in the buffer.
Definition: StandardTokenizerImpl.h:104
Lucene::StandardTokenizerImpl::ZZ_ACTION_INIT
static void ZZ_ACTION_INIT()
Translates DFA states to action switch labels.
Lucene::StandardTokenizerImpl::zzScanError
void zzScanError(int32_t errorCode)
Reports an error that occurred while scanning.
Lucene
Definition: AbstractAllTermDocs.h:12
Lucene::StandardTokenizerImpl::ZZ_CMAP_LENGTH
static const int32_t ZZ_CMAP_LENGTH
Definition: StandardTokenizerImpl.h:31
Lucene::StandardTokenizerImpl::ZZ_ATTRIBUTE_LENGTH
static const int32_t ZZ_ATTRIBUTE_LENGTH
Definition: StandardTokenizerImpl.h:78
Lucene::LuceneObject
Base class for all Lucene classes.
Definition: LuceneObject.h:31
Lucene::StandardTokenizerImpl::yypushback
void yypushback(int32_t number)
Pushes the specified amount of characters back into the input stream.
Lucene::StandardTokenizerImpl::ZZ_CMAP_PACKED_LENGTH
static const int32_t ZZ_CMAP_PACKED_LENGTH
Definition: StandardTokenizerImpl.h:32
Lucene::StandardTokenizerImpl::ZZ_ACTION_PACKED_0
static const wchar_t ZZ_ACTION_PACKED_0[]
Definition: StandardTokenizerImpl.h:40
Lucene::StandardTokenizerImpl::ZZ_ATTRIBUTE_PACKED_LENGTH
static const int32_t ZZ_ATTRIBUTE_PACKED_LENGTH
Definition: StandardTokenizerImpl.h:79
Lucene::StandardTokenizerImpl::zzReader
ReaderPtr zzReader
The input device.
Definition: StandardTokenizerImpl.h:86
Lucene::StandardTokenizerImpl::_ZZ_ROWMAP
static IntArray _ZZ_ROWMAP
Translates a state to a row index in the transition table.
Definition: StandardTokenizerImpl.h:49
Lucene::StandardTokenizerImpl::ZZ_UNKNOWN_ERROR
static const int32_t ZZ_UNKNOWN_ERROR
Definition: StandardTokenizerImpl.h:69
Lucene::StandardTokenizerImpl::ZZ_TRANS_LENGTH
static const int32_t ZZ_TRANS_LENGTH
Definition: StandardTokenizerImpl.h:61
Lucene::StandardTokenizerImpl::ZZ_ROWMAP_PACKED_0
static const wchar_t ZZ_ROWMAP_PACKED_0[]
Definition: StandardTokenizerImpl.h:50
Lucene::StandardTokenizerImpl::YYINITIAL
static const int32_t YYINITIAL
Lexical states.
Definition: StandardTokenizerImpl.h:132
Lucene::StandardTokenizerImpl::ZZ_ATTRIBUTE
static const int32_t * ZZ_ATTRIBUTE()
Lucene::StandardTokenizerImpl::yyreset
void yyreset(const ReaderPtr &reader)
Resets the scanner to read from a new input stream. Does not close the old reader.
Lucene::StandardTokenizerImpl::reset
void reset(const ReaderPtr &r)
Resets the Tokenizer to a new Reader.
Lucene::StandardTokenizerImpl::ZZ_TRANS_INIT
static void ZZ_TRANS_INIT()
The transition table of the DFA.
Lucene::StandardTokenizerImpl::StandardTokenizerImpl
StandardTokenizerImpl(const ReaderPtr &in)
Creates a new scanner.
Lucene::StandardTokenizerImpl::ZZ_TRANS_PACKED_0
static const wchar_t ZZ_TRANS_PACKED_0[]
Definition: StandardTokenizerImpl.h:60
Lucene::StandardTokenizerImpl::yycharat
wchar_t yycharat(int32_t pos)
Returns the character at position pos from the matched text.
Lucene::StandardTokenizerImpl::_yychar
int32_t _yychar
The number of characters up to the start of the matched text.
Definition: StandardTokenizerImpl.h:116
Lucene::TermAttributePtr
boost::shared_ptr< TermAttribute > TermAttributePtr
Definition: LuceneTypes.h:58
Lucene::StandardTokenizerImpl::_ZZ_ACTION
static IntArray _ZZ_ACTION
Translates DFA states to action switch labels.
Definition: StandardTokenizerImpl.h:39
Lucene::StandardTokenizerImpl::yyline
int32_t yyline
Number of newlines encountered up to the start of the matched text.
Definition: StandardTokenizerImpl.h:113
Lucene::StandardTokenizerImpl::zzAtBOL
bool zzAtBOL
zzAtBOL == true if the scanner is currently at the beginning of a line
Definition: StandardTokenizerImpl.h:122
Lucene::StandardTokenizerImpl::ZZ_CMAP_PACKED
static const wchar_t ZZ_CMAP_PACKED[]
Definition: StandardTokenizerImpl.h:30
Lucene::StandardTokenizerImpl::ZZ_ATTRIBUTE_INIT
static void ZZ_ATTRIBUTE_INIT()
ZZ_ATTRIBUTE[aState] contains the attributes of state aState.
Lucene::StandardTokenizerImpl::zzStartRead
int32_t zzStartRead
StartRead marks the beginning of the yytext() string in the buffer.
Definition: StandardTokenizerImpl.h:107
Lucene::StandardTokenizerImpl::getNextToken
int32_t getNextToken()
Resumes scanning until the next regular expression is matched, the end of input is encountered or an I/O-Error occurs.
Lucene::StandardTokenizerImpl::YYEOF
static const int32_t YYEOF
This character denotes the end of file.
Definition: StandardTokenizerImpl.h:129
Lucene::StandardTokenizerImpl::yychar
int32_t yychar()
Lucene::StandardTokenizerImpl::_ZZ_TRANS
static IntArray _ZZ_TRANS
The transition table of the DFA.
Definition: StandardTokenizerImpl.h:59
Lucene::StandardTokenizerImpl::~StandardTokenizerImpl
virtual ~StandardTokenizerImpl()
Lucene::StandardTokenizerImpl::zzRefill
bool zzRefill()
Refills the input buffer.
Lucene::StandardTokenizerImpl::ZZ_ACTION_PACKED_LENGTH
static const int32_t ZZ_ACTION_PACKED_LENGTH
Definition: StandardTokenizerImpl.h:42
Lucene::StandardTokenizerImpl::ZZ_BUFFERSIZE
static const int32_t ZZ_BUFFERSIZE
Initial size of the lookahead buffer.
Definition: StandardTokenizerImpl.h:22
Lucene::StandardTokenizerImpl::yytext
String yytext()
Returns the text matched by the current regular expression.
Lucene::StandardTokenizerImpl::ZZ_PUSHBACK_2BIG
static const int32_t ZZ_PUSHBACK_2BIG
Definition: StandardTokenizerImpl.h:71
Lucene::StandardTokenizerImpl::getText
void getText(const TermAttributePtr &t)
Fills TermAttribute with the current token text.
Lucene::StandardTokenizerImpl::ZZ_TRANS_PACKED_LENGTH
static const int32_t ZZ_TRANS_PACKED_LENGTH
Definition: StandardTokenizerImpl.h:62
LuceneObject.h
Lucene::StandardTokenizerImpl::_ZZ_ATTRIBUTE
static IntArray _ZZ_ATTRIBUTE
ZZ_ATTRIBUTE[aState] contains the attributes of state aState.
Definition: StandardTokenizerImpl.h:76

clucene.sourceforge.net