Lucene++ - a full-featured, c++ search engine
API Documentation


Token.h
Go to the documentation of this file.
1 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
6 
7 #ifndef TOKEN_H
8 #define TOKEN_H
9 
10 #include "Attribute.h"
11 #include "AttributeSource.h"
12 
13 namespace Lucene {
14 
76 class LPPAPI Token : public Attribute {
77 public:
79  Token();
80 
84  Token(int32_t start, int32_t end);
85 
90  Token(int32_t start, int32_t end, const String& type);
91 
96  Token(int32_t start, int32_t end, int32_t flags);
97 
103  Token(const String& text, int32_t start, int32_t end);
104 
111  Token(const String& text, int32_t start, int32_t end, const String& type);
112 
119  Token(const String& text, int32_t start, int32_t end, int32_t flags);
120 
122  Token(CharArray startTermBuffer, int32_t termBufferOffset, int32_t termBufferLength, int32_t start, int32_t end);
123 
124  virtual ~Token();
125 
127 
128 public:
129  static const String& DEFAULT_TYPE();
130 
131 protected:
132  static const int32_t MIN_BUFFER_SIZE;
133 
134  CharArray _termBuffer;
135  int32_t _termLength;
136  int32_t _startOffset;
137  int32_t _endOffset;
138  String _type;
139  int32_t flags;
142 
143 public:
163  virtual void setPositionIncrement(int32_t positionIncrement);
164 
167  virtual int32_t getPositionIncrement();
168 
174  virtual String term();
175 
180  virtual void setTermBuffer(const wchar_t* buffer, int32_t offset, int32_t length);
181 
184  virtual void setTermBuffer(const String& buffer);
185 
190  virtual void setTermBuffer(const String& buffer, int32_t offset, int32_t length);
191 
195  virtual CharArray termBuffer();
196 
198  virtual wchar_t* termBufferArray();
199 
206  virtual CharArray resizeTermBuffer(int32_t newSize);
207 
209  virtual int32_t termLength();
210 
215  virtual void setTermLength(int32_t length);
216 
222  virtual int32_t startOffset();
223 
226  virtual void setStartOffset(int32_t offset);
227 
230  virtual int32_t endOffset();
231 
234  virtual void setEndOffset(int32_t offset);
235 
238  virtual void setOffset(int32_t startOffset, int32_t endOffset);
239 
241  virtual String type();
242 
245  virtual void setType(const String& type);
246 
252  virtual int32_t getFlags();
253 
255  virtual void setFlags(int32_t flags);
256 
259 
261  virtual void setPayload(const PayloadPtr& payload);
262 
263  virtual String toString();
264 
266  virtual void clear();
267 
269 
272  TokenPtr clone(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset);
273 
274  virtual bool equals(const LuceneObjectPtr& other);
275  virtual int32_t hashCode();
276 
280  TokenPtr reinit(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset, const String& newType);
281 
285  TokenPtr reinit(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset);
286 
290  TokenPtr reinit(const String& newTerm, int32_t newStartOffset, int32_t newEndOffset, const String& newType);
291 
295  TokenPtr reinit(const String& newTerm, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset, const String& newType);
296 
300  TokenPtr reinit(const String& newTerm, int32_t newStartOffset, int32_t newEndOffset);
301 
305  TokenPtr reinit(const String& newTerm, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset);
306 
308  void reinit(const TokenPtr& prototype);
309 
311  void reinit(const TokenPtr& prototype, const String& newTerm);
312 
314  void reinit(const TokenPtr& prototype, CharArray newTermBuffer, int32_t offset, int32_t length);
315 
316  virtual void copyTo(const AttributePtr& target);
317 
320 
321 protected:
323  void ConstructToken(int32_t start, int32_t end, const String& type, int32_t flags);
324 
328  void growTermBuffer(int32_t newSize);
329 
331 
334 };
335 
339 public:
342 
344 
345 protected:
347 
348 public:
349  virtual AttributePtr createAttributeInstance(const String& className);
350  virtual bool equals(const LuceneObjectPtr& other);
351  virtual int32_t hashCode();
352 };
353 
354 }
355 
356 #endif
Lucene::Token::setPayload
virtual void setPayload(const PayloadPtr &payload)
Sets this Token's payload.
Lucene::TokenAttributeFactory::equals
virtual bool equals(const LuceneObjectPtr &other)
Return whether two objects are equal.
Lucene::Token::setType
virtual void setType(const String &type)
Set the lexical type.
Lucene::Token::reinit
TokenPtr reinit(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset)
Shorthand for calling clear, setTermBuffer(char[], int, int), setStartOffset, setEndOffset,...
Lucene::Token::Token
Token(const String &text, int32_t start, int32_t end, const String &type)
Constructs a Token with the given term text, start and end offsets and type. NOTE: for better indexin...
Lucene::Token::clone
TokenPtr clone(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset)
Makes a clone, but replaces the term buffer and start/end offset in the process. This is more efficie...
Lucene::Token::Token
Token(int32_t start, int32_t end, int32_t flags)
Constructs a Token with null text and start and end offsets plus flags.
Lucene::Token::setTermBuffer
virtual void setTermBuffer(const wchar_t *buffer, int32_t offset, int32_t length)
Copies the contents of buffer, starting at offset for length characters, into the termBuffer array.
Lucene::Token::setOffset
virtual void setOffset(int32_t startOffset, int32_t endOffset)
Set the starting and ending offset.
LUCENE_CLASS
#define LUCENE_CLASS(Name)
Definition: LuceneObject.h:24
Lucene::Token::equals
virtual bool equals(const LuceneObjectPtr &other)
All values used for computation of hashCode() should be checked here for equality.
Lucene::Token::setFlags
virtual void setFlags(int32_t flags)
Lucene::Token::initTermBuffer
void initTermBuffer()
Lucene::TokenAttributeFactory::hashCode
virtual int32_t hashCode()
Return hash code for this object.
Lucene::LuceneObjectPtr
boost::shared_ptr< LuceneObject > LuceneObjectPtr
Definition: LuceneTypes.h:539
Lucene::Token::Token
Token(const String &text, int32_t start, int32_t end, int32_t flags)
Constructs a Token with the given term text, start and end offsets and flags. NOTE: for better indexi...
Lucene::Token::reinit
TokenPtr reinit(const String &newTerm, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset)
Shorthand for calling clear, setTermBuffer(String, int, int), setStartOffset, setEndOffset,...
Lucene::Attribute
Base class for Attributes that can be added to an AttributeSource.
Definition: Attribute.h:18
Lucene::Token::reinit
void reinit(const TokenPtr &prototype)
Copy the prototype token's fields into this one. Note: Payloads are shared.
Lucene::TokenAttributeFactory
Creates a TokenAttributeFactory returning Token as instance for the basic attributes and for all othe...
Definition: Token.h:338
Lucene::Token::growTermBuffer
void growTermBuffer(int32_t newSize)
Allocates a buffer char[] of at least newSize, without preserving the existing content....
Lucene::Token::type
virtual String type()
Returns this Token's lexical type. Defaults to "word".
Lucene::Token::getPayload
virtual PayloadPtr getPayload()
Returns this Token's payload.
Lucene::AttributeFactory
Definition: AttributeSource.h:14
Lucene::Token::reinit
TokenPtr reinit(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset, const String &newType)
Shorthand for calling clear, setTermBuffer(char[], int, int), setStartOffset, setEndOffset,...
Lucene::Token::setPositionIncrement
virtual void setPositionIncrement(int32_t positionIncrement)
Set the position increment. This determines the position of this token relative to the previous Token...
Lucene::Token::reinit
void reinit(const TokenPtr &prototype, CharArray newTermBuffer, int32_t offset, int32_t length)
Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.
Lucene::TokenPtr
boost::shared_ptr< Token > TokenPtr
Definition: LuceneTypes.h:59
Lucene::Token::Token
Token(const String &text, int32_t start, int32_t end)
Constructs a Token with the given term text, start and end offsets. The type defaults to "word....
Lucene::Token::_type
String _type
Definition: Token.h:138
Lucene::Token::termBuffer
virtual CharArray termBuffer()
Returns the internal termBuffer character array which you can then directly alter....
Lucene
Definition: AbstractAllTermDocs.h:12
Lucene::TokenAttributeFactory::TokenAttributeFactory
TokenAttributeFactory(const AttributeFactoryPtr &delegate)
Lucene::Token::setEndOffset
virtual void setEndOffset(int32_t offset)
Set the ending offset.
Lucene::Token::_termLength
int32_t _termLength
Definition: Token.h:135
Lucene::Token::term
virtual String term()
Returns the Token's term text.
Lucene::Token::getPositionIncrement
virtual int32_t getPositionIncrement()
Returns the position increment of this Token.
AttributeSource.h
Lucene::Token::hashCode
virtual int32_t hashCode()
Subclasses must implement this method and should compute a hashCode similar to this:
Lucene::Token::resizeTermBuffer
virtual CharArray resizeTermBuffer(int32_t newSize)
Grows the termBuffer to at least size newSize, preserving the existing content. Note: If the next ope...
Lucene::Token::MIN_BUFFER_SIZE
static const int32_t MIN_BUFFER_SIZE
Definition: Token.h:132
Lucene::Token::reinit
TokenPtr reinit(const String &newTerm, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset, const String &newType)
Shorthand for calling clear, setTermBuffer(String, int, int), setStartOffset, setEndOffset, setType.
Lucene::Token::DEFAULT_TYPE
static const String & DEFAULT_TYPE()
Lucene::Token::Token
Token(int32_t start, int32_t end)
Constructs a Token with null text and start and end offsets.
Lucene::TokenAttributeFactory::~TokenAttributeFactory
virtual ~TokenAttributeFactory()
Lucene::Token::setTermBuffer
virtual void setTermBuffer(const String &buffer)
Copies the contents of buffer into the termBuffer array.
Lucene::Token::Token
Token()
Constructs a Token with null text.
Lucene::AttributePtr
boost::shared_ptr< Attribute > AttributePtr
Definition: LuceneTypes.h:518
Lucene::Token::TOKEN_ATTRIBUTE_FACTORY
static AttributeFactoryPtr TOKEN_ATTRIBUTE_FACTORY()
Convenience factory that returns Token as implementation for the basic attributes.
Lucene::Token::flags
int32_t flags
Definition: Token.h:139
Lucene::Token::clone
virtual LuceneObjectPtr clone(const LuceneObjectPtr &other=LuceneObjectPtr())
Shallow clone. Subclasses must override this if they need to clone any members deeply.
Lucene::Token::positionIncrement
int32_t positionIncrement
Definition: Token.h:141
Lucene::PayloadPtr
boost::shared_ptr< Payload > PayloadPtr
Definition: LuceneTypes.h:198
Lucene::Token::reinit
void reinit(const TokenPtr &prototype, const String &newTerm)
Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.
Lucene::Token::_startOffset
int32_t _startOffset
Definition: Token.h:136
Lucene::Token::setStartOffset
virtual void setStartOffset(int32_t offset)
Set the starting offset.
Lucene::Token::toString
virtual String toString()
Returns a string representation of the object.
Lucene::Token::_termBuffer
CharArray _termBuffer
Definition: Token.h:134
Lucene::Token::clearNoTermBuffer
void clearNoTermBuffer()
Like clear() but doesn't clear termBuffer/text.
Lucene::Token::payload
PayloadPtr payload
Definition: Token.h:140
Lucene::TokenAttributeFactory::createAttributeInstance
virtual AttributePtr createAttributeInstance(const String &className)
Returns an Attribute.
Lucene::Token::startOffset
virtual int32_t startOffset()
Returns this Token's starting offset, the position of the first character corresponding to this token...
Lucene::Token::endOffset
virtual int32_t endOffset()
Returns this Token's ending offset, one greater than the position of the last character corresponding...
Lucene::TokenAttributeFactory::delegate
AttributeFactoryPtr delegate
Definition: Token.h:343
Lucene::Token::~Token
virtual ~Token()
Lucene::Token::Token
Token(CharArray startTermBuffer, int32_t termBufferOffset, int32_t termBufferLength, int32_t start, int32_t end)
Constructs a Token with the given term buffer (offset and length), start and end offsets.
Lucene::Token::getFlags
virtual int32_t getFlags()
Get the bitset for any bits that have been set. This is completely distinct from type(),...
Lucene::Token::Token
Token(int32_t start, int32_t end, const String &type)
Constructs a Token with null text and start and end offsets plus the Token type.
Lucene::Token::setTermLength
virtual void setTermLength(int32_t length)
Set number of valid characters (length of the term) in the termBuffer array. Use this to truncate the...
Lucene::Token::copyTo
virtual void copyTo(const AttributePtr &target)
Copies the values from this Attribute into the passed-in target attribute. The target implementation ...
Lucene::Token::reinit
TokenPtr reinit(const String &newTerm, int32_t newStartOffset, int32_t newEndOffset)
Shorthand for calling clear, setTermBuffer(String), setStartOffset, setEndOffset, setType on Token::D...
Attribute.h
Lucene::Token::setTermBuffer
virtual void setTermBuffer(const String &buffer, int32_t offset, int32_t length)
Copies the contents of buffer, starting at offset and continuing for length characters,...
Lucene::Token::clear
virtual void clear()
Resets the term text, payload, flags, and positionIncrement, startOffset, endOffset and token type to...
Lucene::Token::ConstructToken
void ConstructToken(int32_t start, int32_t end, const String &type, int32_t flags)
Construct Token and initialize values.
Lucene::AttributeFactoryPtr
boost::shared_ptr< AttributeFactory > AttributeFactoryPtr
Definition: LuceneTypes.h:519
Lucene::Token::termBufferArray
virtual wchar_t * termBufferArray()
Optimized implementation of termBuffer.
Lucene::Token
A Token is an occurrence of a term from the text of a field. It consists of a term's text,...
Definition: Token.h:76
Lucene::Token::_endOffset
int32_t _endOffset
Definition: Token.h:137
Lucene::Token::reinit
TokenPtr reinit(const String &newTerm, int32_t newStartOffset, int32_t newEndOffset, const String &newType)
Shorthand for calling clear, setTermBuffer(String), setStartOffset, setEndOffset, setType.
Lucene::Token::termLength
virtual int32_t termLength()
Return number of valid characters (length of the term) in the termBuffer array.

clucene.sourceforge.net