Lucene++ - a full-featured, c++ search engine
API Documentation


IndexReader.h
Go to the documentation of this file.
1 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
6 
7 #ifndef INDEXREADER_H
8 #define INDEXREADER_H
9 
10 #include "SegmentInfos.h"
11 
12 namespace Lucene {
13 
/// IndexReader is an abstract class, providing an interface for accessing an index.
/// NOTE(review): this is a line-numbered listing; gaps in the numbering are lines
/// (doc comments and some declarations) that the listing does not show.
39 class LPPAPI IndexReader : public LuceneObject {
40 public:
42  virtual ~IndexReader();
43 
45 
46 public:
    /// Constants describing field properties, for example used for getFieldNames(FieldOption).
    /// NOTE(review): only the last enumerator is visible here; the cross-reference index
    /// places FIELD_OPTION_ALL through FIELD_OPTION_TERMVECTOR_WITH_OFFSET on lines 50-68.
48  enum FieldOption {
70  FIELD_OPTION_TERMVECTOR_WITH_POSITION_OFFSET
71  };
72 
73  static const int32_t DEFAULT_TERMS_INDEX_DIVISOR;
74 
75 protected:
76  bool closed;
    /// Reference count of this reader; see getRefCount(), incRef() and decRef().
78  int32_t refCount;
79 
80 public:
    /// Returns the current refCount for this reader.
82  int32_t getRefCount();
83 
    /// Increments the refCount of this IndexReader instance.
90  void incRef();
91 
    /// Decreases the refCount of this IndexReader instance.
95  void decRef();
96 
    /// Returns an IndexReader reading the index in the given Directory, with readOnly = true.
99  static IndexReaderPtr open(const DirectoryPtr& directory);
100 
    /// Returns an IndexReader reading the index in the given Directory.
106  static IndexReaderPtr open(const DirectoryPtr& directory, bool readOnly);
107 
    /// Returns an IndexReader reading the index in the given IndexCommit.
113  static IndexReaderPtr open(const IndexCommitPtr& commit, bool readOnly);
114 
    /// Returns an IndexReader reading the index in the given Directory, with a custom
    /// IndexDeletionPolicy.
122  static IndexReaderPtr open(const DirectoryPtr& directory, const IndexDeletionPolicyPtr& deletionPolicy, bool readOnly);
123 
    /// Returns an IndexReader reading the index in the given Directory, with a custom
    /// IndexDeletionPolicy.
137  static IndexReaderPtr open(const DirectoryPtr& directory, const IndexDeletionPolicyPtr& deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor);
138 
    /// Returns an IndexReader reading the index using a specific commit; also takes an
    /// IndexDeletionPolicy.
147  static IndexReaderPtr open(const IndexCommitPtr& commit, const IndexDeletionPolicyPtr& deletionPolicy, bool readOnly);
148 
    /// Returns an IndexReader reading the index using a specific commit; also takes an
    /// IndexDeletionPolicy.
162  static IndexReaderPtr open(const IndexCommitPtr& commit, const IndexDeletionPolicyPtr& deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor);
163 
198 
    /// Just like reopen(), except you can change the readOnly of the original reader.
201  virtual IndexReaderPtr reopen(bool openReadOnly);
202 
    /// Reopen this reader on a specific commit point. This always returns a readOnly reader.
206  virtual IndexReaderPtr reopen(const IndexCommitPtr& commit);
207 
217 
    /// Clones the IndexReader and optionally changes readOnly.
219  virtual LuceneObjectPtr clone(bool openReadOnly, const LuceneObjectPtr& other = LuceneObjectPtr());
220 
225 
    /// Returns the time the index in the named directory was last modified.
228  static int64_t lastModified(const DirectoryPtr& directory2);
229 
    /// Reads version number from segments files.
234  static int64_t getCurrentVersion(const DirectoryPtr& directory);
235 
    /// Reads commitUserData, previously passed to IndexWriter::commit(MapStringString).
239  static MapStringString getCommitUserData(const DirectoryPtr& directory);
240 
    /// Version number when this IndexReader was opened. Not implemented in the IndexReader
    /// base class.
254  virtual int64_t getVersion();
255 
    /// Retrieve the String userData optionally passed to IndexWriter::commit.
258  virtual MapStringString getCommitUserData();
259 
    /// Check whether any new changes have occurred to the index since this reader was opened.
274  virtual bool isCurrent();
275 
    /// Checks if the index is optimized (if it has a single segment and no deletions).
279  virtual bool isOptimized();
280 
    /// Return an array of term frequency vectors for the specified document.
290  virtual Collection<TermFreqVectorPtr> getTermFreqVectors(int32_t docNumber) = 0;
291 
    /// Return a term frequency vector for the specified document and field.
301  virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field) = 0;
302 
    /// Load the Term Vector into a user-defined data structure.
308  virtual void getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper) = 0;
309 
    /// Map all the term vectors for all fields in a Document.
313  virtual void getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper) = 0;
314 
    /// Returns true if an index exists at the specified directory.
319  static bool indexExists(const DirectoryPtr& directory);
320 
    /// Returns the number of documents in this index.
322  virtual int32_t numDocs() = 0;
323 
    /// Returns one greater than the largest possible document number.
326  virtual int32_t maxDoc() = 0;
327 
    /// Returns the number of deleted documents.
329  int32_t numDeletedDocs();
330 
    /// NOTE(review): presumably returns the stored document numbered n - no description is
    /// visible in this listing; confirm against the full header.
337  virtual DocumentPtr document(int32_t n);
338 
    /// NOTE(review): presumably like document(n) but restricted by fieldSelector - confirm
    /// against the full header.
359  virtual DocumentPtr document(int32_t n, const FieldSelectorPtr& fieldSelector) = 0;
360 
    /// Returns true if document n has been deleted.
362  virtual bool isDeleted(int32_t n) = 0;
363 
    /// Returns true if any documents have been deleted.
365  virtual bool hasDeletions() = 0;
366 
    /// Used for testing.
368  virtual bool hasChanges();
369 
    /// Returns true if there are norms stored for this field.
371  virtual bool hasNorms(const String& field);
372 
    /// Returns the byte-encoded normalization factor for the named field of every document.
376  virtual ByteArray norms(const String& field) = 0;
377 
    /// Reads the byte-encoded normalization factor for the named field of every document.
381  virtual void norms(const String& field, ByteArray norms, int32_t offset) = 0;
382 
    /// Resets the normalization factor for the named field of the named document.
392  virtual void setNorm(int32_t doc, const String& field, uint8_t value);
393 
    /// Resets the normalization factor for the named field of the named document.
398  virtual void setNorm(int32_t doc, const String& field, double value);
399 
    /// Returns an enumeration of all the terms in the index.
404  virtual TermEnumPtr terms() = 0;
405 
    /// Returns an enumeration of all terms starting at a given term.
410  virtual TermEnumPtr terms(const TermPtr& t) = 0;
411 
    /// Returns the number of documents containing the term t.
413  virtual int32_t docFreq(const TermPtr& t) = 0;
414 
    /// Returns an enumeration of all the documents which contain term.
420  virtual TermDocsPtr termDocs(const TermPtr& term);
421 
    /// Returns an unpositioned TermDocs enumerator.
423  virtual TermDocsPtr termDocs() = 0;
424 
432  virtual TermPositionsPtr termPositions(const TermPtr& term);
433 
435  virtual TermPositionsPtr termPositions() = 0;
436 
    /// Deletes the document numbered docNum.
442  virtual void deleteDocument(int32_t docNum);
443 
    /// Deletes all documents that have a given term indexed.
450  virtual int32_t deleteDocuments(const TermPtr& term);
451 
    /// Undeletes all documents currently marked as deleted in this index.
453  virtual void undeleteAll();
454 
455  void flush();
456 
459  void flush(MapStringString commitUserData);
460 
    /// Commit changes resulting from delete, undeleteAll, or setNorm operations.
464  void commit(MapStringString commitUserData);
465 
    /// Closes files associated with this index. Also saves any new deletions to disk.
468  void close();
469 
    /// Get a list of unique field names that exist in this index and have the specified
    /// field option.
473  virtual HashSet<String> getFieldNames(FieldOption fieldOption) = 0;
474 
478 
    /// Prints the filename and size of each file within a given compound file.
484  static void main(Collection<String> args);
485 
494 
505 
507 
510 
    /// Returns the number of unique terms (across all fields) in this reader.
515  virtual int64_t getUniqueTermCount();
516 
    /// Relevant to IndexReader implementations that use TermInfosReader to read terms.
    /// NOTE(review): presumably returns the current index divisor - description is truncated
    /// in this listing; confirm against the full header.
519  virtual int32_t getTermInfosIndexDivisor();
520 
521 protected:
522  void ensureOpen();
523 
524  static IndexReaderPtr open(const DirectoryPtr& directory, const IndexDeletionPolicyPtr& deletionPolicy, const IndexCommitPtr& commit, bool readOnly, int32_t termInfosIndexDivisor);
525 
    /// Implements setNorm in subclass.
527  virtual void doSetNorm(int32_t doc, const String& field, uint8_t value) = 0;
528 
    /// Implements deletion of the document numbered docNum.
531  virtual void doDelete(int32_t docNum) = 0;
532 
    /// Implements actual undeleteAll() in subclass.
534  virtual void doUndeleteAll() = 0;
535 
    /// Does nothing by default.
538  virtual void acquireWriteLock();
539 
    /// Commit changes resulting from delete, undeleteAll, or setNorm operations.
543  void commit();
544 
    /// Implements commit.
546  virtual void doCommit(MapStringString commitUserData) = 0;
547 
    /// Implements close.
549  virtual void doClose() = 0;
550 
    // DirectoryReader: an IndexReader which reads indexes with multiple segments.
551  friend class DirectoryReader;
    // ParallelReader: an IndexReader which reads multiple, parallel indexes.
552  friend class ParallelReader;
553 };
554 
555 }
556 
557 #endif
Lucene::IndexReader::docFreq
virtual int32_t docFreq(const TermPtr &t)=0
Returns the number of documents containing the term t.
Lucene::IndexReader::getTermFreqVector
virtual void getTermFreqVector(int32_t docNumber, const String &field, const TermVectorMapperPtr &mapper)=0
Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays of ...
Lucene::IndexReader::incRef
void incRef()
Increments the refCount of this IndexReader instance. RefCounts are used to determine when a reader c...
Lucene::IndexReader::DEFAULT_TERMS_INDEX_DIVISOR
static const int32_t DEFAULT_TERMS_INDEX_DIVISOR
Definition: IndexReader.h:73
LUCENE_CLASS
#define LUCENE_CLASS(Name)
Definition: LuceneObject.h:24
Lucene::TermPtr
boost::shared_ptr< Term > TermPtr
Definition: LuceneTypes.h:233
Lucene::IndexReader::hasNorms
virtual bool hasNorms(const String &field)
Returns true if there are norms stored for this field.
Lucene::IndexReader::getTermFreqVector
virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String &field)=0
Return a term frequency vector for the specified document and field. The returned vector contains ter...
Lucene::IndexReader::getTermInfosIndexDivisor
virtual int32_t getTermInfosIndexDivisor()
For IndexReader implementations that use TermInfosReader to read terms, this returns the current inde...
Lucene::Collection
Utility template class to handle collections that can be safely copied and shared.
Definition: Collection.h:17
Lucene::IndexReader::getFieldNames
virtual HashSet< String > getFieldNames(FieldOption fieldOption)=0
Get a list of unique field names that exist in this index and have the specified field option informa...
Lucene::IndexReader::numDocs
virtual int32_t numDocs()=0
Returns the number of documents in this index.
Lucene::LuceneObjectPtr
boost::shared_ptr< LuceneObject > LuceneObjectPtr
Definition: LuceneTypes.h:539
Lucene::IndexReader::open
static IndexReaderPtr open(const DirectoryPtr &directory, const IndexDeletionPolicyPtr &deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor)
Returns an IndexReader reading the index in the given Directory, with a custom IndexDeletionPolicy....
Lucene::IndexReader::clone
virtual LuceneObjectPtr clone(bool openReadOnly, const LuceneObjectPtr &other=LuceneObjectPtr())
Clones the IndexReader and optionally changes readOnly. A readOnly reader cannot open a writable read...
Lucene::IndexReader::getCommitUserData
virtual MapStringString getCommitUserData()
Retrieve the String userData optionally passed to IndexWriter::commit. This will return null if Index...
Lucene::IndexReader::isDeleted
virtual bool isDeleted(int32_t n)=0
Returns true if document n has been deleted.
Lucene::IndexReader::getCommitUserData
static MapStringString getCommitUserData(const DirectoryPtr &directory)
Reads commitUserData, previously passed to IndexWriter#commit(MapStringString), from current index se...
Lucene::TermVectorMapperPtr
boost::shared_ptr< TermVectorMapper > TermVectorMapperPtr
Definition: LuceneTypes.h:254
Lucene::IndexReader::~IndexReader
virtual ~IndexReader()
Lucene::IndexReader::FIELD_OPTION_INDEXED_WITH_TERMVECTOR
@ FIELD_OPTION_INDEXED_WITH_TERMVECTOR
All fields which are indexed with termvectors enabled.
Definition: IndexReader.h:60
Lucene::IndexReader::IndexReader
IndexReader()
Lucene::IndexReader::open
static IndexReaderPtr open(const IndexCommitPtr &commit, const IndexDeletionPolicyPtr &deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor)
Returns an IndexReader reading the index in the given Directory, using a specific commit and with a c...
Lucene::IndexReader::main
static void main(Collection< String > args)
Prints the filename and size of each file within a given compound file. Add the -extract flag to extr...
Lucene::IndexReader::doUndeleteAll
virtual void doUndeleteAll()=0
Implements actual undeleteAll() in subclass.
Lucene::IndexReader::getVersion
virtual int64_t getVersion()
Version number when this IndexReader was opened. Not implemented in the IndexReader base class.
Lucene::IndexReader::setNorm
virtual void setNorm(int32_t doc, const String &field, uint8_t value)
Resets the normalization factor for the named field of the named document. The norm represents the pr...
Lucene::IndexReader::hasDeletions
virtual bool hasDeletions()=0
Returns true if any documents have been deleted.
SegmentInfos.h
Lucene::IndexReader::numDeletedDocs
int32_t numDeletedDocs()
Returns the number of deleted documents.
Lucene::IndexReader::isOptimized
virtual bool isOptimized()
Checks if the index is optimized (if it has a single segment and no deletions). Not implemented in th...
Lucene::IndexReader::reopen
virtual IndexReaderPtr reopen()
Refreshes an IndexReader if the index has changed since this instance was (re)opened.
Lucene::IndexReader::norms
virtual ByteArray norms(const String &field)=0
Returns the byte-encoded normalization factor for the named field of every document....
Lucene::IndexReader::doClose
virtual void doClose()=0
Implements close.
Lucene::IndexReader::FIELD_OPTION_ALL
@ FIELD_OPTION_ALL
All fields.
Definition: IndexReader.h:50
Lucene::IndexReader::ensureOpen
void ensureOpen()
Lucene::IndexReader::open
static IndexReaderPtr open(const DirectoryPtr &directory, bool readOnly)
Returns an IndexReader reading the index in the given Directory. You should pass readOnly = true,...
Lucene::IndexReader::open
static IndexReaderPtr open(const DirectoryPtr &directory, const IndexDeletionPolicyPtr &deletionPolicy, const IndexCommitPtr &commit, bool readOnly, int32_t termInfosIndexDivisor)
Lucene::IndexReader::terms
virtual TermEnumPtr terms(const TermPtr &t)=0
Returns an enumeration of all terms starting at a given term. If the given term does not exist,...
Lucene::IndexReader::acquireWriteLock
virtual void acquireWriteLock()
Does nothing by default. Subclasses that require a write lock for index modifications must implement ...
Lucene::IndexReader::deleteDocument
virtual void deleteDocument(int32_t docNum)
Deletes the document numbered docNum. Once a document is deleted it will not appear in TermDocs or Te...
Lucene::IndexReader::terms
virtual TermEnumPtr terms()=0
Returns an enumeration of all the terms in the index. The enumeration is ordered by Term::compareTo()...
Lucene::DocumentPtr
boost::shared_ptr< Document > DocumentPtr
Definition: LuceneTypes.h:74
Lucene
Definition: AbstractAllTermDocs.h:12
Lucene::ParallelReader
An IndexReader which reads multiple, parallel indexes. Each index added must have the same number of ...
Definition: ParallelReader.h:26
Lucene::DirectoryReader
An IndexReader which reads indexes with multiple segments.
Definition: DirectoryReader.h:19
Lucene::LuceneObject
Base class for all Lucene classes.
Definition: LuceneObject.h:31
Lucene::IndexReader::FIELD_OPTION_UNINDEXED
@ FIELD_OPTION_UNINDEXED
All fields which are not indexed.
Definition: IndexReader.h:58
Lucene::IndexReader::FIELD_OPTION_INDEXED_NO_TERMVECTOR
@ FIELD_OPTION_INDEXED_NO_TERMVECTOR
All fields which are indexed but don't have termvectors enabled.
Definition: IndexReader.h:62
Lucene::TermEnumPtr
boost::shared_ptr< TermEnum > TermEnumPtr
Definition: LuceneTypes.h:235
Lucene::IndexReaderPtr
boost::shared_ptr< IndexReader > IndexReaderPtr
Definition: LuceneTypes.h:157
Lucene::IndexReader::close
void close()
Closes files associated with this index. Also saves any new deletions to disk. No other methods shoul...
Lucene::TermDocsPtr
boost::shared_ptr< TermDocs > TermDocsPtr
Definition: LuceneTypes.h:236
Lucene::IndexReader::clone
virtual LuceneObjectPtr clone(const LuceneObjectPtr &other=LuceneObjectPtr())
Efficiently clones the IndexReader (sharing most internal state).
Lucene::IndexReader::deleteDocuments
virtual int32_t deleteDocuments(const TermPtr &term)
Deletes all documents that have a given term indexed. This is useful if one uses a document field to ...
Lucene::IndexReader::FIELD_OPTION_TERMVECTOR_WITH_OFFSET
@ FIELD_OPTION_TERMVECTOR_WITH_OFFSET
All fields with termvectors with offset values enabled.
Definition: IndexReader.h:68
Lucene::IndexReader::reopen
virtual IndexReaderPtr reopen(bool openReadOnly)
Just like reopen(), except you can change the readOnly of the original reader. If the index is unchan...
Lucene::IndexReader::getTermFreqVector
virtual void getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr &mapper)=0
Map all the term vectors for all fields in a Document.
Lucene::HashSet< String >
Lucene::IndexReader::getFieldCacheKey
virtual LuceneObjectPtr getFieldCacheKey()
Lucene::IndexReader::undeleteAll
virtual void undeleteAll()
Undeletes all documents currently marked as deleted in this index.
Lucene::IndexReader
IndexReader is an abstract class, providing an interface for accessing an index. Search of an index i...
Definition: IndexReader.h:39
Lucene::TermFreqVectorPtr
boost::shared_ptr< TermFreqVector > TermFreqVectorPtr
Definition: LuceneTypes.h:237
Lucene::IndexReader::FIELD_OPTION_OMIT_TERM_FREQ_AND_POSITIONS
@ FIELD_OPTION_OMIT_TERM_FREQ_AND_POSITIONS
All fields that omit tf.
Definition: IndexReader.h:56
Lucene::IndexReader::FIELD_OPTION_STORES_PAYLOADS
@ FIELD_OPTION_STORES_PAYLOADS
All fields that store payloads.
Definition: IndexReader.h:54
Lucene::IndexReader::FieldOption
FieldOption
Constants describing field properties, for example used for IndexReader#getFieldNames(FieldOption).
Definition: IndexReader.h:48
Lucene::IndexReader::maxDoc
virtual int32_t maxDoc()=0
Returns one greater than the largest possible document number. This may be used to,...
Lucene::IndexReader::termDocs
virtual TermDocsPtr termDocs()=0
Returns an unpositioned TermDocs enumerator.
Lucene::IndexReader::getRefCount
int32_t getRefCount()
Returns the current refCount for this reader.
Lucene::IndexReader::termDocs
virtual TermDocsPtr termDocs(const TermPtr &term)
Returns an enumeration of all the documents which contain term. For each document,...
Lucene::IndexReader::lastModified
static int64_t lastModified(const DirectoryPtr &directory2)
Returns the time the index in the named directory was last modified. Do not use this to check whether...
Lucene::TermPositionsPtr
boost::shared_ptr< TermPositions > TermPositionsPtr
Definition: LuceneTypes.h:243
Lucene::IndexReader::indexExists
static bool indexExists(const DirectoryPtr &directory)
Returns true if an index exists at the specified directory. If the directory does not exist or if the...
Lucene::IndexReader::FIELD_OPTION_TERMVECTOR_WITH_POSITION
@ FIELD_OPTION_TERMVECTOR_WITH_POSITION
All fields with termvectors with position values enabled.
Definition: IndexReader.h:66
Lucene::IndexReader::getTermFreqVectors
virtual Collection< TermFreqVectorPtr > getTermFreqVectors(int32_t docNumber)=0
Return an array of term frequency vectors for the specified document. The array contains a vector for...
Lucene::IndexDeletionPolicyPtr
boost::shared_ptr< IndexDeletionPolicy > IndexDeletionPolicyPtr
Definition: LuceneTypes.h:153
Lucene::IndexReader::open
static IndexReaderPtr open(const IndexCommitPtr &commit, bool readOnly)
Returns an IndexReader reading the index in the given IndexCommit. You should pass readOnly = true,...
Lucene::IndexReader::reopen
virtual IndexReaderPtr reopen(const IndexCommitPtr &commit)
Reopen this reader on a specific commit point. This always returns a readOnly reader....
Lucene::IndexReader::hasChanges
virtual bool hasChanges()
Used for testing.
Lucene::IndexReader::getDeletesCacheKey
virtual LuceneObjectPtr getDeletesCacheKey()
This returns null if the reader has no deletions.
Lucene::IndexReader::doDelete
virtual void doDelete(int32_t docNum)=0
Implements deletion of the document numbered docNum. Applications should call deleteDocument(int) or ...
Lucene::IndexReader::getCurrentVersion
static int64_t getCurrentVersion(const DirectoryPtr &directory)
Reads version number from segments files. The version number is initialized with a timestamp and then...
Lucene::IndexReader::open
static IndexReaderPtr open(const IndexCommitPtr &commit, const IndexDeletionPolicyPtr &deletionPolicy, bool readOnly)
Returns an IndexReader reading the index in the given Directory, using a specific commit and with a c...
Lucene::IndexReader::flush
void flush()
Lucene::IndexReader::commit
void commit(MapStringString commitUserData)
Commit changes resulting from delete, undeleteAll, or setNorm operations. If an exception is hit,...
Lucene::IndexReader::getIndexCommit
virtual IndexCommitPtr getIndexCommit()
Return the IndexCommit that this reader has opened. This method is only implemented by those readers ...
Lucene::DirectoryPtr
boost::shared_ptr< Directory > DirectoryPtr
Definition: LuceneTypes.h:489
Lucene::IndexReader::FIELD_OPTION_TERMVECTOR
@ FIELD_OPTION_TERMVECTOR
All fields with termvectors enabled. Please note that only standard termvector fields are returned.
Definition: IndexReader.h:64
Lucene::IndexReader::setNorm
virtual void setNorm(int32_t doc, const String &field, double value)
Resets the normalization factor for the named field of the named document.
Lucene::IndexReader::directory
virtual DirectoryPtr directory()
Returns the directory associated with this index. The default implementation returns the directory sp...
Lucene::IndexReader::open
static IndexReaderPtr open(const DirectoryPtr &directory, const IndexDeletionPolicyPtr &deletionPolicy, bool readOnly)
Returns an IndexReader reading the index in the given Directory, with a custom IndexDeletionPolicy....
Lucene::IndexReader::norms
virtual void norms(const String &field, ByteArray norms, int32_t offset)=0
Reads the byte-encoded normalization factor for the named field of every document....
Lucene::IndexReader::getSequentialSubReaders
virtual Collection< IndexReaderPtr > getSequentialSubReaders()
Returns the sequential sub readers that this reader is logically composed of. For example,...
Lucene::IndexReader::getUniqueTermCount
virtual int64_t getUniqueTermCount()
Returns the number of unique terms (across all fields) in this reader.
Lucene::IndexReader::_hasChanges
bool _hasChanges
Definition: IndexReader.h:77
Lucene::FieldSelectorPtr
boost::shared_ptr< FieldSelector > FieldSelectorPtr
Definition: LuceneTypes.h:77
Lucene::IndexReader::closed
bool closed
Definition: IndexReader.h:76
Lucene::IndexReader::flush
void flush(MapStringString commitUserData)
Lucene::IndexReader::commit
void commit()
Commit changes resulting from delete, undeleteAll, or setNorm operations. If an exception is hit,...
Lucene::IndexReader::decRef
void decRef()
Decreases the refCount of this IndexReader instance. If the refCount drops to 0, then pending changes...
Lucene::IndexReader::refCount
int32_t refCount
Definition: IndexReader.h:78
Lucene::IndexReader::doSetNorm
virtual void doSetNorm(int32_t doc, const String &field, uint8_t value)=0
Implements setNorm in subclass.
Lucene::IndexReader::open
static IndexReaderPtr open(const DirectoryPtr &directory)
Returns an IndexReader reading the index in the given Directory, with readOnly = true.
Lucene::IndexReader::listCommits
static Collection< IndexCommitPtr > listCommits(const DirectoryPtr &dir)
Returns all commit points that exist in the Directory. Normally, because the default is KeepOnlyLastCommitDeletionPolicy,...
Lucene::IndexReader::doCommit
virtual void doCommit(MapStringString commitUserData)=0
Implements commit.
Lucene::IndexReader::isCurrent
virtual bool isCurrent()
Check whether any new changes have occurred to the index since this reader was opened.
Lucene::IndexCommitPtr
boost::shared_ptr< IndexCommit > IndexCommitPtr
Definition: LuceneTypes.h:152
Lucene::IndexReader::FIELD_OPTION_INDEXED
@ FIELD_OPTION_INDEXED
All indexed fields.
Definition: IndexReader.h:52

clucene.sourceforge.net