Lucene++ - a full-featured C++ search engine
API Documentation


TermVectorsReader.h
Go to the documentation of this file.
1 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
6 
7 #ifndef TERMVECTORSREADER_H
8 #define TERMVECTORSREADER_H
9 
10 #include "TermVectorMapper.h"
11 
12 namespace Lucene {
13 
// Declaration of TermVectorsReader, a LuceneObject subclass whose members below retrieve
// term vectors by document number and field.
// NOTE(review): every line of this listing is prefixed with the source line number emitted by
// the documentation generator. Skipped numbers (e.g. 44-48) are lines not reproduced in the
// listing; the index further down the page places fieldInfos, tvx, tvd and tvf on those lines.
14 class LPPAPI TermVectorsReader : public LuceneObject {
15 public:
    // Construct from a directory, a segment name and the field infos.
17  TermVectorsReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fieldInfos);
    // As above, with an explicit read buffer size. docStoreOffset defaults to -1 and size to 0.
18  TermVectorsReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fieldInfos,
19  int32_t readBufferSize, int32_t docStoreOffset = -1, int32_t size = 0);
20  virtual ~TermVectorsReader();
21 
23 
24 public:
    // NOTE: if you make a new format, it must be larger than the current format.
26  static const int32_t FORMAT_VERSION;
27 
    // Changes to speed up bulk merging of term vectors.
29  static const int32_t FORMAT_VERSION2;
30 
    // Changed strings to UTF8 with length-in-bytes not length-in-chars.
32  static const int32_t FORMAT_UTF8_LENGTH_IN_BYTES;
33 
    // NOTE: always change this if you switch to a new format.
35  static const int32_t FORMAT_CURRENT;
36 
    // The size in bytes that the FORMAT_VERSION will take up at the beginning of each file.
38  static const int32_t FORMAT_SIZE;
39 
    // Byte-sized constants; presumably flag bits marking whether positions / offsets are
    // stored with a term vector -- TODO confirm against the implementation file.
40  static const uint8_t STORE_POSITIONS_WITH_TERMVECTOR;
41  static const uint8_t STORE_OFFSET_WITH_TERMVECTOR;
42 
43 protected:
45 
    // NOTE(review): _size and numTotalDocs carry no description on this page; presumably the
    // number of documents served by this reader and the total in the underlying files -- verify.
49  int32_t _size;
50  int32_t numTotalDocs;
51 
    // The docID offset where our docs begin in the index file.
53  int32_t docStoreOffset;
54 
    // NOTE(review): presumably the format version detected for the opened files (see the
    // FORMAT_* constants and checkValidFormat) -- confirm.
55  int32_t format;
56 
57 public:
60 
63 
65 
    // Retrieve the length (in bytes) of the tvd and tvf entries for the next numDocs,
    // beginning at startDocID. The lengths are delivered through tvdLengths / tvfLengths.
69  void rawDocs(Collection<int32_t> tvdLengths, Collection<int32_t> tvfLengths, int32_t startDocID, int32_t numDocs);
70 
71  void close();
72 
74  int32_t size();
75 
    // Mapper-based overload: takes the document number, the field and a TermVectorMapper
    // and returns nothing; results are presumably delivered through the mapper.
76  void get(int32_t docNum, const String& field, const TermVectorMapperPtr& mapper);
77 
    // Retrieve the term vector for the given document and field.
83  TermFreqVectorPtr get(int32_t docNum, const String& field);
84 
90 
    // Mapper-based overload taking only a document number (no field argument).
91  void get(int32_t docNumber, const TermVectorMapperPtr& mapper);
92 
94 
95 protected:
    // Takes the same six arguments as the longer constructor; presumably the shared
    // construction routine both constructors delegate to -- TODO confirm.
96  void ConstructReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fieldInfos, int32_t readBufferSize, int32_t docStoreOffset, int32_t size);
97 
98  void seekTvx(int32_t docNum);
99 
    // Returns an int32_t for the given input; presumably the format version read from it.
100  int32_t checkValidFormat(const IndexInputPtr& in);
101 
    // Reads the String[] fields; you have to pre-seek tvd to the right point.
103  Collection<String> readFields(int32_t fieldCount);
104 
107 
110 
    // Takes a field, a 64-bit pointer into tvf and a mapper; returns nothing.
114  void readTermVector(const String& field, int64_t tvfPointer, const TermVectorMapperPtr& mapper);
115 };
116 
119 public:
122 
124 
125 protected:
133  String field;
134 
135 public:
138  virtual void setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions);
139 
141  virtual void map(const String& term, int32_t frequency, Collection<TermVectorOffsetInfoPtr> offsets, Collection<int32_t> positions);
142 
146 };
147 
148 }
149 
150 #endif
Lucene::TermVectorsReader::readTvfPointers
Collection< int64_t > readTvfPointers(int32_t fieldCount)
Reads the long[] offsets into TVF; you have to pre-seek tvx/tvd to the right point.
Lucene::ParallelArrayTermVectorMapper::storingPositions
bool storingPositions
Definition: TermVectorsReader.h:132
TermVectorMapper.h
Lucene::TermVectorsReader::tvd
IndexInputPtr tvd
Definition: TermVectorsReader.h:47
LUCENE_CLASS
#define LUCENE_CLASS(Name)
Definition: LuceneObject.h:24
Lucene::TermVectorsReader::STORE_POSITIONS_WITH_TERMVECTOR
static const uint8_t STORE_POSITIONS_WITH_TERMVECTOR
Definition: TermVectorsReader.h:40
Lucene::Collection< int32_t >
Lucene::ParallelArrayTermVectorMapper::ParallelArrayTermVectorMapper
ParallelArrayTermVectorMapper()
Lucene::TermVectorsReader::tvf
IndexInputPtr tvf
Definition: TermVectorsReader.h:48
Lucene::LuceneObjectPtr
boost::shared_ptr< LuceneObject > LuceneObjectPtr
Definition: LuceneTypes.h:539
Lucene::TermVectorsReader::seekTvx
void seekTvx(int32_t docNum)
Lucene::TermVectorMapperPtr
boost::shared_ptr< TermVectorMapper > TermVectorMapperPtr
Definition: LuceneTypes.h:254
Lucene::TermVectorsReader::getTvdStream
IndexInputPtr getTvdStream()
Used for bulk copy when merging.
Lucene::ParallelArrayTermVectorMapper::materializeVector
TermFreqVectorPtr materializeVector()
Construct the vector.
Lucene::TermVectorsReader::readTermVectors
Collection< TermFreqVectorPtr > readTermVectors(int32_t docNum, Collection< String > fields, Collection< int64_t > tvfPointers)
Lucene::FieldInfosPtr
boost::shared_ptr< FieldInfos > FieldInfosPtr
Definition: LuceneTypes.h:127
Lucene::TermVectorsReader::tvx
IndexInputPtr tvx
Definition: TermVectorsReader.h:46
Lucene::TermVectorsReader::ConstructReader
void ConstructReader(const DirectoryPtr &d, const String &segment, const FieldInfosPtr &fieldInfos, int32_t readBufferSize, int32_t docStoreOffset, int32_t size)
Lucene::ParallelArrayTermVectorMapper::~ParallelArrayTermVectorMapper
virtual ~ParallelArrayTermVectorMapper()
Lucene::TermVectorsReader::getTvfStream
IndexInputPtr getTvfStream()
Used for bulk copy when merging.
Lucene::TermVectorsReader::get
TermFreqVectorPtr get(int32_t docNum, const String &field)
Retrieve the term vector for the given document and field.
Lucene::TermVectorsReader::FORMAT_UTF8_LENGTH_IN_BYTES
static const int32_t FORMAT_UTF8_LENGTH_IN_BYTES
Changed strings to UTF8 with length-in-bytes not length-in-chars.
Definition: TermVectorsReader.h:32
Lucene::TermVectorsReader::get
void get(int32_t docNum, const String &field, const TermVectorMapperPtr &mapper)
Lucene::TermVectorsReader::close
void close()
Lucene::TermVectorsReader::~TermVectorsReader
virtual ~TermVectorsReader()
Lucene::TermVectorsReader::canReadRawDocs
bool canReadRawDocs()
Lucene
Definition: AbstractAllTermDocs.h:12
Lucene::ParallelArrayTermVectorMapper::positions
Collection< Collection< int32_t > > positions
Definition: TermVectorsReader.h:128
Lucene::TermVectorsReader::_size
int32_t _size
Definition: TermVectorsReader.h:49
Lucene::LuceneObject
Base class for all Lucene classes.
Definition: LuceneObject.h:31
Lucene::ParallelArrayTermVectorMapper::offsets
Collection< Collection< TermVectorOffsetInfoPtr > > offsets
Definition: TermVectorsReader.h:129
Lucene::TermVectorsReader::get
void get(int32_t docNumber, const TermVectorMapperPtr &mapper)
Lucene::ParallelArrayTermVectorMapper::terms
Collection< String > terms
Definition: TermVectorsReader.h:123
Lucene::IndexInputPtr
boost::shared_ptr< IndexInput > IndexInputPtr
Definition: LuceneTypes.h:493
Lucene::ParallelArrayTermVectorMapper::setExpectations
virtual void setExpectations(const String &field, int32_t numTerms, bool storeOffsets, bool storePositions)
Tell the mapper what to expect with regard to field, number of terms, offset and position storage...
Lucene::TermVectorsReader::size
int32_t size()
Lucene::ParallelArrayTermVectorMapper::currentPosition
int32_t currentPosition
Definition: TermVectorsReader.h:130
Lucene::TermVectorsReader::FORMAT_VERSION
static const int32_t FORMAT_VERSION
NOTE: if you make a new format, it must be larger than the current format.
Definition: TermVectorsReader.h:22
Lucene::TermFreqVectorPtr
boost::shared_ptr< TermFreqVector > TermFreqVectorPtr
Definition: LuceneTypes.h:237
Lucene::ParallelArrayTermVectorMapper::field
String field
Definition: TermVectorsReader.h:133
Lucene::TermVectorsReader::clone
virtual LuceneObjectPtr clone(const LuceneObjectPtr &other=LuceneObjectPtr())
Return clone of this object.
Lucene::TermVectorsReader::TermVectorsReader
TermVectorsReader()
Lucene::ParallelArrayTermVectorMapper::map
virtual void map(const String &term, int32_t frequency, Collection< TermVectorOffsetInfoPtr > offsets, Collection< int32_t > positions)
Map the Term Vector information into your own structure.
Lucene::TermVectorsReader::FORMAT_SIZE
static const int32_t FORMAT_SIZE
The size in bytes that the FORMAT_VERSION will take up at the beginning of each file.
Definition: TermVectorsReader.h:38
Lucene::TermVectorsReader::FORMAT_VERSION2
static const int32_t FORMAT_VERSION2
Changes to speed up bulk merging of term vectors.
Definition: TermVectorsReader.h:29
Lucene::TermVectorsReader::format
int32_t format
Definition: TermVectorsReader.h:55
Lucene::TermVectorsReader::docStoreOffset
int32_t docStoreOffset
The docID offset where our docs begin in the index file. This will be 0 if we have our own private fi...
Definition: TermVectorsReader.h:53
Lucene::TermVectorsReader::TermVectorsReader
TermVectorsReader(const DirectoryPtr &d, const String &segment, const FieldInfosPtr &fieldInfos, int32_t readBufferSize, int32_t docStoreOffset=-1, int32_t size=0)
Lucene::TermVectorsReader::readTermVector
void readTermVector(const String &field, int64_t tvfPointer, const TermVectorMapperPtr &mapper)
Lucene::ParallelArrayTermVectorMapper
Models the existing parallel array structure.
Definition: TermVectorsReader.h:118
Lucene::DirectoryPtr
boost::shared_ptr< Directory > DirectoryPtr
Definition: LuceneTypes.h:489
Lucene::TermVectorsReader::FORMAT_CURRENT
static const int32_t FORMAT_CURRENT
NOTE: always change this if you switch to a new format.
Definition: TermVectorsReader.h:35
Lucene::ParallelArrayTermVectorMapper::termFreqs
Collection< int32_t > termFreqs
Definition: TermVectorsReader.h:127
Lucene::TermVectorsReader::TermVectorsReader
TermVectorsReader(const DirectoryPtr &d, const String &segment, const FieldInfosPtr &fieldInfos)
Lucene::TermVectorMapper
The TermVectorMapper can be used to map Term Vectors into your own structure instead of the parallel ...
Definition: TermVectorMapper.h:18
Lucene::ParallelArrayTermVectorMapper::storingOffsets
bool storingOffsets
Definition: TermVectorsReader.h:131
Lucene::TermVectorsReader::readTermVectors
void readTermVectors(Collection< String > fields, Collection< int64_t > tvfPointers, const TermVectorMapperPtr &mapper)
Lucene::TermVectorsReader::fieldInfos
FieldInfosPtr fieldInfos
Definition: TermVectorsReader.h:44
Lucene::TermVectorsReader::get
Collection< TermFreqVectorPtr > get(int32_t docNum)
Return all term vectors stored for this document or null if they could not be read in.
Lucene::TermVectorsReader
Definition: TermVectorsReader.h:14
Lucene::TermVectorsReader::checkValidFormat
int32_t checkValidFormat(const IndexInputPtr &in)
Lucene::TermVectorsReader::rawDocs
void rawDocs(Collection< int32_t > tvdLengths, Collection< int32_t > tvfLengths, int32_t startDocID, int32_t numDocs)
Retrieve the length (in bytes) of the tvd and tvf entries for the next numDocs starting with startDoc...
Lucene::TermVectorsReader::STORE_OFFSET_WITH_TERMVECTOR
static const uint8_t STORE_OFFSET_WITH_TERMVECTOR
Definition: TermVectorsReader.h:41
Lucene::TermVectorsReader::numTotalDocs
int32_t numTotalDocs
Definition: TermVectorsReader.h:50
Lucene::TermVectorsReader::readFields
Collection< String > readFields(int32_t fieldCount)
Reads the String[] fields; you have to pre-seek tvd to the right point.

clucene.sourceforge.net