Lucene++ - a full-featured, C++ search engine
API Documentation
Go to the documentation of this file.
7 #ifndef DOCUMENTSWRITER_H
8 #define DOCUMENTSWRITER_H
TermPtr lastDeleteTerm
Definition: DocumentsWriter.h:184
static const int32_t BYTES_PER_DEL_DOCID
Rough logic: del docIDs are List<Integer>. Say list allocates ~2X size (2*POINTER)....
Definition: DocumentsWriter.h:127
int32_t blockSize
Definition: DocumentsWriter.h:508
virtual DocConsumerPtr getChain(const DocumentsWriterPtr &documentsWriter)
MapTermNum getBufferedDeleteTerms()
void finishDocument(const DocumentsWriterThreadStatePtr &perThread, const DocWriterPtr &docWriter)
Does the synchronized work to finish/flush the inverted document.
static const int32_t MAX_TERM_LENGTH
Definition: DocumentsWriter.h:145
static const int32_t CHAR_NUM_BYTE
Definition: DocumentsWriter.h:116
The IndexingChain must define the getChain(DocumentsWriter) method which returns the DocConsumer that...
Definition: DocumentsWriter.h:423
bool bufferDeleteQueries(Collection< QueryPtr > queries)
ByteBlockAllocatorPtr byteBlockAllocator
Definition: DocumentsWriter.h:177
int32_t getFlushedDocCount()
bool checkDeleteTerm(const TermPtr &term)
#define LUCENE_CLASS(Name)
Definition: LuceneObject.h:24
void addDeleteDocID(int32_t docID)
Buffer a specific docID for deletion. Currently only used when we hit an exception when adding a docum...
void createCompoundFile(const String &segment)
Build compound file for the segment we just flushed.
void recycleCharBlocks(Collection< CharArray > blocks, int32_t numBlocks)
PerDocBufferPtr newPerDocBuffer()
Create and return a new DocWriterBuffer.
boost::shared_ptr< Term > TermPtr
Definition: LuceneTypes.h:233
int64_t freeTrigger
If we've allocated 5% over our RAM budget, we then free down to 95%.
Definition: DocumentsWriter.h:93
static const int32_t CHAR_BLOCK_MASK
Definition: DocumentsWriter.h:143
bool updateDocument(const DocumentPtr &doc, const AnalyzerPtr &analyzer, const TermPtr &delTerm)
boost::weak_ptr< DocumentsWriter > DocumentsWriterWeakPtr
Definition: LuceneTypes.h:123
String segment
Definition: DocumentsWriter.h:158
boost::shared_ptr< PerDocBuffer > PerDocBufferPtr
Definition: LuceneTypes.h:199
bool bufferDeleteQuery(const QueryPtr &query)
boost::shared_ptr< OneMerge > OneMergePtr
Definition: LuceneTypes.h:192
virtual int64_t sizeInBytes()
void setMaxBufferedDeleteTerms(int32_t maxBufferedDeleteTerms)
Consumer returns this on each doc. This holds any state that must be flushed synchronized "in docID o...
Definition: DocumentsWriter.h:402
bool pauseAllThreads()
Returns true if an abort is in progress.
void doAfterFlush()
Reset after a flush.
int32_t maxBufferedDeleteTerms
The max number of delete terms that can be buffered before they must be flushed to disk.
Definition: DocumentsWriter.h:85
bool timeToFlushDeletes()
RAMFile buffer for DocWriters.
Definition: DocumentsWriter.h:381
SkipDocWriterPtr skipDocWriter
Definition: DocumentsWriter.h:175
bool addDocument(const DocumentPtr &doc, const AnalyzerPtr &analyzer)
Returns true if the caller (IndexWriter) should now flush.
boost::shared_ptr< DocumentsWriter > DocumentsWriterPtr
Definition: LuceneTypes.h:123
void updateFlushedDocCount(int32_t n)
static const int32_t BYTE_BLOCK_MASK
Definition: DocumentsWriter.h:137
boost::shared_ptr< Query > QueryPtr
Definition: LuceneTypes.h:420
HashSet< String > abortedFiles()
int32_t getNumBufferedDeleteTerms()
boost::shared_ptr< SkipDocWriter > SkipDocWriterPtr
Definition: LuceneTypes.h:226
bool bufferIsFull
Definition: DocumentsWriter.h:163
boost::shared_ptr< DocWriter > DocWriterPtr
Definition: LuceneTypes.h:125
void balanceRAM()
We have four pools of RAM: Postings, byte blocks (holds freq/prox posting data), char blocks (holds c...
DirectoryPtr directory
Definition: DocumentsWriter.h:156
static const int32_t PER_DOC_BLOCK_SIZE
Definition: DocumentsWriter.h:152
HashSet< String > _abortedFiles
List of files that were written before last abort()
Definition: DocumentsWriter.h:105
bool bufferDeleteTerm(const TermPtr &term)
int32_t nextWriteLoc
Definition: DocumentsWriter.h:482
boost::shared_ptr< ByteBlockAllocator > ByteBlockAllocatorPtr
Definition: LuceneTypes.h:88
static const int32_t CHAR_BLOCK_SHIFT
Initial chunk size of the shared char[] blocks used to store term text.
Definition: DocumentsWriter.h:141
int32_t maxFieldLength
Definition: DocumentsWriter.h:366
boost::shared_ptr< SegmentWriteState > SegmentWriteStatePtr
Definition: LuceneTypes.h:222
HashSet< String > closedFiles()
int32_t pauseThreads
Definition: DocumentsWriter.h:73
Definition: DocumentsWriter.h:497
void initFlushState(bool onlyDocStore)
void setSimilarity(const SimilarityPtr &similarity)
int32_t numDocsInStore
Definition: DocumentsWriter.h:160
void addDeleteQuery(const QueryPtr &query, int32_t docID)
virtual int64_t sizeInBytes()=0
Collection< IntArray > freeIntBlocks
Definition: DocumentsWriter.h:108
void waitReady(const DocumentsWriterThreadStatePtr &state)
Definition: ByteBlockPool.h:54
int32_t getMaxBufferedDeleteTerms()
virtual ~ByteBlockAllocator()
HashSet< String > _openFiles
Definition: DocumentsWriter.h:171
int64_t ramBufferSize
How much RAM we can use before flushing. This is 0 if we are flushing by doc count instead.
Definition: DocumentsWriter.h:88
bool updateDocument(const TermPtr &t, const DocumentPtr &doc, const AnalyzerPtr &analyzer)
virtual ByteArray getByteBlock(bool trackAllocations)
Allocate another byte[] from the shared pool.
virtual void recycleByteBlocks(Collection< ByteArray > blocks, int32_t start, int32_t end)
Return byte[]'s to the pool.
String closeDocStore()
Closes the currently open doc stores and returns the doc store segment name. This returns null if there ...
void writeDocument(const DocWriterPtr &doc)
bool add(const DocWriterPtr &doc)
virtual void initialize()
Called directly after instantiation to create objects that depend on this object being fully construc...
Definition: DocumentsWriter.h:356
static const int32_t INT_BLOCK_SIZE
Definition: DocumentsWriter.h:149
void remapDeletes(const SegmentInfosPtr &infos, Collection< Collection< int32_t > > docMaps, Collection< int32_t > delCounts, const OneMergePtr &merge, int32_t mergeDocCount)
Called whenever a merge has completed and the merged segments had deletions.
double getRAMBufferSizeMB()
int64_t freeLevel
Definition: DocumentsWriter.h:94
void abort()
Called if we hit an exception at a bad time (when updating the index files) and must discard all curr...
int64_t waitQueueResumeBytes
Definition: DocumentsWriter.h:90
bool bufferDeleteTerms(Collection< TermPtr > terms)
IntArray getIntBlock(bool trackAllocations)
int32_t nextWriteDocID
Definition: DocumentsWriter.h:481
This class accepts multiple added documents and directly writes a single segment file....
Definition: DocumentsWriter.h:54
boost::shared_ptr< Analyzer > AnalyzerPtr
Definition: LuceneTypes.h:20
bool setFlushPending()
Sets flushPending if it is not already set and returns whether it was set. This is used by IndexWriter...
void addOpenFile(const String &name)
void message(const String &message)
File used as buffer in RAMDirectory.
Definition: RAMFile.h:15
void bytesUsed(int64_t numBytes)
int64_t numBytesAlloc
Definition: DocumentsWriter.h:180
boost::shared_ptr< WaitQueue > WaitQueuePtr
Definition: LuceneTypes.h:265
WaitQueue(const DocumentsWriterPtr &docWriter)
static const int32_t BYTE_BLOCK_SHIFT
Initial chunk size of the shared byte[] blocks used to store postings data.
Definition: DocumentsWriter.h:135
static IndexingChainPtr getDefaultIndexingChain()
boost::shared_ptr< DocFieldProcessor > DocFieldProcessorPtr
Definition: LuceneTypes.h:115
boost::weak_ptr< IndexWriter > IndexWriterWeakPtr
Definition: LuceneTypes.h:160
ByteBlockAllocator(const DocumentsWriterPtr &docWriter, int32_t blockSize)
Definition: DocumentsWriter.h:469
boost::shared_ptr< Document > DocumentPtr
Definition: LuceneTypes.h:74
void setInfoStream(const InfoStreamPtr &infoStream)
If non-null, various details of indexing are printed here.
BufferedDeletesPtr deletesInRAM
Deletes done after the last flush; these are discarded on abort.
Definition: DocumentsWriter.h:79
Definition: AbstractAllTermDocs.h:12
int32_t docID
Definition: DocumentsWriter.h:411
int32_t numDocsInRAM
Definition: DocumentsWriter.h:66
MapThreadDocumentsWriterThreadState threadBindings
Definition: DocumentsWriter.h:71
Base class for all Lucene classes.
Definition: LuceneObject.h:31
Collection< DocWriterPtr > waiting
Definition: DocumentsWriter.h:480
virtual ByteArray newBuffer(int32_t size)
Allocate bytes used from shared pool.
SimilarityPtr similarity
Definition: DocumentsWriter.h:167
boost::shared_ptr< IndexReader > IndexReaderPtr
Definition: LuceneTypes.h:157
DocumentsWriterWeakPtr _docWriter
Definition: DocumentsWriter.h:502
SegmentWriteStatePtr flushState
Definition: DocumentsWriter.h:106
int64_t waitQueuePauseBytes
Definition: DocumentsWriter.h:89
virtual ~DocumentsWriter()
Collection< DocumentsWriterThreadStatePtr > threadStates
Definition: DocumentsWriter.h:70
HashSet< String > _closedFiles
Definition: DocumentsWriter.h:172
DocumentsWriterWeakPtr _docWriter
Definition: DocumentsWriter.h:474
void bytesAllocated(int64_t numBytes)
int32_t maxFieldLength
Definition: DocumentsWriter.h:166
InfoStreamPtr infoStream
Definition: DocumentsWriter.h:367
String getSegment()
Get current segment name we are writing.
int32_t numWaiting
Definition: DocumentsWriter.h:483
SimilarityPtr similarity
Definition: DocumentsWriter.h:368
InfoStreamPtr infoStream
Definition: DocumentsWriter.h:165
Definition: DocumentsWriter.h:457
int64_t waitingBytes
Definition: DocumentsWriter.h:484
DocConsumerPtr consumer
Definition: DocumentsWriter.h:169
DocumentsWriterThreadStatePtr getThreadState(const DocumentPtr &doc, const TermPtr &delTerm)
Returns a free (idle) ThreadState that may be used for indexing this one document....
bool applyDeletes(const SegmentInfosPtr &infos)
HashSet< String > getFlushedFiles()
virtual bool testPoint(const String &name)
Only called by asserts.
boost::shared_ptr< DocumentsWriterThreadState > DocumentsWriterThreadStatePtr
Definition: LuceneTypes.h:124
int32_t getMaxBufferedDocs()
static const int32_t CHAR_BLOCK_SIZE
Definition: DocumentsWriter.h:142
This is the current indexing chain: DocConsumer / DocConsumerPerThread --> code: DocFieldProcessor / ...
Definition: DocumentsWriter.h:447
virtual ~DefaultIndexingChain()
void setMaxBufferedDocs(int32_t count)
Set max buffered docs, which means we will flush by doc count instead of by RAM usage.
static const int32_t INT_NUM_BYTE
Definition: DocumentsWriter.h:115
static const int32_t BYTE_BLOCK_NOT_MASK
Definition: DocumentsWriter.h:138
DocumentPtr doc
Definition: DocumentsWriter.h:370
int32_t getDocStoreOffset()
Returns the doc offset into the shared doc store for the current buffered docs.
AnalyzerPtr analyzer
Definition: DocumentsWriter.h:365
static const int32_t POINTER_NUM_BYTE
Definition: DocumentsWriter.h:114
int32_t flush(bool _closeDocStore)
Flush all pending docs to a new segment.
void addDeleteTerm(const TermPtr &term, int32_t docCount)
DocumentsWriterWeakPtr _docWriter
Definition: DocumentsWriter.h:361
DocumentsWriter(const DirectoryPtr &directory, const IndexWriterPtr &writer, const IndexingChainPtr &indexingChain)
WaitQueuePtr waitQueue
Definition: DocumentsWriter.h:174
void setFlushedDocCount(int32_t n)
IndexingChainPtr indexingChain
Definition: DocumentsWriter.h:157
virtual DocConsumerPtr getChain(const DocumentsWriterPtr &documentsWriter)=0
String getDocStoreSegment()
Returns the current doc store segment we are writing to.
int32_t docID
Definition: DocumentsWriter.h:369
void recycle()
Recycle the bytes used.
void recycleIntBlocks(Collection< IntArray > blocks, int32_t start, int32_t end)
PerDocBuffer(const DocumentsWriterPtr &docWriter)
DocWriterPtr next
Definition: DocumentsWriter.h:407
DocFieldProcessorPtr docFieldProcessor
Definition: DocumentsWriter.h:76
bool closed
Definition: DocumentsWriter.h:102
static const int32_t BYTES_PER_DEL_QUERY
Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER)....
Definition: DocumentsWriter.h:132
static const int32_t OBJECT_HEADER_BYTES
Coarse estimates used to measure RAM usage of buffered deletes.
Definition: DocumentsWriter.h:113
int32_t getNumDocsInRAM()
Returns how many docs are currently buffered in RAM.
boost::shared_ptr< SegmentInfos > SegmentInfosPtr
Definition: LuceneTypes.h:210
boost::shared_ptr< Directory > DirectoryPtr
Definition: LuceneTypes.h:489
static const int32_t INT_BLOCK_MASK
Definition: DocumentsWriter.h:150
int32_t maxBufferedDocs
Flush at this number of docs. If ramBufferSize is non-zero we will flush by RAM usage instead.
Definition: DocumentsWriter.h:97
DocumentsWriterWeakPtr _docWriter
Definition: DocumentsWriter.h:386
static const int32_t BYTE_BLOCK_SIZE
Definition: DocumentsWriter.h:136
boost::shared_ptr< DocConsumer > DocConsumerPtr
Definition: LuceneTypes.h:106
int32_t nextDocID
Definition: DocumentsWriter.h:65
boost::shared_ptr< InfoStream > InfoStreamPtr
Definition: LuceneTypes.h:532
String docStoreSegment
Definition: DocumentsWriter.h:59
void setMaxFieldLength(int32_t maxFieldLength)
bool hasProx()
Returns true if any of the fields in the current buffered docs have omitTermFreqAndPositions==false.
virtual void setNext(const DocWriterPtr &next)
boost::shared_ptr< Similarity > SimilarityPtr
Definition: LuceneTypes.h:435
virtual void recycleByteBlocks(Collection< ByteArray > blocks)
Collection< CharArray > freeCharBlocks
Definition: DocumentsWriter.h:109
boost::shared_ptr< BufferedDeletes > BufferedDeletesPtr
Definition: LuceneTypes.h:87
ByteBlockAllocatorPtr perDocAllocator
Definition: DocumentsWriter.h:178
bool aborting
Definition: DocumentsWriter.h:74
bool applyDeletes(const IndexReaderPtr &reader, int32_t docIDStart)
int64_t numBytesUsed
Definition: DocumentsWriter.h:181
Collection< ByteArray > freeByteBlocks
Definition: DocumentsWriter.h:509
void removeOpenFile(const String &name)
boost::shared_ptr< IndexWriter > IndexWriterPtr
Definition: LuceneTypes.h:160
void initSegmentName(bool onlyDocStore)
static const int32_t BYTES_PER_DEL_TERM
Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER)....
Definition: DocumentsWriter.h:123
String maxTermPrefix
Definition: DocumentsWriter.h:371
static const int32_t INT_BLOCK_SHIFT
Initial chunk size of the shared int[] blocks used to store postings data.
Definition: DocumentsWriter.h:148
int32_t flushedDocCount
How many docs already flushed to index.
Definition: DocumentsWriter.h:100
static const int32_t MAX_THREAD_STATE
Max # ThreadState instances; if there are more threads than this they share ThreadStates.
Definition: DocumentsWriter.h:69
void setRAMBufferSizeMB(double mb)
Set how much RAM we can use before flushing.
boost::shared_ptr< IndexingChain > IndexingChainPtr
Definition: LuceneTypes.h:156
int32_t docStoreOffset
Definition: DocumentsWriter.h:63
bool flushPending
Definition: DocumentsWriter.h:162
BufferedDeletesPtr deletesFlushed
Deletes done before the last flush; these are still kept on abort.
Definition: DocumentsWriter.h:82
HashSet< String > openFiles()
Returns Collection of files in use by this instance, including any flushed segments.
clucene.sourceforge.net