Lucene++ - a full-featured, c++ search engine
API Documentation


DirectoryReader.h
Go to the documentation of this file.
1 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
6 
7 #ifndef DIRECTORYREADER_H
8 #define DIRECTORYREADER_H
9 
10 #include "IndexReader.h"
11 #include "TermEnum.h"
12 #include "TermPositions.h"
13 #include "IndexCommit.h"
14 #include "SegmentMergeQueue.h"
15 
16 namespace Lucene {
17 
19 class DirectoryReader : public IndexReader {
20 public:
23 
25  DirectoryReader(const IndexWriterPtr& writer, const SegmentInfosPtr& infos, int32_t termInfosIndexDivisor);
26 
28  DirectoryReader(const DirectoryPtr& directory, const SegmentInfosPtr& infos, Collection<SegmentReaderPtr> oldReaders,
29  Collection<int32_t> oldStarts, MapStringByteArray oldNormsCache, bool readOnly,
30  bool doClone, int32_t termInfosIndexDivisor);
31 
32  virtual ~DirectoryReader();
33 
35 
36 protected:
38  bool readOnly;
45  bool stale;
47 
49 
51  Collection<int32_t> starts; // 1st docno for each segment
52  MapStringByteArray normsCache;
53  int32_t _maxDoc;
54  int32_t _numDocs;
56 
57  // Max version in index as of when we opened; this can be > our current segmentInfos version
58  // in case we were opened on a past IndexCommit
59  int64_t maxIndexVersion;
60 
61 public:
63 
64  static IndexReaderPtr open(const DirectoryPtr& directory, const IndexDeletionPolicyPtr& deletionPolicy, const IndexCommitPtr& commit, bool readOnly, int32_t termInfosIndexDivisor);
65 
66  virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr());
67  virtual LuceneObjectPtr clone(bool openReadOnly, const LuceneObjectPtr& other = LuceneObjectPtr());
68 
69  virtual IndexReaderPtr reopen();
70  virtual IndexReaderPtr reopen(bool openReadOnly);
72 
74  virtual int64_t getVersion();
75 
77  virtual Collection<TermFreqVectorPtr> getTermFreqVectors(int32_t docNumber);
78 
80  virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field);
81 
83  virtual void getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper);
84 
86  virtual void getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper);
87 
90  virtual bool isOptimized();
91 
93  virtual int32_t numDocs();
94 
96  virtual int32_t maxDoc();
97 
99  virtual DocumentPtr document(int32_t n, const FieldSelectorPtr& fieldSelector);
100 
102  virtual bool isDeleted(int32_t n);
103 
105  virtual bool hasDeletions();
106 
108  static int32_t readerIndex(int32_t n, Collection<int32_t> starts, int32_t numSubReaders);
109 
111  virtual bool hasNorms(const String& field);
112 
114  virtual ByteArray norms(const String& field);
115 
117  virtual void norms(const String& field, ByteArray norms, int32_t offset);
118 
120  virtual TermEnumPtr terms();
121 
123  virtual TermEnumPtr terms(const TermPtr& t);
124 
126  virtual int32_t docFreq(const TermPtr& t);
127 
129  virtual TermDocsPtr termDocs();
130 
133 
136  virtual void acquireWriteLock();
137 
138  void startCommit();
139  void rollbackCommit();
140 
142  virtual MapStringString getCommitUserData();
143 
145  virtual bool isCurrent();
146 
149  virtual HashSet<String> getFieldNames(FieldOption fieldOption);
150 
152 
155 
157  virtual DirectoryPtr directory();
158 
159  virtual int32_t getTermInfosIndexDivisor();
160 
162  virtual IndexCommitPtr getIndexCommit();
163 
166 
167 protected:
168  IndexReaderPtr doReopenFromWriter(bool openReadOnly, const IndexCommitPtr& commit);
169  IndexReaderPtr doReopen(bool openReadOnly, const IndexCommitPtr& commit);
170  IndexReaderPtr doReopenNoWriter(bool openReadOnly, const IndexCommitPtr& commit);
171  DirectoryReaderPtr doReopen(const SegmentInfosPtr& infos, bool doClone, bool openReadOnly);
172 
174  virtual void doDelete(int32_t docNum);
175 
177  virtual void doUndeleteAll();
178 
179  int32_t readerIndex(int32_t n);
180 
182  virtual void doSetNorm(int32_t doc, const String& field, uint8_t value);
183 
187  virtual void doCommit(MapStringString commitUserData);
188 
190  virtual void doClose();
191 
192  friend class FindSegmentsReopen;
193 };
194 
195 class MultiTermEnum : public TermEnum {
196 public:
197  MultiTermEnum(const IndexReaderPtr& topReader, Collection<IndexReaderPtr> readers, Collection<int32_t> starts, const TermPtr& t);
198  virtual ~MultiTermEnum();
199 
201 
202 protected:
203  SegmentMergeQueuePtr queue;
205  int32_t _docFreq;
206 
207 public:
209  Collection<SegmentMergeInfoPtr> matchingSegments; // null terminated array of matching segments
210 
211 public:
213  virtual bool next();
214 
216  virtual TermPtr term();
217 
219  virtual int32_t docFreq();
220 
222  virtual void close();
223 };
224 
225 class MultiTermDocs : public TermPositions, public LuceneObject {
226 public:
228  virtual ~MultiTermDocs();
229 
231 
232 protected:
233  IndexReaderWeakPtr _topReader; // used for matching TermEnum to TermDocs
237 
238  int32_t base;
239  int32_t pointer;
240 
243  MultiTermEnumPtr tenum; // the term enum used for seeking
244  int32_t matchingSegmentPos; // position into the matching segments from tenum
245  SegmentMergeInfoPtr smi; // current segment merge info
246 
247 public:
249  virtual int32_t doc();
250 
252  virtual int32_t freq();
253 
255  virtual void seek(const TermPtr& term);
256 
258  virtual void seek(const TermEnumPtr& termEnum);
259 
261  virtual bool next();
262 
265  virtual int32_t read(Collection<int32_t> docs, Collection<int32_t> freqs);
266 
268  virtual bool skipTo(int32_t target);
269 
271  virtual void close();
272 
273 protected:
274  virtual TermDocsPtr termDocs(int32_t i);
275  virtual TermDocsPtr termDocs(const IndexReaderPtr& reader);
276 };
277 
279 public:
281  virtual ~MultiTermPositions();
282 
284 
285 public:
287  virtual int32_t nextPosition();
288 
290  virtual int32_t getPayloadLength();
291 
293  virtual ByteArray getPayload(ByteArray data, int32_t offset);
294 
296  virtual bool isPayloadAvailable();
297 
298 protected:
299  virtual TermDocsPtr termDocs(const IndexReaderPtr& reader);
300 };
301 
302 class ReaderCommit : public IndexCommit {
303 public:
304  ReaderCommit(const SegmentInfosPtr& infos, const DirectoryPtr& dir);
305  virtual ~ReaderCommit();
306 
308 
309 protected:
310  String segmentsFileName;
313  int64_t generation;
314  int64_t version;
316  MapStringString userData;
317 
318 public:
319  virtual String toString();
320 
322  virtual bool isOptimized();
323 
325  virtual String getSegmentsFileName();
326 
328  virtual HashSet<String> getFileNames();
329 
331  virtual DirectoryPtr getDirectory();
332 
334  virtual int64_t getVersion();
335 
337  virtual int64_t getGeneration();
338 
339  virtual bool isDeleted();
340 
342  virtual MapStringString getUserData();
343 
344  virtual void deleteCommit();
345 };
346 
347 }
348 
349 #endif
IndexWriterWeakPtr _writer
Definition: DirectoryReader.h:39
Abstract class for enumerating terms.
Definition: TermEnum.h:18
void commit()
Commit changes resulting from delete, undeleteAll, or setNorm operations. If an exception is hit...
Definition: DirectoryReader.h:195
virtual bool isDeleted(int32_t n)
Returns true if document n has been deleted.
virtual DirectoryPtr directory()
Returns the directory this index resides in.
int32_t pointer
Definition: DirectoryReader.h:239
boost::shared_ptr< Term > TermPtr
Definition: LuceneTypes.h:233
virtual TermDocsPtr termDocs()
Returns an unpositioned TermDocs enumerator.
boost::shared_ptr< LuceneObject > LuceneObjectPtr
Definition: LuceneTypes.h:539
void _initialize(Collection< SegmentReaderPtr > subReaders)
virtual IndexCommitPtr getIndexCommit()
Return the IndexCommit that this reader has opened.
IndexReaderPtr doReopen(bool openReadOnly, const IndexCommitPtr &commit)
int32_t _numDocs
Definition: DirectoryReader.h:54
LockPtr writeLock
Definition: DirectoryReader.h:42
TermPtr _term
Definition: DirectoryReader.h:204
TermDocsPtr current
Definition: DirectoryReader.h:242
boost::shared_ptr< TermDocs > TermDocsPtr
Definition: LuceneTypes.h:236
boost::shared_ptr< TermEnum > TermEnumPtr
Definition: LuceneTypes.h:235
boost::shared_ptr< FieldSelector > FieldSelectorPtr
Definition: LuceneTypes.h:77
virtual bool isCurrent()
Check whether any new changes have occurred to the index since this reader was opened.
virtual void doClose()
Implements close.
int32_t base
Definition: DirectoryReader.h:238
Collection< int32_t > starts
Definition: DirectoryReader.h:51
int64_t version
Definition: DirectoryReader.h:314
SegmentInfosPtr segmentInfosStart
Definition: DirectoryReader.h:44
Collection< int32_t > starts
Definition: DirectoryReader.h:235
boost::shared_ptr< IndexWriter > IndexWriterPtr
Definition: LuceneTypes.h:160
Represents a single commit into an index as seen by the IndexDeletionPolicy or IndexReader.
Definition: IndexCommit.h:22
virtual int32_t numDocs()
Returns the number of documents in this index.
static IndexReaderPtr open(const DirectoryPtr &directory, const IndexDeletionPolicyPtr &deletionPolicy, const IndexCommitPtr &commit, bool readOnly, int32_t termInfosIndexDivisor)
virtual DocumentPtr document(int32_t n, const FieldSelectorPtr &fieldSelector)
Get the Document at the n'th position. The FieldSelector may be used to determine what Fields to load...
DirectoryReader(const DirectoryPtr &directory, const SegmentInfosPtr &sis, const IndexDeletionPolicyPtr &deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor)
Construct reading the named set of readers.
int32_t _docFreq
Definition: DirectoryReader.h:205
Definition: DirectoryReader.h:278
int32_t termInfosIndexDivisor
Definition: DirectoryReader.h:46
DirectoryPtr _directory
Definition: DirectoryReader.h:34
An IndexReader which reads indexes with multiple segments.
Definition: DirectoryReader.h:19
boost::weak_ptr< IndexWriter > IndexWriterWeakPtr
Definition: LuceneTypes.h:160
virtual Collection< TermFreqVectorPtr > getTermFreqVectors(int32_t docNumber)
Return an array of term frequency vectors for the specified document.
virtual ByteArray norms(const String &field)
Returns the byte-encoded normalization factor for the named field of every document.
boost::shared_ptr< Lock > LockPtr
Definition: LuceneTypes.h:496
bool rollbackHasChanges
Definition: DirectoryReader.h:48
boost::shared_ptr< SegmentInfos > SegmentInfosPtr
Definition: LuceneTypes.h:210
virtual IndexReaderPtr reopen()
Refreshes an IndexReader if the index has changed since this instance was (re)opened.
virtual TermEnumPtr terms()
Returns an enumeration of all the terms in the index.
Collection< SegmentMergeInfoPtr > matchingSegments
Definition: DirectoryReader.h:209
virtual void doDelete(int32_t docNum)
Implements deletion of the document numbered docNum.
MapStringString userData
Definition: DirectoryReader.h:316
Collection< SegmentReaderPtr > subReaders
Definition: DirectoryReader.h:50
TermPositions provides an interface for enumerating the <document, frequency, <position>*> tuples for...
Definition: TermPositions.h:18
virtual bool hasDeletions()
Returns true if any documents have been deleted.
boost::shared_ptr< Document > DocumentPtr
Definition: LuceneTypes.h:74
Collection< IndexReaderPtr > readers
Definition: DirectoryReader.h:234
boost::shared_ptr< IndexDeletionPolicy > IndexDeletionPolicyPtr
Definition: LuceneTypes.h:153
Definition: DirectoryReader.h:302
SegmentMergeInfoPtr smi
Definition: DirectoryReader.h:245
FieldOption
Constants describing field properties, for example used for IndexReader#getFieldNames(FieldOption).
Definition: IndexReader.h:48
boost::shared_ptr< IndexReader > IndexReaderPtr
Definition: LuceneTypes.h:157
boost::shared_ptr< Directory > DirectoryPtr
Definition: LuceneTypes.h:489
boost::shared_ptr< TermVectorMapper > TermVectorMapperPtr
Definition: LuceneTypes.h:254
virtual void doSetNorm(int32_t doc, const String &field, uint8_t value)
Implements setNorm in subclass.
Base class for all Lucene classes.
Definition: LuceneObject.h:31
IndexReaderWeakPtr _topReader
Definition: DirectoryReader.h:208
int64_t maxIndexVersion
Definition: DirectoryReader.h:59
boost::shared_ptr< TermPositions > TermPositionsPtr
Definition: LuceneTypes.h:243
virtual Collection< IndexReaderPtr > getSequentialSubReaders()
Returns the sequential sub readers that this reader is logically composed of.
virtual void doCommit(MapStringString commitUserData)
Commit changes resulting from delete, undeleteAll, or setNorm operations.
virtual MapStringString getCommitUserData()
Retrieve the String userData optionally passed to IndexWriter::commit.
bool readOnly
Definition: DirectoryReader.h:38
int32_t _maxDoc
Definition: DirectoryReader.h:53
#define LUCENE_CLASS(Name)
Definition: LuceneObject.h:24
virtual String toString()
Returns a string representation of the object.
Definition: AbstractAllTermDocs.h:12
boost::shared_ptr< IndexCommit > IndexCommitPtr
Definition: LuceneTypes.h:152
virtual void doUndeleteAll()
Implements actual undeleteAll() in subclass.
virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String &field)
Return a term frequency vector for the specified document and field.
boost::weak_ptr< IndexReader > IndexReaderWeakPtr
Definition: LuceneTypes.h:157
boost::shared_ptr< DirectoryReader > DirectoryReaderPtr
Definition: LuceneTypes.h:105
Collection< TermDocsPtr > readerTermDocs
Definition: DirectoryReader.h:241
virtual bool isOptimized()
Checks if the index is optimized (if it has a single segment and no deletions). Not implemented in th...
friend class FindSegmentsReopen
Definition: DirectoryReader.h:192
int32_t matchingSegmentPos
Definition: DirectoryReader.h:244
SegmentInfosPtr segmentInfos
Definition: DirectoryReader.h:43
virtual HashSet< String > getFieldNames(FieldOption fieldOption)
Get a list of unique field names that exist in this index and have the specified field option informa...
virtual int32_t maxDoc()
Returns one greater than the largest possible document number.
MapStringByteArray normsCache
Definition: DirectoryReader.h:52
IndexReaderPtr doReopenFromWriter(bool openReadOnly, const IndexCommitPtr &commit)
void close()
Closes files associated with this index. Also saves any new deletions to disk. No other methods shoul...
bool _isOptimized
Definition: DirectoryReader.h:315
boost::shared_ptr< SegmentMergeQueue > SegmentMergeQueuePtr
Definition: LuceneTypes.h:213
int64_t generation
Definition: DirectoryReader.h:313
virtual int32_t getTermInfosIndexDivisor()
For IndexReader implementations that use TermInfosReader to read terms, this returns the current inde...
bool _hasDeletions
Definition: DirectoryReader.h:55
IndexReader is an abstract class, providing an interface for accessing an index. Search of an index i...
Definition: IndexReader.h:39
IndexReaderPtr doReopenNoWriter(bool openReadOnly, const IndexCommitPtr &commit)
DirectoryPtr dir
Definition: DirectoryReader.h:312
virtual LuceneObjectPtr clone(const LuceneObjectPtr &other=LuceneObjectPtr())
Efficiently clones the IndexReader (sharing most internal state).
MultiTermEnumPtr tenum
Definition: DirectoryReader.h:243
bool stale
Definition: DirectoryReader.h:45
Definition: DirectoryReader.h:225
HashSet< String > synced
Definition: DirectoryReader.h:41
virtual void acquireWriteLock()
Tries to acquire the WriteLock on this directory. This method is only valid if this IndexReader is di...
static int32_t readerIndex(int32_t n, Collection< int32_t > starts, int32_t numSubReaders)
Find reader for doc n.
TermPtr term
Definition: DirectoryReader.h:236
IndexDeletionPolicyPtr deletionPolicy
Definition: DirectoryReader.h:40
boost::shared_ptr< MultiTermEnum > MultiTermEnumPtr
Definition: LuceneTypes.h:183
virtual TermPositionsPtr termPositions()
Returns an unpositioned TermPositions enumerator.
static Collection< IndexCommitPtr > listCommits(const DirectoryPtr &dir)
Returns all commit points that exist in the Directory.
virtual bool hasNorms(const String &field)
Returns true if there are norms stored for this field.
virtual int64_t getVersion()
Version number when this IndexReader was opened.
virtual int32_t docFreq(const TermPtr &t)
Returns the number of documents containing the term t.
boost::shared_ptr< TermFreqVector > TermFreqVectorPtr
Definition: LuceneTypes.h:237
boost::shared_ptr< SegmentMergeInfo > SegmentMergeInfoPtr
Definition: LuceneTypes.h:212
HashSet< String > files
Definition: DirectoryReader.h:311

clucene.sourceforge.net