Lucene++ - a full-featured, c++ search engine
API Documentation


Loading...
Searching...
No Matches
TermVectorsReader.h
Go to the documentation of this file.
1
2// Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3// Distributable under the terms of either the Apache License (Version 2.0)
4// or the GNU Lesser General Public License.
6
7#ifndef TERMVECTORSREADER_H
8#define TERMVECTORSREADER_H
9
10#include "TermVectorMapper.h"
11
12namespace Lucene {
13
14class LPPAPI TermVectorsReader : public LuceneObject {
15public:
17 TermVectorsReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fieldInfos);
18 TermVectorsReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fieldInfos,
19 int32_t readBufferSize, int32_t docStoreOffset = -1, int32_t size = 0);
21
23
24public:
26 static const int32_t FORMAT_VERSION;
27
29 static const int32_t FORMAT_VERSION2;
30
32 static const int32_t FORMAT_UTF8_LENGTH_IN_BYTES;
33
35 static const int32_t FORMAT_CURRENT;
36
38 static const int32_t FORMAT_SIZE;
39
40 static const uint8_t STORE_POSITIONS_WITH_TERMVECTOR;
41 static const uint8_t STORE_OFFSET_WITH_TERMVECTOR;
42
43protected:
45
49 int32_t _size;
50 int32_t numTotalDocs;
51
54
55 int32_t format;
56
57public:
60
63
65
69 void rawDocs(Collection<int32_t> tvdLengths, Collection<int32_t> tvfLengths, int32_t startDocID, int32_t numDocs);
70
71 void close();
72
74 int32_t size();
75
76 void get(int32_t docNum, const String& field, const TermVectorMapperPtr& mapper);
77
83 TermFreqVectorPtr get(int32_t docNum, const String& field);
84
90
91 void get(int32_t docNumber, const TermVectorMapperPtr& mapper);
92
94
95protected:
96 void ConstructReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fieldInfos, int32_t readBufferSize, int32_t docStoreOffset, int32_t size);
97
98 void seekTvx(int32_t docNum);
99
100 int32_t checkValidFormat(const IndexInputPtr& in);
101
103 Collection<String> readFields(int32_t fieldCount);
104
107
110
114 void readTermVector(const String& field, int64_t tvfPointer, const TermVectorMapperPtr& mapper);
115};
116
147
148}
149
150#endif
#define LUCENE_CLASS(Name)
Definition LuceneObject.h:24
Utility template class to handle collections that can be safely copied and shared.
Definition Collection.h:17
int32_t currentPosition
Definition TermVectorsReader.h:130
Collection< Collection< int32_t > > positions
Definition TermVectorsReader.h:128
bool storingPositions
Definition TermVectorsReader.h:132
String field
Definition TermVectorsReader.h:133
TermFreqVectorPtr materializeVector()
Construct the vector.
Collection< int32_t > termFreqs
Definition TermVectorsReader.h:127
Collection< String > terms
Definition TermVectorsReader.h:126
virtual void map(const String &term, int32_t frequency, Collection< TermVectorOffsetInfoPtr > offsets, Collection< int32_t > positions)
Map the Term Vector information into your own structure.
Collection< Collection< TermVectorOffsetInfoPtr > > offsets
Definition TermVectorsReader.h:129
bool storingOffsets
Definition TermVectorsReader.h:131
virtual void setExpectations(const String &field, int32_t numTerms, bool storeOffsets, bool storePositions)
Tell the mapper what to expect with regard to field, number of terms, offset and position storage....
TermVectorMapper(bool ignoringPositions=false, bool ignoringOffsets=false)
int32_t checkValidFormat(const IndexInputPtr &in)
FieldInfosPtr fieldInfos
Definition TermVectorsReader.h:44
IndexInputPtr getTvfStream()
Used for bulk copy when merging.
static const int32_t FORMAT_VERSION
NOTE: if you make a new format, it must be larger than the current format.
Definition TermVectorsReader.h:26
int32_t format
Definition TermVectorsReader.h:55
void readTermVector(const String &field, int64_t tvfPointer, const TermVectorMapperPtr &mapper)
void readTermVectors(Collection< String > fields, Collection< int64_t > tvfPointers, const TermVectorMapperPtr &mapper)
Collection< TermFreqVectorPtr > readTermVectors(int32_t docNum, Collection< String > fields, Collection< int64_t > tvfPointers)
static const int32_t FORMAT_CURRENT
NOTE: always change this if you switch to a new format.
Definition TermVectorsReader.h:35
TermFreqVectorPtr get(int32_t docNum, const String &field)
Retrieve the term vector for the given document and field.
static const uint8_t STORE_OFFSET_WITH_TERMVECTOR
Definition TermVectorsReader.h:41
static const uint8_t STORE_POSITIONS_WITH_TERMVECTOR
Definition TermVectorsReader.h:40
static const int32_t FORMAT_VERSION2
Changes to speed up bulk merging of term vectors.
Definition TermVectorsReader.h:29
IndexInputPtr tvd
Definition TermVectorsReader.h:47
void get(int32_t docNumber, const TermVectorMapperPtr &mapper)
void rawDocs(Collection< int32_t > tvdLengths, Collection< int32_t > tvfLengths, int32_t startDocID, int32_t numDocs)
Retrieve the length (in bytes) of the tvd and tvf entries for the next numDocs starting with startDoc...
IndexInputPtr getTvdStream()
Used for bulk copy when merging.
Collection< String > readFields(int32_t fieldCount)
Reads the String[] fields; you have to pre-seek tvd to the right point.
void seekTvx(int32_t docNum)
TermVectorsReader(const DirectoryPtr &d, const String &segment, const FieldInfosPtr &fieldInfos, int32_t readBufferSize, int32_t docStoreOffset=-1, int32_t size=0)
static const int32_t FORMAT_SIZE
The size in bytes that the FORMAT_VERSION will take up at the beginning of each file.
Definition TermVectorsReader.h:38
Collection< int64_t > readTvfPointers(int32_t fieldCount)
Reads the long[] offsets into TVF; you have to pre-seek tvx/tvd to the right point.
TermVectorsReader(const DirectoryPtr &d, const String &segment, const FieldInfosPtr &fieldInfos)
IndexInputPtr tvx
Definition TermVectorsReader.h:46
Collection< TermFreqVectorPtr > get(int32_t docNum)
Return all term vectors stored for this document or null if they could not be read in.
int32_t numTotalDocs
Definition TermVectorsReader.h:50
int32_t docStoreOffset
The docID offset where our docs begin in the index file. This will be 0 if we have our own private fi...
Definition TermVectorsReader.h:53
int32_t _size
Definition TermVectorsReader.h:49
void ConstructReader(const DirectoryPtr &d, const String &segment, const FieldInfosPtr &fieldInfos, int32_t readBufferSize, int32_t docStoreOffset, int32_t size)
void get(int32_t docNum, const String &field, const TermVectorMapperPtr &mapper)
virtual LuceneObjectPtr clone(const LuceneObjectPtr &other=LuceneObjectPtr())
Return clone of this object.
IndexInputPtr tvf
Definition TermVectorsReader.h:48
static const int32_t FORMAT_UTF8_LENGTH_IN_BYTES
Changed strings to UTF8 with length-in-bytes not length-in-chars.
Definition TermVectorsReader.h:32
Definition AbstractAllTermDocs.h:12
boost::shared_ptr< FieldInfos > FieldInfosPtr
Definition LuceneTypes.h:127
boost::shared_ptr< TermFreqVector > TermFreqVectorPtr
Definition LuceneTypes.h:237
boost::shared_ptr< IndexInput > IndexInputPtr
Definition LuceneTypes.h:493
boost::shared_ptr< Directory > DirectoryPtr
Definition LuceneTypes.h:489
boost::shared_ptr< TermVectorMapper > TermVectorMapperPtr
Definition LuceneTypes.h:254
boost::shared_ptr< LuceneObject > LuceneObjectPtr
Definition LuceneTypes.h:539

clucene.sourceforge.net