You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ro...@apache.org on 2015/02/06 17:39:49 UTC
svn commit: r1657890 [2/10] - in /lucene/dev/branches/branch_5x: ./
dev-tools/ dev-tools/idea/lucene/highlighter/ lucene/ lucene/analysis/
lucene/analysis/common/src/test/org/apache/lucene/analysis/core/
lucene/analysis/common/src/test/org/apache/lucen...
Modified: lucene/dev/branches/branch_5x/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java?rev=1657890&r1=1657889&r2=1657890&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (original)
+++ lucene/dev/branches/branch_5x/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java Fri Feb 6 16:39:45 2015
@@ -915,7 +915,7 @@ public class TestBackwardsCompatibility
writer.addDocument(doc);
}
- private int countDocs(DocsEnum docs) throws IOException {
+ private int countDocs(PostingsEnum docs) throws IOException {
int count = 0;
while((docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
count ++;
@@ -941,7 +941,7 @@ public class TestBackwardsCompatibility
// should be found exactly
assertEquals(TermsEnum.SeekStatus.FOUND,
terms.seekCeil(aaaTerm));
- assertEquals(35, countDocs(TestUtil.docs(random(), terms, null, null, DocsEnum.FLAG_NONE)));
+ assertEquals(35, countDocs(TestUtil.docs(random(), terms, null, null, PostingsEnum.FLAG_NONE)));
assertNull(terms.next());
// should hit end of field
@@ -953,12 +953,12 @@ public class TestBackwardsCompatibility
assertEquals(TermsEnum.SeekStatus.NOT_FOUND,
terms.seekCeil(new BytesRef("a")));
assertTrue(terms.term().bytesEquals(aaaTerm));
- assertEquals(35, countDocs(TestUtil.docs(random(), terms, null, null, DocsEnum.FLAG_NONE)));
+ assertEquals(35, countDocs(TestUtil.docs(random(), terms, null, null, PostingsEnum.FLAG_NONE)));
assertNull(terms.next());
assertEquals(TermsEnum.SeekStatus.FOUND,
terms.seekCeil(aaaTerm));
- assertEquals(35, countDocs(TestUtil.docs(random(), terms, null, null, DocsEnum.FLAG_NONE)));
+ assertEquals(35, countDocs(TestUtil.docs(random(), terms, null, null, PostingsEnum.FLAG_NONE)));
assertNull(terms.next());
r.close();
Modified: lucene/dev/branches/branch_5x/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java?rev=1657890&r1=1657889&r2=1657890&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (original)
+++ lucene/dev/branches/branch_5x/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java Fri Feb 6 16:39:45 2015
@@ -40,7 +40,7 @@ import org.apache.lucene.benchmark.byTas
import org.apache.lucene.collation.CollationKeyAnalyzer;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
@@ -497,9 +497,9 @@ public class TestPerfTasksLogic extends
continue;
}
TermsEnum termsEnum = terms.iterator(null);
- DocsEnum docs = null;
+ PostingsEnum docs = null;
while(termsEnum.next() != null) {
- docs = TestUtil.docs(random(), termsEnum, MultiFields.getLiveDocs(reader), docs, DocsEnum.FLAG_FREQS);
+ docs = TestUtil.docs(random(), termsEnum, MultiFields.getLiveDocs(reader), docs, PostingsEnum.FLAG_FREQS);
while(docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
totalTokenCount2 += docs.freq();
}
Modified: lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java?rev=1657890&r1=1657889&r2=1657890&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java (original)
+++ lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java Fri Feb 6 16:39:45 2015
@@ -30,8 +30,7 @@ import org.apache.lucene.codecs.CodecUti
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexOptions;
@@ -657,22 +656,19 @@ public class BlockTermsReader extends Fi
}
@Override
- public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
- //System.out.println("BTR.docs this=" + this);
- decodeMetaData();
- //System.out.println("BTR.docs: state.docFreq=" + state.docFreq);
- return postingsReader.docs(fieldInfo, state, liveDocs, reuse, flags);
- }
+ public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) throws IOException {
- @Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
- if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
- // Positions were not indexed:
- return null;
+ if (PostingsEnum.requiresPositions(flags)) {
+ if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+ // Positions were not indexed:
+ return null;
+ }
}
+ //System.out.println("BTR.docs this=" + this);
decodeMetaData();
- return postingsReader.docsAndPositions(fieldInfo, state, liveDocs, reuse, flags);
+ //System.out.println("BTR.docs: state.docFreq=" + state.docFreq);
+ return postingsReader.postings(fieldInfo, state, liveDocs, reuse, flags);
}
@Override
Modified: lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java?rev=1657890&r1=1657889&r2=1657890&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java (original)
+++ lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java Fri Feb 6 16:39:45 2015
@@ -20,8 +20,7 @@ package org.apache.lucene.codecs.blocktr
import java.io.IOException;
import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
@@ -203,20 +202,17 @@ final class OrdsIntersectTermsEnum exten
}
@Override
- public DocsEnum docs(Bits skipDocs, DocsEnum reuse, int flags) throws IOException {
- currentFrame.decodeMetaData();
- return fr.parent.postingsReader.docs(fr.fieldInfo, currentFrame.termState, skipDocs, reuse, flags);
- }
+ public PostingsEnum postings(Bits skipDocs, PostingsEnum reuse, int flags) throws IOException {
- @Override
- public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
- if (fr.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
- // Positions were not indexed:
- return null;
+ if (PostingsEnum.requiresPositions(flags)) {
+ if (fr.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+ // Positions were not indexed:
+ return null;
+ }
}
currentFrame.decodeMetaData();
- return fr.parent.postingsReader.docsAndPositions(fr.fieldInfo, currentFrame.termState, skipDocs, reuse, flags);
+ return fr.parent.postingsReader.postings(fr.fieldInfo, currentFrame.termState, skipDocs, reuse, flags);
}
private int getState() {
Modified: lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java?rev=1657890&r1=1657889&r2=1657890&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java (original)
+++ lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java Fri Feb 6 16:39:45 2015
@@ -25,8 +25,7 @@ import java.io.PrintStream;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
@@ -924,7 +923,15 @@ public final class OrdsSegmentTermsEnum
}
@Override
- public DocsEnum docs(Bits skipDocs, DocsEnum reuse, int flags) throws IOException {
+ public PostingsEnum postings(Bits skipDocs, PostingsEnum reuse, int flags) throws IOException {
+
+ if (PostingsEnum.requiresPositions(flags)) {
+ if (fr.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+ // Positions were not indexed:
+ return null;
+ }
+ }
+
assert !eof;
//if (DEBUG) {
//System.out.println("BTTR.docs seg=" + segment);
@@ -933,19 +940,7 @@ public final class OrdsSegmentTermsEnum
//if (DEBUG) {
//System.out.println(" state=" + currentFrame.state);
//}
- return fr.parent.postingsReader.docs(fr.fieldInfo, currentFrame.state, skipDocs, reuse, flags);
- }
-
- @Override
- public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
- if (fr.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
- // Positions were not indexed:
- return null;
- }
-
- assert !eof;
- currentFrame.decodeMetaData();
- return fr.parent.postingsReader.docsAndPositions(fr.fieldInfo, currentFrame.state, skipDocs, reuse, flags);
+ return fr.parent.postingsReader.postings(fr.fieldInfo, currentFrame.state, skipDocs, reuse, flags);
}
@Override
Modified: lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java?rev=1657890&r1=1657889&r2=1657890&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java (original)
+++ lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java Fri Feb 6 16:39:45 2015
@@ -32,8 +32,7 @@ import org.apache.lucene.codecs.FieldsCo
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.bloom.FuzzySet.ContainsResult;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexFileNames;
@@ -382,19 +381,13 @@ public final class BloomFilteringPosting
public long totalTermFreq() throws IOException {
return delegate().totalTermFreq();
}
-
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs,
- DocsAndPositionsEnum reuse, int flags) throws IOException {
- return delegate().docsAndPositions(liveDocs, reuse, flags);
- }
-
- @Override
- public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags)
+ public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags)
throws IOException {
- return delegate().docs(liveDocs, reuse, flags);
+ return delegate().postings(liveDocs, reuse, flags);
}
+
}
@Override
@@ -460,7 +453,7 @@ public final class BloomFilteringPosting
FuzzySet bloomFilter = null;
- DocsEnum docsEnum = null;
+ PostingsEnum postingsEnum = null;
while (true) {
BytesRef term = termsEnum.next();
if (term == null) {
@@ -476,8 +469,8 @@ public final class BloomFilteringPosting
bloomFilters.put(fieldInfo, bloomFilter);
}
// Make sure there's at least one doc for this term:
- docsEnum = termsEnum.docs(null, docsEnum, 0);
- if (docsEnum.nextDoc() != DocsEnum.NO_MORE_DOCS) {
+ postingsEnum = termsEnum.postings(null, postingsEnum, 0);
+ if (postingsEnum.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
bloomFilter.addValue(term);
}
}
Modified: lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java?rev=1657890&r1=1657889&r2=1657890&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java (original)
+++ lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java Fri Feb 6 16:39:45 2015
@@ -27,13 +27,13 @@ import java.util.TreeMap;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat; // javadocs
-import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.OrdTermState;
+import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.TermState;
@@ -51,7 +51,7 @@ import org.apache.lucene.util.automaton.
import org.apache.lucene.util.automaton.RunAutomaton;
import org.apache.lucene.util.automaton.Transition;
-// TODO:
+// TODO:
// - build depth-N prefix hash?
// - or: longer dense skip lists than just next byte?
@@ -62,7 +62,7 @@ import org.apache.lucene.util.automaton.
* <p><b>WARNING</b>: This is
* exceptionally RAM intensive: it makes no effort to
* compress the postings data, storing terms as separate
- * byte[] and postings as separate int[], but as a result it
+ * byte[] and postings as separate int[], but as a result it
* gives substantial increase in search performance.
*
* <p>This postings format supports {@link TermsEnum#ord}
@@ -89,7 +89,7 @@ public final class DirectPostingsFormat
public DirectPostingsFormat() {
this(DEFAULT_MIN_SKIP_COUNT, DEFAULT_LOW_FREQ_CUTOFF);
}
-
+
/** minSkipCount is how many terms in a row must have the
* same prefix before we put a skip pointer down. Terms
* with docFreq <= lowFreqCutoff will use a single int[]
@@ -100,7 +100,7 @@ public final class DirectPostingsFormat
this.minSkipCount = minSkipCount;
this.lowFreqCutoff = lowFreqCutoff;
}
-
+
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
return PostingsFormat.forName("Lucene50").fieldsConsumer(state);
@@ -161,7 +161,7 @@ public final class DirectPostingsFormat
}
return sizeInBytes;
}
-
+
@Override
public Collection<Accountable> getChildResources() {
return Accountables.namedAccountables("field", fields);
@@ -206,7 +206,7 @@ public final class DirectPostingsFormat
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED +
- ((postings!=null) ? RamUsageEstimator.sizeOf(postings) : 0) +
+ ((postings!=null) ? RamUsageEstimator.sizeOf(postings) : 0) +
((payloads!=null) ? RamUsageEstimator.sizeOf(payloads) : 0);
}
@@ -214,6 +214,7 @@ public final class DirectPostingsFormat
public Collection<Accountable> getChildResources() {
return Collections.emptyList();
}
+
}
// TODO: maybe specialize into prx/no-prx/no-frq cases?
@@ -237,36 +238,37 @@ public final class DirectPostingsFormat
@Override
public long ramBytesUsed() {
- long sizeInBytes = BASE_RAM_BYTES_USED;
- sizeInBytes += (docIDs!=null)? RamUsageEstimator.sizeOf(docIDs) : 0;
- sizeInBytes += (freqs!=null)? RamUsageEstimator.sizeOf(freqs) : 0;
-
- if(positions != null) {
- sizeInBytes += RamUsageEstimator.shallowSizeOf(positions);
- for(int[] position : positions) {
- sizeInBytes += (position!=null) ? RamUsageEstimator.sizeOf(position) : 0;
- }
- }
-
- if (payloads != null) {
- sizeInBytes += RamUsageEstimator.shallowSizeOf(payloads);
- for(byte[][] payload : payloads) {
- if(payload != null) {
- sizeInBytes += RamUsageEstimator.shallowSizeOf(payload);
- for(byte[] pload : payload) {
- sizeInBytes += (pload!=null) ? RamUsageEstimator.sizeOf(pload) : 0;
- }
- }
- }
- }
-
- return sizeInBytes;
+ long sizeInBytes = BASE_RAM_BYTES_USED;
+ sizeInBytes += (docIDs!=null)? RamUsageEstimator.sizeOf(docIDs) : 0;
+ sizeInBytes += (freqs!=null)? RamUsageEstimator.sizeOf(freqs) : 0;
+
+ if(positions != null) {
+ sizeInBytes += RamUsageEstimator.shallowSizeOf(positions);
+ for(int[] position : positions) {
+ sizeInBytes += (position!=null) ? RamUsageEstimator.sizeOf(position) : 0;
+ }
+ }
+
+ if (payloads != null) {
+ sizeInBytes += RamUsageEstimator.shallowSizeOf(payloads);
+ for(byte[][] payload : payloads) {
+ if(payload != null) {
+ sizeInBytes += RamUsageEstimator.shallowSizeOf(payload);
+ for(byte[] pload : payload) {
+ sizeInBytes += (pload!=null) ? RamUsageEstimator.sizeOf(pload) : 0;
+ }
+ }
+ }
+ }
+
+ return sizeInBytes;
}
@Override
public Collection<Accountable> getChildResources() {
return Collections.emptyList();
}
+
}
private final byte[] termBytes;
@@ -323,7 +325,7 @@ public final class DirectPostingsFormat
}
terms = new TermAndSkip[numTerms];
termOffsets = new int[1+numTerms];
-
+
byte[] termBytes = new byte[1024];
this.minSkipCount = minSkipCount;
@@ -334,8 +336,8 @@ public final class DirectPostingsFormat
hasPayloads = fieldInfo.hasPayloads();
BytesRef term;
- DocsEnum docsEnum = null;
- DocsAndPositionsEnum docsAndPositionsEnum = null;
+ PostingsEnum postingsEnum = null;
+ PostingsEnum docsAndPositionsEnum = null;
final TermsEnum termsEnum = termsIn.iterator(null);
int termOffset = 0;
@@ -366,18 +368,18 @@ public final class DirectPostingsFormat
termOffsets[count+1] = termOffset;
if (hasPos) {
- docsAndPositionsEnum = termsEnum.docsAndPositions(null, docsAndPositionsEnum);
+ docsAndPositionsEnum = termsEnum.postings(null, docsAndPositionsEnum, PostingsEnum.FLAG_ALL);
} else {
- docsEnum = termsEnum.docs(null, docsEnum);
+ postingsEnum = termsEnum.postings(null, postingsEnum);
}
final TermAndSkip ent;
- final DocsEnum docsEnum2;
+ final PostingsEnum postingsEnum2;
if (hasPos) {
- docsEnum2 = docsAndPositionsEnum;
+ postingsEnum2 = docsAndPositionsEnum;
} else {
- docsEnum2 = docsEnum;
+ postingsEnum2 = postingsEnum;
}
int docID;
@@ -387,10 +389,10 @@ public final class DirectPostingsFormat
ros.reset();
// Pack postings for low-freq terms into a single int[]:
- while ((docID = docsEnum2.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
+ while ((docID = postingsEnum2.nextDoc()) != PostingsEnum.NO_MORE_DOCS) {
scratch.add(docID);
if (hasFreq) {
- final int freq = docsEnum2.freq();
+ final int freq = postingsEnum2.freq();
scratch.add(freq);
if (hasPos) {
for(int pos=0;pos<freq;pos++) {
@@ -422,7 +424,7 @@ public final class DirectPostingsFormat
}
final int[] postings = scratch.get();
-
+
ent = new LowFreqTerm(postings, payloads, docFreq, (int) totalTermFreq);
} else {
final int[] docs = new int[docFreq];
@@ -451,10 +453,10 @@ public final class DirectPostingsFormat
// Use separate int[] for the postings for high-freq
// terms:
int upto = 0;
- while ((docID = docsEnum2.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
+ while ((docID = postingsEnum2.nextDoc()) != PostingsEnum.NO_MORE_DOCS) {
docs[upto] = docID;
if (hasFreq) {
- final int freq = docsEnum2.freq();
+ final int freq = postingsEnum2.freq();
freqs[upto] = freq;
if (hasPos) {
final int mult;
@@ -534,14 +536,14 @@ public final class DirectPostingsFormat
sizeInBytes += ((skips!=null) ? RamUsageEstimator.sizeOf(skips) : 0);
sizeInBytes += ((skipOffsets!=null) ? RamUsageEstimator.sizeOf(skipOffsets) : 0);
sizeInBytes += ((sameCounts!=null) ? RamUsageEstimator.sizeOf(sameCounts) : 0);
-
+
if(terms!=null) {
sizeInBytes += RamUsageEstimator.shallowSizeOf(terms);
for(TermAndSkip termAndSkip : terms) {
sizeInBytes += (termAndSkip!=null) ? termAndSkip.ramBytesUsed() : 0;
}
}
-
+
return sizeInBytes;
}
@@ -562,7 +564,7 @@ public final class DirectPostingsFormat
int upto = termOffsets[ord];
final int termLen = termOffsets[1+ord] - upto;
int otherUpto = other.offset;
-
+
final int stop = upto + Math.min(termLen, other.length);
while (upto < stop) {
int diff = (termBytes[upto++] & 0xFF) - (otherBytes[otherUpto++] & 0xFF);
@@ -570,7 +572,7 @@ public final class DirectPostingsFormat
return diff;
}
}
-
+
// One is a prefix of the other, or, they are equal:
return termLen - other.length;
}
@@ -722,7 +724,7 @@ public final class DirectPostingsFormat
public boolean hasPositions() {
return hasPos;
}
-
+
@Override
public boolean hasPayloads() {
return hasPayloads;
@@ -870,10 +872,26 @@ public final class DirectPostingsFormat
}
@Override
- public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) {
+ public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) throws IOException {
// TODO: implement reuse
// it's hairy!
+ if ((flags & PostingsEnum.FLAG_POSITIONS) >= PostingsEnum.FLAG_POSITIONS) {
+ if (!hasPos) {
+ return null;
+ }
+
+ if (terms[termOrd] instanceof LowFreqTerm) {
+ final LowFreqTerm term = ((LowFreqTerm) terms[termOrd]);
+ final int[] postings = term.postings;
+ final byte[] payloads = term.payloads;
+ return new LowFreqPostingsEnum(liveDocs, hasOffsets, hasPayloads).reset(postings, payloads);
+ } else {
+ final HighFreqTerm term = (HighFreqTerm) terms[termOrd];
+ return new HighFreqPostingsEnum(liveDocs, hasOffsets).reset(term.docIDs, term.freqs, term.positions, term.payloads);
+ }
+ }
+
if (terms[termOrd] instanceof LowFreqTerm) {
final int[] postings = ((LowFreqTerm) terms[termOrd]).postings;
if (hasFreq) {
@@ -942,25 +960,6 @@ public final class DirectPostingsFormat
}
}
- @Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) {
- if (!hasPos) {
- return null;
- }
-
- // TODO: implement reuse
- // it's hairy!
-
- if (terms[termOrd] instanceof LowFreqTerm) {
- final LowFreqTerm term = ((LowFreqTerm) terms[termOrd]);
- final int[] postings = term.postings;
- final byte[] payloads = term.payloads;
- return new LowFreqDocsAndPositionsEnum(liveDocs, hasOffsets, hasPayloads).reset(postings, payloads);
- } else {
- final HighFreqTerm term = (HighFreqTerm) terms[termOrd];
- return new HighFreqDocsAndPositionsEnum(liveDocs, hasOffsets).reset(term.docIDs, term.freqs, term.positions, term.payloads);
- }
- }
}
private final class DirectIntersectTermsEnum extends TermsEnum {
@@ -1218,7 +1217,7 @@ public final class DirectPostingsFormat
// if (DEBUG) {
// System.out.println(" term=" + new BytesRef(termBytes, termOffset, termLength).utf8ToString() + " skips=" + Arrays.toString(skips));
// }
-
+
assert termOrd < state.changeOrd;
assert stateUpto <= termLength: "term.length=" + termLength + "; stateUpto=" + stateUpto;
@@ -1351,7 +1350,7 @@ public final class DirectPostingsFormat
compiledAutomaton.automaton.initTransition(nextState, states[stateUpto].transition);
states[stateUpto].transitionUpto = -1;
states[stateUpto].transitionMax = -1;
-
+
if (stateUpto == termLength) {
// if (DEBUG) {
// System.out.println(" term ends after push");
@@ -1468,9 +1467,23 @@ public final class DirectPostingsFormat
}
@Override
- public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) {
+ public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) {
// TODO: implement reuse
// it's hairy!
+ if ((flags & PostingsEnum.FLAG_POSITIONS) >= PostingsEnum.FLAG_POSITIONS) {
+ if (!hasPos) {
+ return null;
+ }
+ if (terms[termOrd] instanceof LowFreqTerm) {
+ final LowFreqTerm term = ((LowFreqTerm) terms[termOrd]);
+ final int[] postings = term.postings;
+ final byte[] payloads = term.payloads;
+ return new LowFreqPostingsEnum(liveDocs, hasOffsets, hasPayloads).reset(postings, payloads);
+ } else {
+ final HighFreqTerm term = (HighFreqTerm) terms[termOrd];
+ return new HighFreqPostingsEnum(liveDocs, hasOffsets).reset(term.docIDs, term.freqs, term.positions, term.payloads);
+ }
+ }
if (terms[termOrd] instanceof LowFreqTerm) {
final int[] postings = ((LowFreqTerm) terms[termOrd]).postings;
@@ -1500,26 +1513,6 @@ public final class DirectPostingsFormat
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) {
- if (!hasPos) {
- return null;
- }
-
- // TODO: implement reuse
- // it's hairy!
-
- if (terms[termOrd] instanceof LowFreqTerm) {
- final LowFreqTerm term = ((LowFreqTerm) terms[termOrd]);
- final int[] postings = term.postings;
- final byte[] payloads = term.payloads;
- return new LowFreqDocsAndPositionsEnum(liveDocs, hasOffsets, hasPayloads).reset(postings, payloads);
- } else {
- final HighFreqTerm term = (HighFreqTerm) terms[termOrd];
- return new HighFreqDocsAndPositionsEnum(liveDocs, hasOffsets).reset(term.docIDs, term.freqs, term.positions, term.payloads);
- }
- }
-
- @Override
public SeekStatus seekCeil(BytesRef term) {
throw new UnsupportedOperationException();
}
@@ -1545,7 +1538,7 @@ public final class DirectPostingsFormat
return liveDocs == this.liveDocs;
}
- public DocsEnum reset(int[] postings) {
+ public PostingsEnum reset(int[] postings) {
this.postings = postings;
upto = -1;
return this;
@@ -1588,12 +1581,18 @@ public final class DirectPostingsFormat
}
@Override
+ public int nextPosition() throws IOException {
+ assert false : "Shouldn't be calling nextPosition() if you haven't asked for positions";
+ return -1;
+ }
+
+ @Override
public int advance(int target) throws IOException {
// Linear scan, but this is low-freq term so it won't
// be costly:
return slowAdvance(target);
}
-
+
@Override
public long cost() {
return postings.length;
@@ -1614,7 +1613,7 @@ public final class DirectPostingsFormat
return liveDocs == this.liveDocs;
}
- public DocsEnum reset(int[] postings) {
+ public PostingsEnum reset(int[] postings) {
this.postings = postings;
upto = -2;
return this;
@@ -1656,12 +1655,18 @@ public final class DirectPostingsFormat
}
@Override
+ public int nextPosition() throws IOException {
+ assert false : "Shouldn't be calling nextPosition() if you haven't asked for positions";
+ return -1;
+ }
+
+ @Override
public int advance(int target) throws IOException {
// Linear scan, but this is low-freq term so it won't
// be costly:
return slowAdvance(target);
}
-
+
@Override
public long cost() {
return postings.length / 2;
@@ -1688,7 +1693,7 @@ public final class DirectPostingsFormat
return liveDocs == this.liveDocs && posMult == this.posMult;
}
- public DocsEnum reset(int[] postings) {
+ public PostingsEnum reset(int[] postings) {
this.postings = postings;
upto = -2;
freq = 0;
@@ -1703,7 +1708,7 @@ public final class DirectPostingsFormat
// System.out.println(" nextDoc freq=" + freq + " upto=" + upto + " vs " + postings.length);
// }
if (liveDocs == null) {
- if (upto < postings.length) {
+ if (upto < postings.length) {
freq = postings[upto+1];
assert freq > 0;
return postings[upto];
@@ -1740,12 +1745,18 @@ public final class DirectPostingsFormat
}
@Override
+ public int nextPosition() throws IOException {
+ assert false : "should be using LowFreqDocsAndPositionsEnum";
+ return -1;
+ }
+
+ @Override
public int advance(int target) throws IOException {
// Linear scan, but this is low-freq term so it won't
// be costly:
return slowAdvance(target);
}
-
+
@Override
public long cost() {
// TODO: could do a better estimate
@@ -1753,7 +1764,7 @@ public final class DirectPostingsFormat
}
}
- private final static class LowFreqDocsAndPositionsEnum extends DocsAndPositionsEnum {
+ private final static class LowFreqPostingsEnum extends PostingsEnum {
private int[] postings;
private final Bits liveDocs;
private final int posMult;
@@ -1764,6 +1775,7 @@ public final class DirectPostingsFormat
private int docID;
private int freq;
private int skipPositions;
+ private int pos;
private int startOffset;
private int endOffset;
private int lastPayloadOffset;
@@ -1771,7 +1783,7 @@ public final class DirectPostingsFormat
private int payloadLength;
private byte[] payloadBytes;
- public LowFreqDocsAndPositionsEnum(Bits liveDocs, boolean hasOffsets, boolean hasPayloads) {
+ public LowFreqPostingsEnum(Bits liveDocs, boolean hasOffsets, boolean hasPayloads) {
this.liveDocs = liveDocs;
this.hasOffsets = hasOffsets;
this.hasPayloads = hasPayloads;
@@ -1788,10 +1800,11 @@ public final class DirectPostingsFormat
}
}
- public DocsAndPositionsEnum reset(int[] postings, byte[] payloadBytes) {
+ public PostingsEnum reset(int[] postings, byte[] payloadBytes) {
this.postings = postings;
upto = 0;
skipPositions = 0;
+ pos = -1;
startOffset = -1;
endOffset = -1;
docID = -1;
@@ -1802,6 +1815,7 @@ public final class DirectPostingsFormat
@Override
public int nextDoc() {
+ pos = -1;
if (hasPayloads) {
for(int i=0;i<skipPositions;i++) {
upto++;
@@ -1860,7 +1874,7 @@ public final class DirectPostingsFormat
public int nextPosition() {
assert skipPositions > 0;
skipPositions--;
- final int pos = postings[upto++];
+ pos = postings[upto++];
if (hasOffsets) {
startOffset = postings[upto++];
endOffset = postings[upto++];
@@ -1899,7 +1913,7 @@ public final class DirectPostingsFormat
return null;
}
}
-
+
@Override
public long cost() {
// TODO: could do a better estimate
@@ -1931,7 +1945,7 @@ public final class DirectPostingsFormat
return freqs;
}
- public DocsEnum reset(int[] docIDs, int[] freqs) {
+ public PostingsEnum reset(int[] docIDs, int[] freqs) {
this.docIDs = docIDs;
this.freqs = freqs;
docID = upto = -1;
@@ -2078,7 +2092,7 @@ public final class DirectPostingsFormat
return docID = docIDs[upto];
}
}
-
+
@Override
public long cost() {
return docIDs.length;
@@ -2086,7 +2100,7 @@ public final class DirectPostingsFormat
}
// TODO: specialize offsets and not
- private final static class HighFreqDocsAndPositionsEnum extends DocsAndPositionsEnum {
+ private final static class HighFreqPostingsEnum extends PostingsEnum {
private int[] docIDs;
private int[] freqs;
private int[][] positions;
@@ -2099,7 +2113,7 @@ public final class DirectPostingsFormat
private int posUpto;
private int[] curPositions;
- public HighFreqDocsAndPositionsEnum(Bits liveDocs, boolean hasOffsets) {
+ public HighFreqPostingsEnum(Bits liveDocs, boolean hasOffsets) {
this.liveDocs = liveDocs;
this.hasOffsets = hasOffsets;
posJump = hasOffsets ? 3 : 1;
@@ -2121,7 +2135,7 @@ public final class DirectPostingsFormat
return liveDocs;
}
- public DocsAndPositionsEnum reset(int[] docIDs, int[] freqs, int[][] positions, byte[][][] payloads) {
+ public PostingsEnum reset(int[] docIDs, int[] freqs, int[][] positions, byte[][][] payloads) {
this.docIDs = docIDs;
this.freqs = freqs;
this.positions = positions;
@@ -2135,7 +2149,7 @@ public final class DirectPostingsFormat
upto++;
if (liveDocs == null) {
if (upto < docIDs.length) {
- posUpto = -posJump;
+ posUpto = -posJump;
curPositions = positions[upto];
return docID = docIDs[upto];
}
@@ -2166,6 +2180,7 @@ public final class DirectPostingsFormat
@Override
public int nextPosition() {
posUpto += posJump;
+ assert posUpto < curPositions.length;
return curPositions[posUpto];
}
@@ -2316,7 +2331,7 @@ public final class DirectPostingsFormat
return payload;
}
}
-
+
@Override
public long cost() {
return docIDs.length;
Modified: lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java?rev=1657890&r1=1657889&r2=1657890&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java (original)
+++ lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java Fri Feb 6 16:39:45 2015
@@ -32,8 +32,7 @@ import org.apache.lucene.codecs.CodecUti
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
@@ -428,18 +427,9 @@ public class FSTOrdTermsReader extends F
}
@Override
- public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
+ public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) throws IOException {
decodeMetaData();
- return postingsReader.docs(fieldInfo, state, liveDocs, reuse, flags);
- }
-
- @Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
- if (!hasPositions()) {
- return null;
- }
- decodeMetaData();
- return postingsReader.docsAndPositions(fieldInfo, state, liveDocs, reuse, flags);
+ return postingsReader.postings(fieldInfo, state, liveDocs, reuse, flags);
}
// TODO: this can be achieved by making use of Util.getByOutput()
Modified: lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java?rev=1657890&r1=1657889&r2=1657890&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java (original)
+++ lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java Fri Feb 6 16:39:45 2015
@@ -31,8 +31,7 @@ import org.apache.lucene.codecs.CodecUti
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
@@ -291,18 +290,9 @@ public class FSTTermsReader extends Fiel
}
@Override
- public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
+ public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) throws IOException {
decodeMetaData();
- return postingsReader.docs(fieldInfo, state, liveDocs, reuse, flags);
- }
-
- @Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
- if (!hasPositions()) {
- return null;
- }
- decodeMetaData();
- return postingsReader.docsAndPositions(fieldInfo, state, liveDocs, reuse, flags);
+ return postingsReader.postings(fieldInfo, state, liveDocs, reuse, flags);
}
@Override
Modified: lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java?rev=1657890&r1=1657889&r2=1657890&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java Fri Feb 6 16:39:45 2015
@@ -31,8 +31,7 @@ import org.apache.lucene.codecs.DocValue
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
@@ -893,13 +892,9 @@ class MemoryDocValuesProducer extends Do
}
@Override
- public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
+ public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) throws IOException {
throw new UnsupportedOperationException();
}
- @Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
- throw new UnsupportedOperationException();
- }
}
}
Modified: lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java?rev=1657890&r1=1657889&r2=1657890&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java (original)
+++ lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java Fri Feb 6 16:39:45 2015
@@ -31,8 +31,8 @@ import org.apache.lucene.codecs.FieldsPr
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
@@ -317,8 +317,8 @@ public final class MemoryPostingsFormat
FixedBitSet docsSeen = new FixedBitSet(state.segmentInfo.getDocCount());
long sumTotalTermFreq = 0;
long sumDocFreq = 0;
- DocsEnum docsEnum = null;
- DocsAndPositionsEnum posEnum = null;
+ PostingsEnum postingsEnum = null;
+ PostingsEnum posEnum = null;
int enumFlags;
IndexOptions indexOptions = fieldInfo.getIndexOptions();
@@ -330,18 +330,19 @@ public final class MemoryPostingsFormat
if (writeFreqs == false) {
enumFlags = 0;
} else if (writePositions == false) {
- enumFlags = DocsEnum.FLAG_FREQS;
+ enumFlags = PostingsEnum.FLAG_FREQS;
} else if (writeOffsets == false) {
if (writePayloads) {
- enumFlags = DocsAndPositionsEnum.FLAG_PAYLOADS;
- } else {
- enumFlags = 0;
+ enumFlags = PostingsEnum.FLAG_PAYLOADS;
+ }
+ else {
+ enumFlags = PostingsEnum.FLAG_POSITIONS;
}
} else {
if (writePayloads) {
- enumFlags = DocsAndPositionsEnum.FLAG_PAYLOADS | DocsAndPositionsEnum.FLAG_OFFSETS;
+ enumFlags = PostingsEnum.FLAG_PAYLOADS | PostingsEnum.FLAG_OFFSETS;
} else {
- enumFlags = DocsAndPositionsEnum.FLAG_OFFSETS;
+ enumFlags = PostingsEnum.FLAG_OFFSETS;
}
}
@@ -353,18 +354,18 @@ public final class MemoryPostingsFormat
termsWriter.postingsWriter.reset();
if (writePositions) {
- posEnum = termsEnum.docsAndPositions(null, posEnum, enumFlags);
- docsEnum = posEnum;
+ posEnum = termsEnum.postings(null, posEnum, enumFlags);
+ postingsEnum = posEnum;
} else {
- docsEnum = termsEnum.docs(null, docsEnum, enumFlags);
+ postingsEnum = termsEnum.postings(null, postingsEnum, enumFlags);
posEnum = null;
}
int docFreq = 0;
long totalTermFreq = 0;
while (true) {
- int docID = docsEnum.nextDoc();
- if (docID == DocsEnum.NO_MORE_DOCS) {
+ int docID = postingsEnum.nextDoc();
+ if (docID == PostingsEnum.NO_MORE_DOCS) {
break;
}
docsSeen.set(docID);
@@ -372,7 +373,7 @@ public final class MemoryPostingsFormat
int freq;
if (writeFreqs) {
- freq = docsEnum.freq();
+ freq = postingsEnum.freq();
totalTermFreq += freq;
} else {
freq = -1;
@@ -545,14 +546,14 @@ public final class MemoryPostingsFormat
public int freq() {
return freq;
}
-
+
@Override
public long cost() {
return numDocs;
}
}
- private final static class FSTDocsAndPositionsEnum extends DocsAndPositionsEnum {
+ private final static class FSTPostingsEnum extends PostingsEnum {
private final boolean storePayloads;
private byte[] buffer = new byte[16];
private final ByteArrayDataInput in = new ByteArrayDataInput(buffer);
@@ -572,7 +573,7 @@ public final class MemoryPostingsFormat
private int pos;
private final BytesRef payload = new BytesRef();
- public FSTDocsAndPositionsEnum(boolean storePayloads, boolean storeOffsets) {
+ public FSTPostingsEnum(boolean storePayloads, boolean storeOffsets) {
this.storePayloads = storePayloads;
this.storeOffsets = storeOffsets;
}
@@ -581,7 +582,7 @@ public final class MemoryPostingsFormat
return storePayloads == this.storePayloads && storeOffsets == this.storeOffsets;
}
- public FSTDocsAndPositionsEnum reset(BytesRef bufferIn, Bits liveDocs, int numDocs) {
+ public FSTPostingsEnum reset(BytesRef bufferIn, Bits liveDocs, int numDocs) {
assert numDocs > 0;
// System.out.println("D&P reset bytes this=" + this);
@@ -807,7 +808,27 @@ public final class MemoryPostingsFormat
}
@Override
- public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) {
+ public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) {
+
+ if ((flags & PostingsEnum.FLAG_POSITIONS) >= PostingsEnum.FLAG_POSITIONS) {
+ if (field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+ return null;
+ }
+ boolean hasOffsets = field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+ decodeMetaData();
+ FSTPostingsEnum docsAndPositionsEnum;
+ if (reuse == null || !(reuse instanceof FSTPostingsEnum)) {
+ docsAndPositionsEnum = new FSTPostingsEnum(field.hasPayloads(), hasOffsets);
+ } else {
+ docsAndPositionsEnum = (FSTPostingsEnum) reuse;
+ if (!docsAndPositionsEnum.canReuse(field.hasPayloads(), hasOffsets)) {
+ docsAndPositionsEnum = new FSTPostingsEnum(field.hasPayloads(), hasOffsets);
+ }
+ }
+ //System.out.println("D&P reset this=" + this);
+ return docsAndPositionsEnum.reset(postingsSpare, liveDocs, docFreq);
+ }
+
decodeMetaData();
FSTDocsEnum docsEnum;
@@ -823,27 +844,6 @@ public final class MemoryPostingsFormat
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) {
-
- boolean hasOffsets = field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
- if (field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
- return null;
- }
- decodeMetaData();
- FSTDocsAndPositionsEnum docsAndPositionsEnum;
- if (reuse == null || !(reuse instanceof FSTDocsAndPositionsEnum)) {
- docsAndPositionsEnum = new FSTDocsAndPositionsEnum(field.hasPayloads(), hasOffsets);
- } else {
- docsAndPositionsEnum = (FSTDocsAndPositionsEnum) reuse;
- if (!docsAndPositionsEnum.canReuse(field.hasPayloads(), hasOffsets)) {
- docsAndPositionsEnum = new FSTDocsAndPositionsEnum(field.hasPayloads(), hasOffsets);
- }
- }
- //System.out.println("D&P reset this=" + this);
- return docsAndPositionsEnum.reset(postingsSpare, liveDocs, docFreq);
- }
-
- @Override
public BytesRef term() {
return current.input;
}
Modified: lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java?rev=1657890&r1=1657889&r2=1657890&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java Fri Feb 6 16:39:45 2015
@@ -17,16 +17,6 @@ package org.apache.lucene.codecs.simplet
* limitations under the License.
*/
-import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.DOC;
-import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END;
-import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END_OFFSET;
-import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FIELD;
-import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FREQ;
-import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.PAYLOAD;
-import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.POS;
-import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.START_OFFSET;
-import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.TERM;
-
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
@@ -37,11 +27,11 @@ import java.util.Map;
import java.util.TreeMap;
import org.apache.lucene.codecs.FieldsProducer;
-import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
@@ -68,11 +58,21 @@ import org.apache.lucene.util.fst.PairOu
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.DOC;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END_OFFSET;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FIELD;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FREQ;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.PAYLOAD;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.POS;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.START_OFFSET;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.TERM;
+
class SimpleTextFieldsReader extends FieldsProducer {
private static final long BASE_RAM_BYTES_USED =
- RamUsageEstimator.shallowSizeOfInstance(SimpleTextFieldsReader.class)
- + RamUsageEstimator.shallowSizeOfInstance(TreeMap.class);
+ RamUsageEstimator.shallowSizeOfInstance(SimpleTextFieldsReader.class)
+ + RamUsageEstimator.shallowSizeOfInstance(TreeMap.class);
private final TreeMap<String,Long> fields;
private final IndexInput in;
@@ -93,12 +93,12 @@ class SimpleTextFieldsReader extends Fie
}
}
}
-
+
private TreeMap<String,Long> readFields(IndexInput in) throws IOException {
ChecksumIndexInput input = new BufferedChecksumIndexInput(in);
BytesRefBuilder scratch = new BytesRefBuilder();
TreeMap<String,Long> fields = new TreeMap<>();
-
+
while (true) {
SimpleTextUtil.readLine(input, scratch);
if (scratch.get().equals(END)) {
@@ -206,9 +206,26 @@ class SimpleTextFieldsReader extends Fie
public long totalTermFreq() {
return indexOptions == IndexOptions.DOCS ? -1 : totalTermFreq;
}
-
+
@Override
- public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
+ public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) throws IOException {
+
+ if (PostingsEnum.requiresPositions(flags)) {
+ if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+ // Positions were not indexed
+ return null;
+ }
+
+ SimpleTextPostingsEnum docsAndPositionsEnum;
+ if (reuse != null && reuse instanceof SimpleTextPostingsEnum && ((SimpleTextPostingsEnum) reuse).canReuse(SimpleTextFieldsReader.this.in)) {
+ docsAndPositionsEnum = (SimpleTextPostingsEnum) reuse;
+ } else {
+ docsAndPositionsEnum = new SimpleTextPostingsEnum();
+ }
+ return docsAndPositionsEnum.reset(docsStart, liveDocs, indexOptions, docFreq);
+
+ }
+
SimpleTextDocsEnum docsEnum;
if (reuse != null && reuse instanceof SimpleTextDocsEnum && ((SimpleTextDocsEnum) reuse).canReuse(SimpleTextFieldsReader.this.in)) {
docsEnum = (SimpleTextDocsEnum) reuse;
@@ -218,22 +235,6 @@ class SimpleTextFieldsReader extends Fie
return docsEnum.reset(docsStart, liveDocs, indexOptions == IndexOptions.DOCS, docFreq);
}
- @Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
-
- if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
- // Positions were not indexed
- return null;
- }
-
- SimpleTextDocsAndPositionsEnum docsAndPositionsEnum;
- if (reuse != null && reuse instanceof SimpleTextDocsAndPositionsEnum && ((SimpleTextDocsAndPositionsEnum) reuse).canReuse(SimpleTextFieldsReader.this.in)) {
- docsAndPositionsEnum = (SimpleTextDocsAndPositionsEnum) reuse;
- } else {
- docsAndPositionsEnum = new SimpleTextDocsAndPositionsEnum();
- }
- return docsAndPositionsEnum.reset(docsStart, liveDocs, indexOptions, docFreq);
- }
}
private class SimpleTextDocsEnum extends DocsEnum {
@@ -246,7 +247,7 @@ class SimpleTextFieldsReader extends Fie
private final BytesRefBuilder scratch = new BytesRefBuilder();
private final CharsRefBuilder scratchUTF16 = new CharsRefBuilder();
private int cost;
-
+
public SimpleTextDocsEnum() {
this.inStart = SimpleTextFieldsReader.this.in;
this.in = this.inStart.clone();
@@ -277,6 +278,12 @@ class SimpleTextFieldsReader extends Fie
}
@Override
+ public int nextPosition() throws IOException {
+ assert false : "Shouldn't be calling nextPosition() if you haven't asked for positions";
+ return -1;
+ }
+
+ @Override
public int nextDoc() throws IOException {
if (docID == NO_MORE_DOCS) {
return docID;
@@ -328,14 +335,14 @@ class SimpleTextFieldsReader extends Fie
// Naive -- better to index skip data
return slowAdvance(target);
}
-
+
@Override
public long cost() {
return cost;
}
}
- private class SimpleTextDocsAndPositionsEnum extends DocsAndPositionsEnum {
+ private class SimpleTextPostingsEnum extends PostingsEnum {
private final IndexInput inStart;
private final IndexInput in;
private int docID = -1;
@@ -345,6 +352,7 @@ class SimpleTextFieldsReader extends Fie
private final BytesRefBuilder scratch2 = new BytesRefBuilder();
private final CharsRefBuilder scratchUTF16 = new CharsRefBuilder();
private final CharsRefBuilder scratchUTF16_2 = new CharsRefBuilder();
+ private int pos;
private BytesRef payload;
private long nextDocStart;
private boolean readOffsets;
@@ -353,7 +361,7 @@ class SimpleTextFieldsReader extends Fie
private int endOffset;
private int cost;
- public SimpleTextDocsAndPositionsEnum() {
+ public SimpleTextPostingsEnum() {
this.inStart = SimpleTextFieldsReader.this.in;
this.in = inStart.clone();
}
@@ -362,7 +370,7 @@ class SimpleTextFieldsReader extends Fie
return in == inStart;
}
- public SimpleTextDocsAndPositionsEnum reset(long fp, Bits liveDocs, IndexOptions indexOptions, int docFreq) {
+ public SimpleTextPostingsEnum reset(long fp, Bits liveDocs, IndexOptions indexOptions, int docFreq) {
this.liveDocs = liveDocs;
nextDocStart = fp;
docID = -1;
@@ -437,7 +445,6 @@ class SimpleTextFieldsReader extends Fie
@Override
public int nextPosition() throws IOException {
- final int pos;
if (readPositions) {
SimpleTextUtil.readLine(in, scratch);
assert StringHelper.startsWith(scratch.get(), POS): "got line=" + scratch.get().utf8ToString();
@@ -488,7 +495,7 @@ class SimpleTextFieldsReader extends Fie
public BytesRef getPayload() {
return payload;
}
-
+
@Override
public long cost() {
return cost;
@@ -506,9 +513,9 @@ class SimpleTextFieldsReader extends Fie
}
private static final long TERMS_BASE_RAM_BYTES_USED =
- RamUsageEstimator.shallowSizeOfInstance(SimpleTextTerms.class)
- + RamUsageEstimator.shallowSizeOfInstance(BytesRef.class)
- + RamUsageEstimator.shallowSizeOfInstance(CharsRef.class);
+ RamUsageEstimator.shallowSizeOfInstance(SimpleTextTerms.class)
+ + RamUsageEstimator.shallowSizeOfInstance(BytesRef.class)
+ + RamUsageEstimator.shallowSizeOfInstance(CharsRef.class);
private class SimpleTextTerms extends Terms implements Accountable {
private final long termsStart;
private final FieldInfo fieldInfo;
@@ -533,7 +540,7 @@ class SimpleTextFieldsReader extends Fie
final Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> b;
final PairOutputs<Long,Long> outputsInner = new PairOutputs<>(posIntOutputs, posIntOutputs);
final PairOutputs<Long,PairOutputs.Pair<Long,Long>> outputs = new PairOutputs<>(posIntOutputs,
- outputsInner);
+ outputsInner);
b = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
IndexInput in = SimpleTextFieldsReader.this.in.clone();
in.seek(termsStart);
@@ -548,8 +555,8 @@ class SimpleTextFieldsReader extends Fie
if (scratch.get().equals(END) || StringHelper.startsWith(scratch.get(), FIELD)) {
if (lastDocsStart != -1) {
b.add(Util.toIntsRef(lastTerm.get(), scratchIntsRef),
- outputs.newPair(lastDocsStart,
- outputsInner.newPair((long) docFreq, totalTermFreq)));
+ outputs.newPair(lastDocsStart,
+ outputsInner.newPair((long) docFreq, totalTermFreq)));
sumTotalTermFreq += totalTermFreq;
}
break;
@@ -565,7 +572,7 @@ class SimpleTextFieldsReader extends Fie
} else if (StringHelper.startsWith(scratch.get(), TERM)) {
if (lastDocsStart != -1) {
b.add(Util.toIntsRef(lastTerm.get(), scratchIntsRef), outputs.newPair(lastDocsStart,
- outputsInner.newPair((long) docFreq, totalTermFreq)));
+ outputsInner.newPair((long) docFreq, totalTermFreq)));
}
lastDocsStart = in.getFilePointer();
final int len = scratch.length() - TERM.length;
@@ -652,7 +659,7 @@ class SimpleTextFieldsReader extends Fie
public boolean hasPositions() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
}
-
+
@Override
public boolean hasPayloads() {
return fieldInfo.hasPayloads();
Modified: lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java?rev=1657890&r1=1657889&r2=1657890&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java (original)
+++ lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java Fri Feb 6 16:39:45 2015
@@ -20,8 +20,7 @@ package org.apache.lucene.codecs.simplet
import java.io.IOException;
import org.apache.lucene.codecs.FieldsConsumer;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
@@ -33,7 +32,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
class SimpleTextFieldsWriter extends FieldsConsumer {
-
+
private IndexOutput out;
private final BytesRefBuilder scratch = new BytesRefBuilder();
private final SegmentWriteState writeState;
@@ -79,22 +78,21 @@ class SimpleTextFieldsWriter extends Fie
int flags = 0;
if (hasPositions) {
-
+ flags = PostingsEnum.FLAG_POSITIONS;
if (hasPayloads) {
- flags = flags | DocsAndPositionsEnum.FLAG_PAYLOADS;
+ flags = flags | PostingsEnum.FLAG_PAYLOADS;
}
if (hasOffsets) {
- flags = flags | DocsAndPositionsEnum.FLAG_OFFSETS;
+ flags = flags | PostingsEnum.FLAG_OFFSETS;
}
} else {
if (hasFreqs) {
- flags = flags | DocsEnum.FLAG_FREQS;
+ flags = flags | PostingsEnum.FLAG_FREQS;
}
}
TermsEnum termsEnum = terms.iterator(null);
- DocsAndPositionsEnum posEnum = null;
- DocsEnum docsEnum = null;
+ PostingsEnum postingsEnum = null;
// for each term in field
while(true) {
@@ -103,20 +101,16 @@ class SimpleTextFieldsWriter extends Fie
break;
}
- if (hasPositions) {
- posEnum = termsEnum.docsAndPositions(null, posEnum, flags);
- docsEnum = posEnum;
- } else {
- docsEnum = termsEnum.docs(null, docsEnum, flags);
- }
- assert docsEnum != null: "termsEnum=" + termsEnum + " hasPos=" + hasPositions + " flags=" + flags;
+ postingsEnum = termsEnum.postings(null, postingsEnum, flags);
+
+ assert postingsEnum != null: "termsEnum=" + termsEnum + " hasPos=" + hasPositions + " flags=" + flags;
boolean wroteTerm = false;
// for each doc in field+term
while(true) {
- int doc = docsEnum.nextDoc();
- if (doc == DocsEnum.NO_MORE_DOCS) {
+ int doc = postingsEnum.nextDoc();
+ if (doc == PostingsEnum.NO_MORE_DOCS) {
break;
}
@@ -143,7 +137,7 @@ class SimpleTextFieldsWriter extends Fie
write(Integer.toString(doc));
newline();
if (hasFreqs) {
- int freq = docsEnum.freq();
+ int freq = postingsEnum.freq();
write(FREQ);
write(Integer.toString(freq));
newline();
@@ -154,15 +148,15 @@ class SimpleTextFieldsWriter extends Fie
// for each pos in field+term+doc
for(int i=0;i<freq;i++) {
- int position = posEnum.nextPosition();
+ int position = postingsEnum.nextPosition();
write(POS);
write(Integer.toString(position));
newline();
if (hasOffsets) {
- int startOffset = posEnum.startOffset();
- int endOffset = posEnum.endOffset();
+ int startOffset = postingsEnum.startOffset();
+ int endOffset = postingsEnum.endOffset();
assert endOffset >= startOffset;
assert startOffset >= lastStartOffset: "startOffset=" + startOffset + " lastStartOffset=" + lastStartOffset;
lastStartOffset = startOffset;
@@ -174,7 +168,7 @@ class SimpleTextFieldsWriter extends Fie
newline();
}
- BytesRef payload = posEnum.getPayload();
+ BytesRef payload = postingsEnum.getPayload();
if (payload != null && payload.length > 0) {
assert payload.length != 0;
Modified: lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java?rev=1657890&r1=1657889&r2=1657890&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java (original)
+++ lucene/dev/branches/branch_5x/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java Fri Feb 6 16:39:45 2015
@@ -26,8 +26,8 @@ import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.lucene.codecs.TermVectorsReader;
-import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
@@ -61,15 +61,15 @@ import static org.apache.lucene.codecs.s
public class SimpleTextTermVectorsReader extends TermVectorsReader {
private static final long BASE_RAM_BYTES_USED =
- RamUsageEstimator.shallowSizeOfInstance(SimpleTextTermVectorsReader.class)
- + RamUsageEstimator.shallowSizeOfInstance(BytesRef.class)
- + RamUsageEstimator.shallowSizeOfInstance(CharsRef.class);
+ RamUsageEstimator.shallowSizeOfInstance(SimpleTextTermVectorsReader.class)
+ + RamUsageEstimator.shallowSizeOfInstance(BytesRef.class)
+ + RamUsageEstimator.shallowSizeOfInstance(CharsRef.class);
private long offsets[]; /* docid -> offset in .vec file */
private IndexInput in;
private BytesRefBuilder scratch = new BytesRefBuilder();
private CharsRefBuilder scratchUTF16 = new CharsRefBuilder();
-
+
public SimpleTextTermVectorsReader(Directory directory, SegmentInfo si, IOContext context) throws IOException {
boolean success = false;
try {
@@ -84,15 +84,15 @@ public class SimpleTextTermVectorsReader
}
readIndex(si.getDocCount());
}
-
+
// used by clone
SimpleTextTermVectorsReader(long offsets[], IndexInput in) {
this.offsets = offsets;
this.in = in;
}
-
- // we don't actually write a .tvx-like index, instead we read the
- // vectors file in entirety up-front and save the offsets
+
+ // we don't actually write a .tvx-like index, instead we read the
+ // vectors file in entirety up-front and save the offsets
// so we can seek to the data later.
private void readIndex(int maxDoc) throws IOException {
ChecksumIndexInput input = new BufferedChecksumIndexInput(in);
@@ -108,7 +108,7 @@ public class SimpleTextTermVectorsReader
SimpleTextUtil.checkFooter(input);
assert upto == offsets.length;
}
-
+
@Override
public Fields get(int doc) throws IOException {
SortedMap<String,SimpleTVTerms> fields = new TreeMap<>();
@@ -124,30 +124,30 @@ public class SimpleTextTermVectorsReader
assert StringHelper.startsWith(scratch.get(), FIELD);
// skip fieldNumber:
parseIntAt(FIELD.length);
-
+
readLine();
assert StringHelper.startsWith(scratch.get(), FIELDNAME);
String fieldName = readString(FIELDNAME.length, scratch);
-
+
readLine();
assert StringHelper.startsWith(scratch.get(), FIELDPOSITIONS);
boolean positions = Boolean.parseBoolean(readString(FIELDPOSITIONS.length, scratch));
-
+
readLine();
assert StringHelper.startsWith(scratch.get(), FIELDOFFSETS);
boolean offsets = Boolean.parseBoolean(readString(FIELDOFFSETS.length, scratch));
-
+
readLine();
assert StringHelper.startsWith(scratch.get(), FIELDPAYLOADS);
boolean payloads = Boolean.parseBoolean(readString(FIELDPAYLOADS.length, scratch));
-
+
readLine();
assert StringHelper.startsWith(scratch.get(), FIELDTERMCOUNT);
int termCount = parseIntAt(FIELDTERMCOUNT.length);
-
+
SimpleTVTerms terms = new SimpleTVTerms(offsets, positions, payloads);
fields.put(fieldName, terms);
-
+
BytesRefBuilder term = new BytesRefBuilder();
for (int j = 0; j < termCount; j++) {
readLine();
@@ -156,14 +156,14 @@ public class SimpleTextTermVectorsReader
term.grow(termLength);
term.setLength(termLength);
System.arraycopy(scratch.bytes(), TERMTEXT.length, term.bytes(), 0, termLength);
-
+
SimpleTVPostings postings = new SimpleTVPostings();
terms.terms.put(term.toBytesRef(), postings);
-
+
readLine();
assert StringHelper.startsWith(scratch.get(), TERMFREQ);
postings.freq = parseIntAt(TERMFREQ.length);
-
+
if (positions || offsets) {
if (positions) {
postings.positions = new int[postings.freq];
@@ -171,12 +171,12 @@ public class SimpleTextTermVectorsReader
postings.payloads = new BytesRef[postings.freq];
}
}
-
+
if (offsets) {
postings.startOffsets = new int[postings.freq];
postings.endOffsets = new int[postings.freq];
}
-
+
for (int k = 0; k < postings.freq; k++) {
if (positions) {
readLine();
@@ -194,12 +194,12 @@ public class SimpleTextTermVectorsReader
}
}
}
-
+
if (offsets) {
readLine();
assert StringHelper.startsWith(scratch.get(), STARTOFFSET);
postings.startOffsets[k] = parseIntAt(STARTOFFSET.length);
-
+
readLine();
assert StringHelper.startsWith(scratch.get(), ENDOFFSET);
postings.endOffsets[k] = parseIntAt(ENDOFFSET.length);
@@ -218,11 +218,11 @@ public class SimpleTextTermVectorsReader
}
return new SimpleTextTermVectorsReader(offsets, in.clone());
}
-
+
@Override
public void close() throws IOException {
try {
- IOUtils.close(in);
+ IOUtils.close(in);
} finally {
in = null;
offsets = null;
@@ -232,20 +232,20 @@ public class SimpleTextTermVectorsReader
private void readLine() throws IOException {
SimpleTextUtil.readLine(in, scratch);
}
-
+
private int parseIntAt(int offset) {
scratchUTF16.copyUTF8Bytes(scratch.bytes(), offset, scratch.length()-offset);
return ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
}
-
+
private String readString(int offset, BytesRefBuilder scratch) {
scratchUTF16.copyUTF8Bytes(scratch.bytes(), offset, scratch.length()-offset);
return scratchUTF16.toString();
}
-
+
private class SimpleTVFields extends Fields {
private final SortedMap<String,SimpleTVTerms> fields;
-
+
SimpleTVFields(SortedMap<String,SimpleTVTerms> fields) {
this.fields = fields;
}
@@ -265,20 +265,20 @@ public class SimpleTextTermVectorsReader
return fields.size();
}
}
-
+
private static class SimpleTVTerms extends Terms {
final SortedMap<BytesRef,SimpleTVPostings> terms;
final boolean hasOffsets;
final boolean hasPositions;
final boolean hasPayloads;
-
+
SimpleTVTerms(boolean hasOffsets, boolean hasPositions, boolean hasPayloads) {
this.hasOffsets = hasOffsets;
this.hasPositions = hasPositions;
this.hasPayloads = hasPayloads;
terms = new TreeMap<>();
}
-
+
@Override
public TermsEnum iterator(TermsEnum reuse) throws IOException {
// TODO: reuse
@@ -319,13 +319,13 @@ public class SimpleTextTermVectorsReader
public boolean hasPositions() {
return hasPositions;
}
-
+
@Override
public boolean hasPayloads() {
return hasPayloads;
}
}
-
+
private static class SimpleTVPostings {
private int freq;
private int positions[];
@@ -333,17 +333,17 @@ public class SimpleTextTermVectorsReader
private int endOffsets[];
private BytesRef payloads[];
}
-
+
private static class SimpleTVTermsEnum extends TermsEnum {
SortedMap<BytesRef,SimpleTVPostings> terms;
Iterator<Map.Entry<BytesRef,SimpleTextTermVectorsReader.SimpleTVPostings>> iterator;
Map.Entry<BytesRef,SimpleTextTermVectorsReader.SimpleTVPostings> current;
-
+
SimpleTVTermsEnum(SortedMap<BytesRef,SimpleTVPostings> terms) {
this.terms = terms;
this.iterator = terms.entrySet().iterator();
}
-
+
@Override
public SeekStatus seekCeil(BytesRef text) throws IOException {
iterator = terms.tailMap(text).entrySet().iterator();
@@ -390,26 +390,27 @@ public class SimpleTextTermVectorsReader
}
@Override
- public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
- // TODO: reuse
- SimpleTVDocsEnum e = new SimpleTVDocsEnum();
- e.reset(liveDocs, (flags & DocsEnum.FLAG_FREQS) == 0 ? 1 : current.getValue().freq);
- return e;
- }
+ public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) throws IOException {
- @Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
- SimpleTVPostings postings = current.getValue();
- if (postings.positions == null && postings.startOffsets == null) {
- return null;
+ if (PostingsEnum.requiresPositions(flags)) {
+ SimpleTVPostings postings = current.getValue();
+ if (postings.positions == null && postings.startOffsets == null) {
+ return null;
+ }
+ // TODO: reuse
+ SimpleTVPostingsEnum e = new SimpleTVPostingsEnum();
+ e.reset(liveDocs, postings.positions, postings.startOffsets, postings.endOffsets, postings.payloads);
+ return e;
}
+
// TODO: reuse
- SimpleTVDocsAndPositionsEnum e = new SimpleTVDocsAndPositionsEnum();
- e.reset(liveDocs, postings.positions, postings.startOffsets, postings.endOffsets, postings.payloads);
+ SimpleTVDocsEnum e = new SimpleTVDocsEnum();
+ e.reset(liveDocs, (flags & PostingsEnum.FLAG_FREQS) == 0 ? 1 : current.getValue().freq);
return e;
}
+
}
-
+
// note: these two enum classes are exactly like the Default impl...
private static class SimpleTVDocsEnum extends DocsEnum {
private boolean didNext;
@@ -424,6 +425,12 @@ public class SimpleTextTermVectorsReader
}
@Override
+ public int nextPosition() throws IOException {
+ assert false;
+ return -1;
+ }
+
+ @Override
public int docID() {
return doc;
}
@@ -449,14 +456,14 @@ public class SimpleTextTermVectorsReader
this.doc = -1;
didNext = false;
}
-
+
@Override
public long cost() {
return 1;
}
}
-
- private static class SimpleTVDocsAndPositionsEnum extends DocsAndPositionsEnum {
+
+ private static class SimpleTVPostingsEnum extends PostingsEnum {
private boolean didNext;
private int doc = -1;
private int nextPos;
@@ -514,11 +521,11 @@ public class SimpleTextTermVectorsReader
@Override
public int nextPosition() {
- assert (positions != null && nextPos < positions.length) ||
- startOffsets != null && nextPos < startOffsets.length;
if (positions != null) {
+ assert nextPos < positions.length;
return positions[nextPos++];
} else {
+ assert nextPos < startOffsets.length;
nextPos++;
return -1;
}
@@ -541,7 +548,7 @@ public class SimpleTextTermVectorsReader
return endOffsets[nextPos-1];
}
}
-
+
@Override
public long cost() {
return 1;
Modified: lucene/dev/branches/branch_5x/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestDirectPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestDirectPostingsFormat.java?rev=1657890&r1=1657889&r2=1657890&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestDirectPostingsFormat.java (original)
+++ lucene/dev/branches/branch_5x/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestDirectPostingsFormat.java Fri Feb 6 16:39:45 2015
@@ -29,6 +29,11 @@ public class TestDirectPostingsFormat ex
private final Codec codec = TestUtil.alwaysPostingsFormat(new DirectPostingsFormat());
@Override
+ protected boolean isPostingsEnumReuseImplemented() {
+ return false;
+ }
+
+ @Override
protected Codec getCodec() {
return codec;
}
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/analysis/Token.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/analysis/Token.java?rev=1657890&r1=1657889&r2=1657890&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/analysis/Token.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/analysis/Token.java Fri Feb 6 16:39:45 2015
@@ -20,7 +20,7 @@ package org.apache.lucene.analysis;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.index.DocsAndPositionsEnum; // for javadoc
+import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeImpl;
@@ -43,7 +43,7 @@ import org.apache.lucene.util.BytesRef;
with type "eos". The default token type is "word".
<p>
A Token can optionally have metadata (a.k.a. payload) in the form of a variable
- length byte array. Use {@link DocsAndPositionsEnum#getPayload()} to retrieve the
+ length byte array. Use {@link org.apache.lucene.index.PostingsEnum#getPayload()} to retrieve the
payloads from the index.
<br><br>
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java?rev=1657890&r1=1657889&r2=1657890&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java Fri Feb 6 16:39:45 2015
@@ -17,7 +17,6 @@ package org.apache.lucene.analysis.token
* limitations under the License.
*/
-import org.apache.lucene.index.DocsAndPositionsEnum; // javadocs
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.BytesRef;
@@ -33,7 +32,7 @@ import org.apache.lucene.util.BytesRef;
* best to use the minimum number of bytes necessary. Some codec implementations
* may optimize payload storage when all payloads have the same length.
*
- * @see DocsAndPositionsEnum
+ * @see org.apache.lucene.index.PostingsEnum
*/
public interface PayloadAttribute extends Attribute {
/**
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java?rev=1657890&r1=1657889&r2=1657890&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java Fri Feb 6 16:39:45 2015
@@ -43,7 +43,7 @@ import org.apache.lucene.util.Attribute;
*
* </ul>
*
- * @see org.apache.lucene.index.DocsAndPositionsEnum
+ * @see org.apache.lucene.index.PostingsEnum
*/
public interface PositionIncrementAttribute extends Attribute {
/** Set the position increment. The default value is one.
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java?rev=1657890&r1=1657889&r2=1657890&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java Fri Feb 6 16:39:45 2015
@@ -16,13 +16,12 @@ package org.apache.lucene.codecs;
* limitations under the License.
*/
-import org.apache.lucene.index.DocsEnum; // javadocs
import org.apache.lucene.index.OrdTermState;
import org.apache.lucene.index.TermState;
/**
* Holds all state required for {@link PostingsReaderBase}
- * to produce a {@link DocsEnum} without re-seeking the
+ * to produce a {@link org.apache.lucene.index.PostingsEnum} without re-seeking the
* terms dict.
*/
public class BlockTermState extends OrdTermState {
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java?rev=1657890&r1=1657889&r2=1657890&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java Fri Feb 6 16:39:45 2015
@@ -20,8 +20,7 @@ package org.apache.lucene.codecs;
import java.io.Closeable;
import java.io.IOException;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.DataInput;
@@ -31,8 +30,8 @@ import org.apache.lucene.util.Bits;
/** The core terms dictionaries (BlockTermsReader,
* BlockTreeTermsReader) interact with a single instance
- * of this class to manage creation of {@link DocsEnum} and
- * {@link DocsAndPositionsEnum} instances. It provides an
+ * of this class to manage creation of {@link org.apache.lucene.index.PostingsEnum} and
+ * {@link org.apache.lucene.index.PostingsEnum} instances. It provides an
* IndexInput (termsIn) where this class may read any
* previously stored data that it had written in its
* corresponding {@link PostingsWriterBase} at indexing
@@ -66,12 +65,7 @@ public abstract class PostingsReaderBase
/** Must fully consume state, since after this call that
* TermState may be reused. */
- public abstract DocsEnum docs(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsEnum reuse, int flags) throws IOException;
-
- /** Must fully consume state, since after this call that
- * TermState may be reused. */
- public abstract DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsAndPositionsEnum reuse,
- int flags) throws IOException;
+ public abstract PostingsEnum postings(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, PostingsEnum reuse, int flags) throws IOException;
/**
* Checks consistency of this reader.
@@ -81,7 +75,7 @@ public abstract class PostingsReaderBase
* @lucene.internal
*/
public abstract void checkIntegrity() throws IOException;
-
+
@Override
public abstract void close() throws IOException;
}
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java?rev=1657890&r1=1657889&r2=1657890&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java Fri Feb 6 16:39:45 2015
@@ -17,12 +17,7 @@ package org.apache.lucene.codecs;
* limitations under the License.
*/
-import java.io.Closeable;
-import java.io.IOException;
-
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
-import org.apache.lucene.index.DocsAndPositionsEnum; // javadocs
-import org.apache.lucene.index.DocsEnum; // javadocs
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.TermsEnum;
@@ -31,6 +26,9 @@ import org.apache.lucene.store.IndexOutp
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
+import java.io.Closeable;
+import java.io.IOException;
+
/**
* Class that plugs into term dictionaries, such as {@link
* BlockTreeTermsWriter}, and handles writing postings.
@@ -54,8 +52,8 @@ public abstract class PostingsWriterBase
public abstract void init(IndexOutput termsOut, SegmentWriteState state) throws IOException;
/** Write all postings for one term; use the provided
- * {@link TermsEnum} to pull a {@link DocsEnum} or {@link
- * DocsAndPositionsEnum}. This method should not
+ * {@link TermsEnum} to pull a {@link org.apache.lucene.index.PostingsEnum}.
+ * This method should not
* re-position the {@code TermsEnum}! It is already
* positioned on the term that should be written. This
* method must set the bit in the provided {@link