You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/08/10 16:16:21 UTC
svn commit: r1371709 - in /lucene/dev/trunk/lucene: ./
core/src/java/org/apache/lucene/codecs/
core/src/java/org/apache/lucene/codecs/bloom/
core/src/java/org/apache/lucene/codecs/lucene40/
core/src/java/org/apache/lucene/codecs/memory/ core/src/java/o...
Author: rmuir
Date: Fri Aug 10 14:16:20 2012
New Revision: 1371709
URL: http://svn.apache.org/viewvc?rev=1371709&view=rev
Log:
LUCENE-4299: add Terms hasPositions/hasOffsets, so you know what features a doc's TVs have
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/Terms.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java
lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java
lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Fri Aug 10 14:16:20 2012
@@ -8,6 +8,13 @@ http://s.apache.org/luceneversions
======================= Lucene 4.0.0 =======================
+API Changes
+
+* LUCENE-4299: Added Terms.hasPositions() and Terms.hasOffsets().
+ Previously you had no real way to know that a term vector field
+ had positions or offsets, since this can be configured on a
+ per-field-per-document basis. (Robert Muir)
+
Bug Fixes
* LUCENE-4297: BooleanScorer2 would multiply the coord() factor
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java Fri Aug 10 14:16:20 2012
@@ -254,6 +254,16 @@ public class BlockTermsReader extends Fi
}
@Override
+ public boolean hasOffsets() {
+ return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+ }
+
+ @Override
+ public boolean hasPositions() {
+ return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+ }
+
+ @Override
public long size() {
return numTerms;
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java Fri Aug 10 14:16:20 2012
@@ -457,6 +457,16 @@ public class BlockTreeTermsReader extend
}
@Override
+ public boolean hasOffsets() {
+ return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+ }
+
+ @Override
+ public boolean hasPositions() {
+ return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+ }
+
+ @Override
public TermsEnum iterator(TermsEnum reuse) throws IOException {
return new SegmentTermsEnum();
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java Fri Aug 10 14:16:20 2012
@@ -184,6 +184,9 @@ public abstract class TermVectorsWriter
final FieldsEnum fieldsEnum = vectors.iterator();
String fieldName;
String lastFieldName = null;
+
+ TermsEnum termsEnum = null;
+ DocsAndPositionsEnum docsAndPositionsEnum = null;
while((fieldName = fieldsEnum.next()) != null) {
final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldName);
@@ -196,39 +199,30 @@ public abstract class TermVectorsWriter
// FieldsEnum shouldn't lie...
continue;
}
+
+ final boolean hasPositions = terms.hasPositions();
+ final boolean hasOffsets = terms.hasOffsets();
+
final int numTerms = (int) terms.size();
if (numTerms == -1) {
throw new IllegalStateException("terms.size() must be implemented (it returned -1)");
}
- final TermsEnum termsEnum = terms.iterator(null);
-
- DocsAndPositionsEnum docsAndPositionsEnum = null;
-
- boolean startedField = false;
-
- // NOTE: this is tricky, because TermVectors allow
- // indexing offsets but NOT positions. So we must
- // lazily init the field by checking whether first
- // position we see is -1 or not.
+
+ startField(fieldInfo, numTerms, hasPositions, hasOffsets);
+ termsEnum = terms.iterator(termsEnum);
int termCount = 0;
while(termsEnum.next() != null) {
termCount++;
final int freq = (int) termsEnum.totalTermFreq();
+
+ startTerm(termsEnum.term(), freq);
- if (startedField) {
- startTerm(termsEnum.term(), freq);
- }
-
- // TODO: we need a "query" API where we can ask (via
- // flex API) what this term was indexed with...
- // Both positions & offsets:
- docsAndPositionsEnum = termsEnum.docsAndPositions(null, null);
- boolean hasOffsets = false;
- boolean hasPositions = false;
-
- if (docsAndPositionsEnum != null) {
+ if (hasPositions || hasOffsets) {
+ docsAndPositionsEnum = termsEnum.docsAndPositions(null, docsAndPositionsEnum);
+ assert docsAndPositionsEnum != null;
+
final int docID = docsAndPositionsEnum.nextDoc();
assert docID != DocIdSetIterator.NO_MORE_DOCS;
assert docsAndPositionsEnum.freq() == freq;
@@ -237,28 +231,10 @@ public abstract class TermVectorsWriter
final int pos = docsAndPositionsEnum.nextPosition();
final int startOffset = docsAndPositionsEnum.startOffset();
final int endOffset = docsAndPositionsEnum.endOffset();
- if (!startedField) {
- assert numTerms > 0;
- hasPositions = pos != -1;
- hasOffsets = startOffset != -1;
- startField(fieldInfo, numTerms, hasPositions, hasOffsets);
- startTerm(termsEnum.term(), freq);
- startedField = true;
- }
- if (hasOffsets) {
- assert startOffset != -1;
- assert endOffset != -1;
- }
+
assert !hasPositions || pos >= 0;
addPosition(pos, startOffset, endOffset);
}
- } else {
- if (!startedField) {
- assert numTerms > 0;
- startField(fieldInfo, numTerms, hasPositions, hasOffsets);
- startTerm(termsEnum.term(), freq);
- startedField = true;
- }
}
}
assert termCount == numTerms;
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java Fri Aug 10 14:16:20 2012
@@ -314,6 +314,16 @@ public class BloomFilteringPostingsForma
public int getDocCount() throws IOException {
return delegateTerms.getDocCount();
}
+
+ @Override
+ public boolean hasOffsets() {
+ return delegateTerms.hasOffsets();
+ }
+
+ @Override
+ public boolean hasPositions() {
+ return delegateTerms.hasPositions();
+ }
}
class BloomFilteredTermsEnum extends TermsEnum {
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java Fri Aug 10 14:16:20 2012
@@ -296,10 +296,16 @@ public class Lucene40TermVectorsReader e
private class TVTerms extends Terms {
private final int numTerms;
private final long tvfFPStart;
+ private final boolean storePositions;
+ private final boolean storeOffsets;
+
public TVTerms(long tvfFP) throws IOException {
tvf.seek(tvfFP);
numTerms = tvf.readVInt();
+ final byte bits = tvf.readByte();
+ storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
+ storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
tvfFPStart = tvf.getFilePointer();
}
@@ -314,7 +320,7 @@ public class Lucene40TermVectorsReader e
} else {
termsEnum = new TVTermsEnum();
}
- termsEnum.reset(numTerms, tvfFPStart);
+ termsEnum.reset(numTerms, tvfFPStart, storePositions, storeOffsets);
return termsEnum;
}
@@ -345,6 +351,16 @@ public class Lucene40TermVectorsReader e
// this...? I guess codec could buffer and re-sort...
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
+
+ @Override
+ public boolean hasOffsets() {
+ return storeOffsets;
+ }
+
+ @Override
+ public boolean hasPositions() {
+ return storePositions;
+ }
}
private class TVTermsEnum extends TermsEnum {
@@ -373,13 +389,12 @@ public class Lucene40TermVectorsReader e
return tvf == origTVF;
}
- public void reset(int numTerms, long tvfFPStart) throws IOException {
+ public void reset(int numTerms, long tvfFPStart, boolean storePositions, boolean storeOffsets) throws IOException {
this.numTerms = numTerms;
+ this.storePositions = storePositions;
+ this.storeOffsets = storeOffsets;
nextTerm = 0;
tvf.seek(tvfFPStart);
- final byte bits = tvf.readByte();
- storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
- storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
tvfFP = 1+tvfFPStart;
positions = null;
startOffsets = null;
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java Fri Aug 10 14:16:20 2012
@@ -635,6 +635,16 @@ public class DirectPostingsFormat extend
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
+ @Override
+ public boolean hasOffsets() {
+ return hasOffsets;
+ }
+
+ @Override
+ public boolean hasPositions() {
+ return hasPos;
+ }
+
private final class DirectTermsEnum extends TermsEnum {
private final BytesRef scratch = new BytesRef();
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java Fri Aug 10 14:16:20 2012
@@ -834,6 +834,16 @@ public class MemoryPostingsFormat extend
public Comparator<BytesRef> getComparator() {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
+
+ @Override
+ public boolean hasOffsets() {
+ return field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+ }
+
+ @Override
+ public boolean hasPositions() {
+ return field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+ }
}
@Override
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java Fri Aug 10 14:16:20 2012
@@ -609,6 +609,16 @@ class SimpleTextFieldsReader extends Fie
public int getDocCount() throws IOException {
return docCount;
}
+
+ @Override
+ public boolean hasOffsets() {
+ return indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+ }
+
+ @Override
+ public boolean hasPositions() {
+ return indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+ }
}
@Override
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java Fri Aug 10 14:16:20 2012
@@ -130,7 +130,7 @@ public class SimpleTextTermVectorsReader
assert StringHelper.startsWith(scratch, FIELDTERMCOUNT);
int termCount = parseIntAt(FIELDTERMCOUNT.length);
- SimpleTVTerms terms = new SimpleTVTerms();
+ SimpleTVTerms terms = new SimpleTVTerms(offsets, positions);
fields.put(fieldName, terms);
for (int j = 0; j < termCount; j++) {
@@ -257,8 +257,12 @@ public class SimpleTextTermVectorsReader
private static class SimpleTVTerms extends Terms {
final SortedMap<BytesRef,SimpleTVPostings> terms;
+ final boolean hasOffsets;
+ final boolean hasPositions;
- SimpleTVTerms() {
+ SimpleTVTerms(boolean hasOffsets, boolean hasPositions) {
+ this.hasOffsets = hasOffsets;
+ this.hasPositions = hasPositions;
terms = new TreeMap<BytesRef,SimpleTVPostings>();
}
@@ -292,6 +296,16 @@ public class SimpleTextTermVectorsReader
public int getDocCount() throws IOException {
return 1;
}
+
+ @Override
+ public boolean hasOffsets() {
+ return hasOffsets;
+ }
+
+ @Override
+ public boolean hasPositions() {
+ return hasPositions;
+ }
}
private static class SimpleTVPostings {
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java Fri Aug 10 14:16:20 2012
@@ -718,6 +718,11 @@ public class CheckIndex {
continue;
}
+ final boolean hasPositions = terms.hasPositions();
+ final boolean hasOffsets = terms.hasOffsets();
+ // term vectors cannot omit TF
+ final boolean hasFreqs = isVectors || fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+
final TermsEnum termsEnum = terms.iterator(null);
boolean hasOrd = true;
@@ -777,17 +782,10 @@ public class CheckIndex {
status.termCount++;
final DocsEnum docs2;
- final boolean hasPositions;
- // if we are checking vectors, we have freqs implicitly
- final boolean hasFreqs = isVectors || fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
- // if we are checking vectors, offsets are a free-for-all anyway
- final boolean hasOffsets = isVectors || fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
if (postings != null) {
docs2 = postings;
- hasPositions = true;
} else {
docs2 = docs;
- hasPositions = false;
}
int lastDoc = -1;
@@ -827,10 +825,7 @@ public class CheckIndex {
// NOTE: pos=-1 is allowed because of ancient bug
// (LUCENE-1542) whereby IndexWriter could
// write pos=-1 when first token's posInc is 0
- // (separately: analyzers should not give
- // posInc=0 to first token); also, term
- // vectors are allowed to return pos=-1 if
- // they indexed offset but not positions:
+
if (pos < -1) {
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
}
@@ -1439,19 +1434,18 @@ public class CheckIndex {
}
postingsTermsEnum = postingsTerms.iterator(postingsTermsEnum);
+ final boolean hasProx = terms.hasOffsets() || terms.hasPositions();
BytesRef term = null;
while ((term = termsEnum.next()) != null) {
-
- final boolean hasProx;
- // Try positions:
- postings = termsEnum.docsAndPositions(null, postings);
- if (postings == null) {
- hasProx = false;
- // Try docIDs & freqs:
- docs = termsEnum.docs(null, docs);
+ if (hasProx) {
+ postings = termsEnum.docsAndPositions(null, postings);
+ assert postings != null;
+ docs = null;
} else {
- hasProx = true;
+ docs = termsEnum.docs(null, docs);
+ assert docs != null;
+ postings = null;
}
final DocsEnum docs2;
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java Fri Aug 10 14:16:20 2012
@@ -109,6 +109,16 @@ public class FilterAtomicReader extends
public TermsEnum intersect(CompiledAutomaton automaton, BytesRef bytes) throws java.io.IOException {
return in.intersect(automaton, bytes);
}
+
+ @Override
+ public boolean hasOffsets() {
+ return in.hasOffsets();
+ }
+
+ @Override
+ public boolean hasPositions() {
+ return in.hasPositions();
+ }
}
/** Base class for filtering {@link TermsEnum} implementations. */
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java Fri Aug 10 14:16:20 2012
@@ -37,12 +37,17 @@ public final class MultiTerms extends Te
private final Terms[] subs;
private final ReaderSlice[] subSlices;
private final Comparator<BytesRef> termComp;
+ private final boolean hasOffsets;
+ private final boolean hasPositions;
public MultiTerms(Terms[] subs, ReaderSlice[] subSlices) throws IOException {
this.subs = subs;
this.subSlices = subSlices;
Comparator<BytesRef> _termComp = null;
+ assert subs.length > 0 : "inefficient: don't use MultiTerms over one sub";
+ boolean _hasOffsets = true;
+ boolean _hasPositions = true;
for(int i=0;i<subs.length;i++) {
if (_termComp == null) {
_termComp = subs[i].getComparator();
@@ -54,9 +59,13 @@ public final class MultiTerms extends Te
throw new IllegalStateException("sub-readers have different BytesRef.Comparators; cannot merge");
}
}
+ _hasOffsets &= subs[i].hasOffsets();
+ _hasPositions &= subs[i].hasPositions();
}
termComp = _termComp;
+ hasOffsets = _hasOffsets;
+ hasPositions = _hasPositions;
}
@Override
@@ -142,5 +151,15 @@ public final class MultiTerms extends Te
public Comparator<BytesRef> getComparator() {
return termComp;
}
+
+ @Override
+ public boolean hasOffsets() {
+ return hasOffsets;
+ }
+
+ @Override
+ public boolean hasPositions() {
+ return hasPositions;
+ }
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/Terms.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/Terms.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/Terms.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/Terms.java Fri Aug 10 14:16:20 2012
@@ -104,6 +104,12 @@ public abstract class Terms {
* measures, this measure does not take deleted documents
* into account. */
public abstract int getDocCount() throws IOException;
+
+ /** Returns true if documents in this field store offsets. */
+ public abstract boolean hasOffsets();
+
+ /** Returns true if documents in this field store positions. */
+ public abstract boolean hasPositions();
public final static Terms[] EMPTY_ARRAY = new Terms[0];
}
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java Fri Aug 10 14:16:20 2012
@@ -207,6 +207,8 @@ public class TestDuelingCodecs extends L
return;
}
assertTermsStatistics(leftTerms, rightTerms);
+ assertEquals(leftTerms.hasOffsets(), rightTerms.hasOffsets());
+ assertEquals(leftTerms.hasPositions(), rightTerms.hasPositions());
TermsEnum leftTermsEnum = leftTerms.iterator(null);
TermsEnum rightTermsEnum = rightTerms.iterator(null);
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java Fri Aug 10 14:16:20 2012
@@ -122,6 +122,16 @@ public final class FieldCacheRewriteMeth
public long size() {
return -1;
}
+
+ @Override
+ public boolean hasOffsets() {
+ return false;
+ }
+
+ @Override
+ public boolean hasPositions() {
+ return false;
+ }
});
assert termsEnum != null;
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java Fri Aug 10 14:16:20 2012
@@ -125,20 +125,7 @@ public class TokenSources {
}
private static boolean hasPositions(Terms vector) throws IOException {
- final TermsEnum termsEnum = vector.iterator(null);
- if (termsEnum.next() != null) {
- DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null, DocsAndPositionsEnum.FLAG_PAYLOADS);
- if (dpEnum != null) {
- int doc = dpEnum.nextDoc();
- assert doc >= 0 && doc != DocIdSetIterator.NO_MORE_DOCS;
- int pos = dpEnum.nextPosition();
- if (pos >= 0) {
- return true;
- }
- }
- }
-
- return false;
+ return vector.hasPositions();
}
/**
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java Fri Aug 10 14:16:20 2012
@@ -56,18 +56,17 @@ public final class TokenStreamFromTermPo
termAttribute = addAttribute(CharTermAttribute.class);
positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);
offsetAttribute = addAttribute(OffsetAttribute.class);
+ final boolean hasOffsets = vector.hasOffsets();
final TermsEnum termsEnum = vector.iterator(null);
BytesRef text;
DocsAndPositionsEnum dpEnum = null;
while((text = termsEnum.next()) != null) {
dpEnum = termsEnum.docsAndPositions(null, dpEnum);
assert dpEnum != null; // presumably checked by TokenSources.hasPositions earlier
- boolean hasOffsets = true;
dpEnum.nextDoc();
final int freq = dpEnum.freq();
for (int j = 0; j < freq; j++) {
int pos = dpEnum.nextPosition();
- hasOffsets &= dpEnum.startOffset() >= 0;
Token token;
if (hasOffsets) {
token = new Token(text.utf8ToString(),
Modified: lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Fri Aug 10 14:16:20 2012
@@ -778,8 +778,16 @@ public class MemoryIndex {
public int getDocCount() {
return info.sortedTerms.length > 0 ? 1 : 0;
}
-
-
+
+ @Override
+ public boolean hasOffsets() {
+ return stride == 3;
+ }
+
+ @Override
+ public boolean hasPositions() {
+ return true;
+ }
};
}
}
Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java Fri Aug 10 14:16:20 2012
@@ -127,9 +127,11 @@ public class RAMOnlyPostingsFormat exten
long sumTotalTermFreq;
long sumDocFreq;
int docCount;
+ final FieldInfo.IndexOptions options;
- RAMField(String field) {
+ RAMField(String field, FieldInfo.IndexOptions options) {
this.field = field;
+ this.options = options;
}
@Override
@@ -161,6 +163,16 @@ public class RAMOnlyPostingsFormat exten
public Comparator<BytesRef> getComparator() {
return reverseUnicodeComparator;
}
+
+ @Override
+ public boolean hasOffsets() {
+ return options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+ }
+
+ @Override
+ public boolean hasPositions() {
+ return options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+ }
}
static class RAMTerm {
@@ -198,7 +210,7 @@ public class RAMOnlyPostingsFormat exten
if (field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
throw new UnsupportedOperationException("this codec cannot index offsets");
}
- RAMField ramField = new RAMField(field.name);
+ RAMField ramField = new RAMField(field.name, field.getIndexOptions());
postings.fieldToTerms.put(field.name, ramField);
termsConsumer.reset(ramField);
return termsConsumer;