You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/01/16 00:17:47 UTC
svn commit: r1231794 [1/3] - in /lucene/dev/trunk: lucene/
lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/
lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/
lucene/contrib/memory/src/java/org/apache/l...
Author: mikemccand
Date: Sun Jan 15 23:17:45 2012
New Revision: 1231794
URL: http://svn.apache.org/viewvc?rev=1231794&view=rev
Log:
LUCENE-3684: add offsets to postings APIs
Added:
lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/analysis/CannedAnalyzer.java (with props)
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestPostingsOffsets.java (with props)
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java
lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java
lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
lucene/dev/trunk/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/BlockTermsReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PostingsConsumer.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PostingsReaderBase.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/TermsConsumer.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFields.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsFormat.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocTermOrds.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilteredTermsEnum.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiFields.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Term.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsEnum.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseQuery.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexFieldsWriter.java
lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java
lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/index/RandomIndexWriter.java
lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/util/LuceneTestCase.java
lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/util/_TestUtil.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/codecs/pulsing/TestPulsingReuse.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/document/TestDocument.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestCodecs.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDoc.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDuelingCodecs.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestFilterIndexReader.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReader.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexableField.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestLongPostings.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestOmitPositions.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestPayloadProcessorProvider.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestPayloads.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestTermVectorsWriter.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestTermVectors.java
lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java
lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java
lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/search/PayloadIterator.java
lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/ParentArray.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Sun Jan 15 23:17:45 2012
@@ -226,6 +226,10 @@ Changes in backwards compatibility polic
* LUCENE-3640: Removed IndexSearcher.close(), because IndexSearcher no longer
takes a Directory and no longer "manages" IndexReaders, it is a no-op.
(Robert Muir)
+
+* LUCENE-3684: Add offsets into DocsAndPositionsEnum, and a new
+ FieldInfo.IndexOptions value: DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS.
+ (Robert Muir, Mike McCandless)
Changes in Runtime Behavior
Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java (original)
+++ lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java Sun Jan 15 23:17:45 2012
@@ -126,7 +126,7 @@ public class TokenSources {
private static boolean hasPositions(Terms vector) throws IOException {
final TermsEnum termsEnum = vector.iterator(null);
if (termsEnum.next() != null) {
- DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null);
+ DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null, false);
if (dpEnum != null) {
int pos = dpEnum.nextPosition();
if (pos >= 0) {
@@ -219,22 +219,21 @@ public class TokenSources {
DocsAndPositionsEnum dpEnum = null;
while ((text = termsEnum.next()) != null) {
- dpEnum = termsEnum.docsAndPositions(null, dpEnum);
- if (dpEnum == null || (!dpEnum.attributes().hasAttribute(OffsetAttribute.class))) {
+ dpEnum = termsEnum.docsAndPositions(null, dpEnum, true);
+ if (dpEnum == null) {
throw new IllegalArgumentException(
"Required TermVector Offset information was not found");
}
final String term = text.utf8ToString();
- final OffsetAttribute offsetAtt = dpEnum.attributes().getAttribute(OffsetAttribute.class);
dpEnum.nextDoc();
final int freq = dpEnum.freq();
for(int posUpto=0;posUpto<freq;posUpto++) {
final int pos = dpEnum.nextPosition();
final Token token = new Token(term,
- offsetAtt.startOffset(),
- offsetAtt.endOffset());
+ dpEnum.startOffset(),
+ dpEnum.endOffset());
if (tokenPositionsGuaranteedContiguous && pos != -1) {
// We have positions stored and a guarantee that the token position
// information is contiguous
Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java (original)
+++ lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java Sun Jan 15 23:17:45 2012
@@ -60,22 +60,23 @@ public final class TokenStreamFromTermPo
BytesRef text;
DocsAndPositionsEnum dpEnum = null;
while((text = termsEnum.next()) != null) {
- dpEnum = termsEnum.docsAndPositions(null, dpEnum);
- dpEnum.nextDoc();
- final int freq = dpEnum.freq();
- final OffsetAttribute offsetAtt;
- if (dpEnum.attributes().hasAttribute(OffsetAttribute.class)) {
- offsetAtt = dpEnum.attributes().getAttribute(OffsetAttribute.class);
+ dpEnum = termsEnum.docsAndPositions(null, dpEnum, true);
+ final boolean hasOffsets;
+ if (dpEnum == null) {
+ hasOffsets = false;
+ dpEnum = termsEnum.docsAndPositions(null, dpEnum, false);
} else {
- offsetAtt = null;
+ hasOffsets = true;
}
+ dpEnum.nextDoc();
+ final int freq = dpEnum.freq();
for (int j = 0; j < freq; j++) {
int pos = dpEnum.nextPosition();
Token token;
- if (offsetAtt != null) {
+ if (hasOffsets) {
token = new Token(text.utf8ToString(),
- offsetAtt.startOffset(),
- offsetAtt.endOffset());
+ dpEnum.startOffset(),
+ dpEnum.endOffset());
} else {
token = new Token();
token.setEmpty().append(text.utf8ToString());
Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java (original)
+++ lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java Sun Jan 15 23:17:45 2012
@@ -21,7 +21,6 @@ import java.util.Collections;
import java.util.LinkedList;
import java.util.Set;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
@@ -101,29 +100,19 @@ public class FieldTermStack {
if (!termSet.contains(term)) {
continue;
}
- dpEnum = termsEnum.docsAndPositions(null, dpEnum);
+ dpEnum = termsEnum.docsAndPositions(null, dpEnum, true);
if (dpEnum == null) {
// null snippet
return;
}
- if (!dpEnum.attributes().hasAttribute(OffsetAttribute.class)) {
- // null snippet
- return;
- }
dpEnum.nextDoc();
- final OffsetAttribute offsetAtt = dpEnum.attributes().getAttribute(OffsetAttribute.class);
-
final int freq = dpEnum.freq();
for(int i = 0;i < freq;i++) {
- final int pos = dpEnum.nextPosition();
- if (pos == -1) {
- // null snippet
- return;
- }
- termList.add(new TermInfo(term, offsetAtt.startOffset(), offsetAtt.endOffset(), pos));
+ int pos = dpEnum.nextPosition();
+ termList.add(new TermInfo(term, dpEnum.startOffset(), dpEnum.endOffset(), pos));
}
}
Modified: lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Sun Jan 15 23:17:45 2012
@@ -953,7 +953,10 @@ public class MemoryIndex {
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) {
+ public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) {
+ if (needsOffsets) {
+ return null;
+ }
if (reuse == null || !(reuse instanceof MemoryDocsAndPositionsEnum)) {
reuse = new MemoryDocsAndPositionsEnum();
}
@@ -1066,6 +1069,16 @@ public class MemoryIndex {
}
@Override
+ public int startOffset() {
+ return -1;
+ }
+
+ @Override
+ public int endOffset() {
+ return -1;
+ }
+
+ @Override
public boolean hasPayload() {
return false;
}
Modified: lucene/dev/trunk/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java (original)
+++ lucene/dev/trunk/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java Sun Jan 15 23:17:45 2012
@@ -206,7 +206,7 @@ public class MemoryIndexTest extends Bas
MemoryIndex memory = new MemoryIndex();
memory.addField("foo", "bar", analyzer);
IndexReader reader = memory.createSearcher().getIndexReader();
- DocsAndPositionsEnum disi = reader.termPositionsEnum(null, "foo", new BytesRef("bar"));
+ DocsAndPositionsEnum disi = reader.termPositionsEnum(null, "foo", new BytesRef("bar"), false);
int docid = disi.docID();
assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
@@ -214,7 +214,7 @@ public class MemoryIndexTest extends Bas
// now reuse and check again
TermsEnum te = reader.terms("foo").iterator(null);
assertTrue(te.seekExact(new BytesRef("bar"), true));
- disi = te.docsAndPositions(null, disi);
+ disi = te.docsAndPositions(null, disi, false);
docid = disi.docID();
assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/BlockTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/BlockTermsReader.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/BlockTermsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/BlockTermsReader.java Sun Jan 15 23:17:45 2012
@@ -697,16 +697,20 @@ public class BlockTermsReader extends Fi
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
- //System.out.println("BTR.d&p this=" + this);
- decodeMetaData();
- if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+ public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+ if (fieldInfo.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+ // Positions were not indexed:
return null;
- } else {
- DocsAndPositionsEnum dpe = postingsReader.docsAndPositions(fieldInfo, state, liveDocs, reuse);
- //System.out.println(" return d&pe=" + dpe);
- return dpe;
}
+
+ if (needsOffsets &&
+ fieldInfo.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) {
+ // Offsets were not indexed:
+ return null;
+ }
+
+ decodeMetaData();
+ return postingsReader.docsAndPositions(fieldInfo, state, liveDocs, reuse, needsOffsets);
}
@Override
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java Sun Jan 15 23:17:45 2012
@@ -881,13 +881,20 @@ public class BlockTreeTermsReader extend
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
- if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+ public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+ if (fieldInfo.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+ // Positions were not indexed:
+ return null;
+ }
+
+ if (needsOffsets &&
+ fieldInfo.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) {
+ // Offsets were not indexed:
return null;
- } else {
- currentFrame.decodeMetaData();
- return postingsReader.docsAndPositions(fieldInfo, currentFrame.termState, skipDocs, reuse);
}
+
+ currentFrame.decodeMetaData();
+ return postingsReader.docsAndPositions(fieldInfo, currentFrame.termState, skipDocs, reuse, needsOffsets);
}
private int getState() {
@@ -2096,17 +2103,21 @@ public class BlockTreeTermsReader extend
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
- assert !eof;
- //System.out.println("BTR.d&p this=" + this);
- if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+ public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+ if (fieldInfo.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+ // Positions were not indexed:
+ return null;
+ }
+
+ if (needsOffsets &&
+ fieldInfo.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) {
+ // Offsets were not indexed:
return null;
- } else {
- currentFrame.decodeMetaData();
- DocsAndPositionsEnum dpe = postingsReader.docsAndPositions(fieldInfo, currentFrame.state, skipDocs, reuse);
- //System.out.println(" return d&pe=" + dpe);
- return dpe;
}
+
+ assert !eof;
+ currentFrame.decodeMetaData();
+ return postingsReader.docsAndPositions(fieldInfo, currentFrame.state, skipDocs, reuse, needsOffsets);
}
@Override
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java Sun Jan 15 23:17:45 2012
@@ -102,6 +102,16 @@ public final class MappingMultiDocsAndPo
public int nextPosition() throws IOException {
return current.nextPosition();
}
+
+ @Override
+ public int startOffset() throws IOException {
+ return current.startOffset();
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ return current.endOffset();
+ }
@Override
public BytesRef getPayload() throws IOException {
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PostingsConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PostingsConsumer.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PostingsConsumer.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PostingsConsumer.java Sun Jan 15 23:17:45 2012
@@ -44,12 +44,12 @@ public abstract class PostingsConsumer {
int docBase;
}
- /** Add a new position & payload. A null payload means no
- * payload; a non-null payload with zero length also
- * means no payload. Caller may reuse the {@link
- * BytesRef} for the payload between calls (method must
- * fully consume the payload). */
- public abstract void addPosition(int position, BytesRef payload) throws IOException;
+ /** Add a new position & payload, and start/end offset. A
+ * null payload means no payload; a non-null payload with
+ * zero length also means no payload. Caller may reuse
+ * the {@link BytesRef} for the payload between calls
+ * (method must fully consume the payload). */
+ public abstract void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException;
/** Called when we are done adding positions & payloads
* for each doc. Not called when the field omits term
@@ -88,7 +88,32 @@ public abstract class PostingsConsumer {
df++;
totTF += freq;
}
+ } else if (mergeState.fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+ final DocsAndPositionsEnum postingsEnum = (DocsAndPositionsEnum) postings;
+ while(true) {
+ final int doc = postingsEnum.nextDoc();
+ if (doc == DocIdSetIterator.NO_MORE_DOCS) {
+ break;
+ }
+ visitedDocs.set(doc);
+ final int freq = postingsEnum.freq();
+ this.startDoc(doc, freq);
+ totTF += freq;
+ for(int i=0;i<freq;i++) {
+ final int position = postingsEnum.nextPosition();
+ final BytesRef payload;
+ if (postingsEnum.hasPayload()) {
+ payload = postingsEnum.getPayload();
+ } else {
+ payload = null;
+ }
+ this.addPosition(position, payload, -1, -1);
+ }
+ this.finishDoc();
+ df++;
+ }
} else {
+ assert mergeState.fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
final DocsAndPositionsEnum postingsEnum = (DocsAndPositionsEnum) postings;
while(true) {
final int doc = postingsEnum.nextDoc();
@@ -107,7 +132,7 @@ public abstract class PostingsConsumer {
} else {
payload = null;
}
- this.addPosition(position, payload);
+ this.addPosition(position, payload, postingsEnum.startOffset(), postingsEnum.endOffset());
}
this.finishDoc();
df++;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PostingsReaderBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PostingsReaderBase.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PostingsReaderBase.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PostingsReaderBase.java Sun Jan 15 23:17:45 2012
@@ -55,7 +55,8 @@ public abstract class PostingsReaderBase
/** Must fully consume state, since after this call that
* TermState may be reused. */
- public abstract DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException;
+ public abstract DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsAndPositionsEnum reuse,
+ boolean needsOffsets) throws IOException;
public abstract void close() throws IOException;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/TermVectorsWriter.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/TermVectorsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/TermVectorsWriter.java Sun Jan 15 23:17:45 2012
@@ -20,7 +20,6 @@ package org.apache.lucene.codecs;
import java.io.Closeable;
import java.io.IOException;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
@@ -185,7 +184,6 @@ public abstract class TermVectorsWriter
String lastFieldName = null;
while((fieldName = fieldsEnum.next()) != null) {
-
final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldName);
assert lastFieldName == null || fieldName.compareTo(lastFieldName) > 0: "lastFieldName=" + lastFieldName + " fieldName=" + fieldName;
@@ -200,79 +198,79 @@ public abstract class TermVectorsWriter
if (numTerms == -1) {
throw new IllegalStateException("vector.getUniqueTermCount() must be implemented (it returned -1)");
}
-
- final boolean positions;
-
- OffsetAttribute offsetAtt;
-
final TermsEnum termsEnum = terms.iterator(null);
DocsAndPositionsEnum docsAndPositionsEnum = null;
- if (termsEnum.next() != null) {
- assert numTerms > 0;
- docsAndPositionsEnum = termsEnum.docsAndPositions(null, null);
- if (docsAndPositionsEnum != null) {
- // has positions
- positions = true;
- if (docsAndPositionsEnum.attributes().hasAttribute(OffsetAttribute.class)) {
- offsetAtt = docsAndPositionsEnum.attributes().getAttribute(OffsetAttribute.class);
- } else {
- offsetAtt = null;
- }
- } else {
- positions = false;
- offsetAtt = null;
- }
- } else {
- // no terms in this field (hmm why is field present
- // then...?)
- assert numTerms == 0;
- positions = false;
- offsetAtt = null;
- }
-
- startField(fieldInfo, numTerms, positions, offsetAtt != null);
+ boolean startedField = false;
- int termCount = 1;
+ // NOTE: this is tricky, because TermVectors allow
+ // indexing offsets but NOT positions. So we must
+ // lazily init the field by checking whether the first
+ // position we see is -1 or not.
+
+ int termCount = 0;
+ while(termsEnum.next() != null) {
+ termCount++;
- // NOTE: we already .next()'d the TermsEnum above, to
- // peek @ first term to see if positions/offsets are
- // present
- while(true) {
final int freq = (int) termsEnum.totalTermFreq();
- startTerm(termsEnum.term(), freq);
- if (positions || offsetAtt != null) {
- DocsAndPositionsEnum dp = termsEnum.docsAndPositions(null, docsAndPositionsEnum);
- // TODO: add startOffset()/endOffset() to d&pEnum... this is insanity
- if (dp != docsAndPositionsEnum) {
- // producer didnt reuse, must re-pull attributes
- if (offsetAtt != null) {
- assert dp.attributes().hasAttribute(OffsetAttribute.class);
- offsetAtt = dp.attributes().getAttribute(OffsetAttribute.class);
- }
- }
- docsAndPositionsEnum = dp;
+ if (startedField) {
+ startTerm(termsEnum.term(), freq);
+ }
+
+ // TODO: we need a "query" API where we can ask (via
+ // flex API) what this term was indexed with...
+ // Both positions & offsets:
+ docsAndPositionsEnum = termsEnum.docsAndPositions(null, null, true);
+ final boolean hasOffsets;
+ boolean hasPositions = false;
+ if (docsAndPositionsEnum == null) {
+ // Fallback: no offsets
+ docsAndPositionsEnum = termsEnum.docsAndPositions(null, null, false);
+ hasOffsets = false;
+ } else {
+ hasOffsets = true;
+ }
+
+ if (docsAndPositionsEnum != null) {
final int docID = docsAndPositionsEnum.nextDoc();
assert docID != DocsEnum.NO_MORE_DOCS;
assert docsAndPositionsEnum.freq() == freq;
for(int posUpto=0; posUpto<freq; posUpto++) {
final int pos = docsAndPositionsEnum.nextPosition();
- final int startOffset = offsetAtt == null ? -1 : offsetAtt.startOffset();
- final int endOffset = offsetAtt == null ? -1 : offsetAtt.endOffset();
-
+ if (!startedField) {
+ assert numTerms > 0;
+ hasPositions = pos != -1;
+ startField(fieldInfo, numTerms, hasPositions, hasOffsets);
+ startTerm(termsEnum.term(), freq);
+ startedField = true;
+ }
+ final int startOffset;
+ final int endOffset;
+ if (hasOffsets) {
+ startOffset = docsAndPositionsEnum.startOffset();
+ endOffset = docsAndPositionsEnum.endOffset();
+ assert startOffset != -1;
+ assert endOffset != -1;
+ } else {
+ startOffset = -1;
+ endOffset = -1;
+ }
+ assert !hasPositions || pos >= 0;
addPosition(pos, startOffset, endOffset);
}
+ } else {
+ if (!startedField) {
+ assert numTerms > 0;
+ startField(fieldInfo, numTerms, hasPositions, hasOffsets);
+ startTerm(termsEnum.term(), freq);
+ startedField = true;
+ }
}
-
- if (termsEnum.next() == null) {
- assert termCount == numTerms;
- break;
- }
- termCount++;
}
+ assert termCount == numTerms;
}
}
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/TermsConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/TermsConsumer.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/TermsConsumer.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/TermsConsumer.java Sun Jan 15 23:17:45 2012
@@ -119,8 +119,41 @@ public abstract class TermsConsumer {
}
}
}
+ } else if (mergeState.fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+ if (postingsEnum == null) {
+ postingsEnum = new MappingMultiDocsAndPositionsEnum();
+ }
+ postingsEnum.setMergeState(mergeState);
+ MultiDocsAndPositionsEnum postingsEnumIn = null;
+ while((term = termsEnum.next()) != null) {
+ // We can pass null for liveDocs, because the
+ // mapping enum will skip the non-live docs:
+ postingsEnumIn = (MultiDocsAndPositionsEnum) termsEnum.docsAndPositions(null, postingsEnumIn, false);
+ assert postingsEnumIn != null;
+ postingsEnum.reset(postingsEnumIn);
+ // set PayloadProcessor
+ if (mergeState.payloadProcessorProvider != null) {
+ for (int i = 0; i < mergeState.readers.size(); i++) {
+ if (mergeState.dirPayloadProcessor[i] != null) {
+ mergeState.currentPayloadProcessor[i] = mergeState.dirPayloadProcessor[i].getProcessor(mergeState.fieldInfo.name, term);
+ }
+ }
+ }
+ final PostingsConsumer postingsConsumer = startTerm(term);
+ final TermStats stats = postingsConsumer.merge(mergeState, postingsEnum, visitedDocs);
+ if (stats.docFreq > 0) {
+ finishTerm(term, stats);
+ sumTotalTermFreq += stats.totalTermFreq;
+ sumDFsinceLastAbortCheck += stats.docFreq;
+ sumDocFreq += stats.docFreq;
+ if (sumDFsinceLastAbortCheck > 60000) {
+ mergeState.checkAbort.work(sumDFsinceLastAbortCheck/5.0);
+ sumDFsinceLastAbortCheck = 0;
+ }
+ }
+ }
} else {
- assert mergeState.fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
+ assert mergeState.fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
if (postingsEnum == null) {
postingsEnum = new MappingMultiDocsAndPositionsEnum();
}
@@ -129,7 +162,7 @@ public abstract class TermsConsumer {
while((term = termsEnum.next()) != null) {
// We can pass null for liveDocs, because the
// mapping enum will skip the non-live docs:
- postingsEnumIn = (MultiDocsAndPositionsEnum) termsEnum.docsAndPositions(null, postingsEnumIn);
+ postingsEnumIn = (MultiDocsAndPositionsEnum) termsEnum.docsAndPositions(null, postingsEnumIn, true);
assert postingsEnumIn != null;
postingsEnum.reset(postingsEnumIn);
// set PayloadProcessor
@@ -154,7 +187,6 @@ public abstract class TermsConsumer {
}
}
}
-
finish(sumTotalTermFreq, sumDocFreq, visitedDocs.cardinality());
}
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFields.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFields.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFields.java Sun Jan 15 23:17:45 2012
@@ -966,7 +966,12 @@ public class Lucene3xFields extends Fiel
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
+ public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+ if (needsOffsets) {
+ // Pre-4.0 indices never have offsets:
+ return null;
+ }
+
PreDocsAndPositionsEnum docsPosEnum;
if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
return null;
@@ -1082,6 +1087,16 @@ public class Lucene3xFields extends Fiel
}
@Override
+ public int startOffset() throws IOException {
+ return -1;
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ return -1;
+ }
+
+ @Override
public boolean hasPayload() {
assert docID != NO_MORE_DOCS;
return pos.isPayloadAvailable();
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReader.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReader.java Sun Jan 15 23:17:45 2012
@@ -215,7 +215,9 @@ public final class TermInfosReader {
TermInfo seekEnum(SegmentTermEnum enumerator, Term term, boolean useCache) throws IOException {
if (useCache) {
- return seekEnum(enumerator, term, termsCache.get(new CloneableTerm(term)), useCache);
+ return seekEnum(enumerator, term,
+ termsCache.get(new CloneableTerm(term.deepCopyOf())),
+ useCache);
} else {
return seekEnum(enumerator, term, null, useCache);
}
@@ -247,7 +249,8 @@ public final class TermInfosReader {
// of terms in order
if (tiOrd == null) {
if (useCache) {
- termsCache.put(new CloneableTerm(term), new TermInfoAndOrd(ti, enumerator.position));
+ termsCache.put(new CloneableTerm(term.deepCopyOf()),
+ new TermInfoAndOrd(ti, enumerator.position));
}
} else {
assert sameTermInfo(ti, tiOrd, enumerator);
@@ -279,7 +282,8 @@ public final class TermInfosReader {
ti = enumerator.termInfo;
if (tiOrd == null) {
if (useCache) {
- termsCache.put(new CloneableTerm(term), new TermInfoAndOrd(ti, enumerator.position));
+ termsCache.put(new CloneableTerm(term.deepCopyOf()),
+ new TermInfoAndOrd(ti, enumerator.position));
}
} else {
assert sameTermInfo(ti, tiOrd, enumerator);
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java Sun Jan 15 23:17:45 2012
@@ -80,6 +80,8 @@ public class Lucene40FieldInfosReader ex
} else {
throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format + " (resource: " + input + ")");
}
+ } else if (format <= Lucene40FieldInfosWriter.FORMAT_FLEX && (bits & Lucene40FieldInfosWriter.STORE_OFFSETS_IN_POSTINGS) != 0) {
+ indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
} else {
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java Sun Jan 15 23:17:45 2012
@@ -47,6 +47,7 @@ public class Lucene40FieldInfosWriter ex
static final byte IS_INDEXED = 0x1;
static final byte STORE_TERMVECTOR = 0x2;
+ static final byte STORE_OFFSETS_IN_POSTINGS = 0x4;
static final byte OMIT_NORMS = 0x10;
static final byte STORE_PAYLOADS = 0x20;
static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
@@ -68,6 +69,8 @@ public class Lucene40FieldInfosWriter ex
if (fi.storePayloads) bits |= STORE_PAYLOADS;
if (fi.indexOptions == IndexOptions.DOCS_ONLY) {
bits |= OMIT_TERM_FREQ_AND_POSITIONS;
+ } else if (fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
+ bits |= STORE_OFFSETS_IN_POSTINGS;
} else if (fi.indexOptions == IndexOptions.DOCS_AND_FREQS) {
bits |= OMIT_POSITIONS;
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java Sun Jan 15 23:17:45 2012
@@ -241,11 +241,15 @@ public class Lucene40PostingsReader exte
}
@Override
- public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
- if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+ public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs,
+ DocsAndPositionsEnum reuse, boolean needsOffsets)
+ throws IOException {
+
+ if (needsOffsets) {
+ // TODO: this codec does not index offsets into postings yet; return a real enum here once it does!
return null;
}
-
+
// TODO: refactor
if (fieldInfo.storePayloads) {
SegmentDocsAndPositionsAndPayloadsEnum docsEnum;
@@ -366,7 +370,7 @@ public class Lucene40PostingsReader exte
start = count; // buffer is consumed
- return doc = skipTo(target, liveDocs);
+ return doc = skipTo(target);
}
private final int binarySearch(int hi, int low, int target, int[] docs) {
@@ -448,7 +452,7 @@ public class Lucene40PostingsReader exte
}
- private final int skipTo(int target, Bits liveDocs) throws IOException {
+ private final int skipTo(int target) throws IOException {
if ((target - skipInterval) >= accum && limit >= skipMinimum) {
// There are enough docs in the posting to have
@@ -841,6 +845,16 @@ public class Lucene40PostingsReader exte
return position;
}
+ @Override
+ public int startOffset() throws IOException {
+ return -1;
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ return -1;
+ }
+
/** Returns the payload at this position, or null if no
* payload was indexed. */
@Override
@@ -1074,6 +1088,16 @@ public class Lucene40PostingsReader exte
return position;
}
+ @Override
+ public int startOffset() throws IOException {
+ return -1;
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ return -1;
+ }
+
/** Returns the payload at this position, or null if no
* payload was indexed. */
@Override
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java Sun Jan 15 23:17:45 2012
@@ -155,6 +155,10 @@ public final class Lucene40PostingsWrite
*/
this.fieldInfo = fieldInfo;
indexOptions = fieldInfo.indexOptions;
+ if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
+ throw new IllegalArgumentException("this codec cannot index offsets");
+ }
+
storePayloads = fieldInfo.storePayloads;
//System.out.println(" set init blockFreqStart=" + freqStart);
//System.out.println(" set init blockProxStart=" + proxStart);
@@ -197,11 +201,19 @@ public final class Lucene40PostingsWrite
/** Add a new position & payload */
@Override
- public void addPosition(int position, BytesRef payload) throws IOException {
+ public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
//if (DEBUG) System.out.println("SPW: addPos pos=" + position + " payload=" + (payload == null ? "null" : (payload.length + " bytes")) + " proxFP=" + proxOut.getFilePointer());
assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS: "invalid indexOptions: " + indexOptions;
assert proxOut != null;
+ // TODO: when we add offsets, note that
+ // endOffset-startOffset is often constant or nearly
+ // constant across all docs (e.g. if the term wasn't
+ // stemmed, it is usually the UTF-16 length of the
+ // term); it would be nice to write that length once up
+ // front and then not encode endOffset for each
+ // position.
+
final int delta = position - lastPosition;
assert delta >= 0: "position=" + position + " lastPosition=" + lastPosition; // not quite right (if pos=0 is repeated twice we don't catch it)
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java Sun Jan 15 23:17:45 2012
@@ -24,7 +24,6 @@ import java.util.HashMap;
import java.util.Map;
import java.util.Set;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocsAndPositionsEnum;
@@ -518,21 +517,20 @@ public class Lucene40TermVectorsReader e
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
+ public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+ if (needsOffsets && !storeOffsets) {
+ return null;
+ }
+
if (!storePositions && !storeOffsets) {
return null;
}
TVDocsAndPositionsEnum docsAndPositionsEnum;
- if (reuse != null) {
+ if (reuse != null && reuse instanceof TVDocsAndPositionsEnum) {
docsAndPositionsEnum = (TVDocsAndPositionsEnum) reuse;
- if (docsAndPositionsEnum.canReuse(storeOffsets)) {
- docsAndPositionsEnum = (TVDocsAndPositionsEnum) reuse;
- } else {
- docsAndPositionsEnum = new TVDocsAndPositionsEnum(storeOffsets);
- }
} else {
- docsAndPositionsEnum = new TVDocsAndPositionsEnum(storeOffsets);
+ docsAndPositionsEnum = new TVDocsAndPositionsEnum();
}
docsAndPositionsEnum.reset(liveDocs, positions, startOffsets, endOffsets);
return docsAndPositionsEnum;
@@ -592,7 +590,6 @@ public class Lucene40TermVectorsReader e
}
private static class TVDocsAndPositionsEnum extends DocsAndPositionsEnum {
- private final OffsetAttribute offsetAtt;
private boolean didNext;
private int doc = -1;
private int nextPos;
@@ -601,18 +598,6 @@ public class Lucene40TermVectorsReader e
private int[] startOffsets;
private int[] endOffsets;
- public TVDocsAndPositionsEnum(boolean storeOffsets) {
- if (storeOffsets) {
- offsetAtt = attributes().addAttribute(OffsetAttribute.class);
- } else {
- offsetAtt = null;
- }
- }
-
- public boolean canReuse(boolean storeOffsets) {
- return storeOffsets == (offsetAtt != null);
- }
-
@Override
public int freq() {
if (positions != null) {
@@ -651,7 +636,6 @@ public class Lucene40TermVectorsReader e
this.liveDocs = liveDocs;
this.positions = positions;
this.startOffsets = startOffsets;
- assert (offsetAtt != null) == (startOffsets != null);
this.endOffsets = endOffsets;
this.doc = -1;
didNext = false;
@@ -673,10 +657,6 @@ public class Lucene40TermVectorsReader e
assert (positions != null && nextPos < positions.length) ||
startOffsets != null && nextPos < startOffsets.length;
- if (startOffsets != null) {
- offsetAtt.setOffset(startOffsets[nextPos],
- endOffsets[nextPos]);
- }
if (positions != null) {
return positions[nextPos++];
} else {
@@ -684,6 +664,18 @@ public class Lucene40TermVectorsReader e
return -1;
}
}
+
+ @Override
+ public int startOffset() {
+ assert startOffsets != null;
+ return startOffsets[nextPos-1];
+ }
+
+ @Override
+ public int endOffset() {
+ assert endOffsets != null;
+ return endOffsets[nextPos-1];
+ }
}
@Override
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java Sun Jan 15 23:17:45 2012
@@ -131,7 +131,7 @@ public class MemoryPostingsFormat extend
}
@Override
- public void addPosition(int pos, BytesRef payload) throws IOException {
+ public void addPosition(int pos, BytesRef payload, int startOffset, int endOffset) throws IOException {
assert payload == null || field.storePayloads;
if (VERBOSE) System.out.println(" addPos pos=" + pos + " payload=" + payload);
@@ -249,6 +249,9 @@ public class MemoryPostingsFormat extend
return new FieldsConsumer() {
@Override
public TermsConsumer addField(FieldInfo field) {
+ if (field.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
+ throw new IllegalArgumentException("this codec cannot index offsets");
+ }
if (VERBOSE) System.out.println("\naddField field=" + field.name);
return new TermsWriter(out, field);
}
@@ -328,7 +331,7 @@ public class MemoryPostingsFormat extend
assert freq > 0;
}
- if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+ if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
// Skip positions
for(int posUpto=0;posUpto<freq;posUpto++) {
if (!storePayloads) {
@@ -501,6 +504,16 @@ public class MemoryPostingsFormat extend
}
@Override
+ public int startOffset() {
+ return -1;
+ }
+
+ @Override
+ public int endOffset() {
+ return -1;
+ }
+
+ @Override
public BytesRef getPayload() {
payloadRetrieved = true;
return payload;
@@ -618,8 +631,14 @@ public class MemoryPostingsFormat extend
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
- if (field.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+ public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+
+ if (needsOffsets) {
+ // This codec cannot index offsets yet, so there are none to return:
+ return null;
+ }
+
+ if (field.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
return null;
}
decodeMetaData();
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java Sun Jan 15 23:17:45 2012
@@ -215,10 +215,8 @@ public class PulsingPostingsReader exten
}
@Override
- public DocsAndPositionsEnum docsAndPositions(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
- if (field.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
- return null;
- }
+ public DocsAndPositionsEnum docsAndPositions(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsAndPositionsEnum reuse,
+ boolean needsOffsets) throws IOException {
//System.out.println("D&P: field=" + field.name);
final PulsingTermState termState = (PulsingTermState) _termState;
@@ -245,11 +243,12 @@ public class PulsingPostingsReader exten
return postings.reset(liveDocs, termState);
} else {
if (reuse instanceof PulsingDocsAndPositionsEnum) {
- DocsAndPositionsEnum wrapped = wrappedPostingsReader.docsAndPositions(field, termState.wrappedTermState, liveDocs, (DocsAndPositionsEnum) getOther(reuse));
+ DocsAndPositionsEnum wrapped = wrappedPostingsReader.docsAndPositions(field, termState.wrappedTermState, liveDocs, (DocsAndPositionsEnum) getOther(reuse),
+ needsOffsets);
setOther(wrapped, reuse); // wrapped.other = reuse
return wrapped;
} else {
- return wrappedPostingsReader.docsAndPositions(field, termState.wrappedTermState, liveDocs, reuse);
+ return wrappedPostingsReader.docsAndPositions(field, termState.wrappedTermState, liveDocs, reuse, needsOffsets);
}
}
}
@@ -486,6 +485,16 @@ public class PulsingPostingsReader exten
return position;
}
+ @Override
+ public int startOffset() {
+ return -1;
+ }
+
+ @Override
+ public int endOffset() {
+ return -1;
+ }
+
private void skipPositions() throws IOException {
while(posPending != 0) {
nextPosition();
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsWriter.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsWriter.java Sun Jan 15 23:17:45 2012
@@ -115,6 +115,9 @@ public final class PulsingPostingsWriter
@Override
public void setField(FieldInfo fieldInfo) {
this.indexOptions = fieldInfo.indexOptions;
+ if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
+ throw new IllegalArgumentException("this codec cannot index offsets: " + indexOptions);
+ }
if (DEBUG) System.out.println("PW field=" + fieldInfo.name + " indexOptions=" + indexOptions);
storePayloads = fieldInfo.storePayloads;
wrappedPostingsWriter.setField(fieldInfo);
@@ -165,7 +168,7 @@ public final class PulsingPostingsWriter
}
@Override
- public void addPosition(int position, BytesRef payload) throws IOException {
+ public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
if (DEBUG) System.out.println("PW pos=" + position + " payload=" + (payload == null ? "null" : payload.length + " bytes"));
if (pendingCount == pending.length) {
@@ -175,7 +178,7 @@ public final class PulsingPostingsWriter
if (pendingCount == -1) {
// We've already seen too many docs for this term --
// just forward to our fallback writer
- wrappedPostingsWriter.addPosition(position, payload);
+ wrappedPostingsWriter.addPosition(position, payload, -1, -1);
} else {
// buffer up
final Position pos = pending[pendingCount++];
@@ -360,7 +363,7 @@ public final class PulsingPostingsWriter
wrappedPostingsWriter.startTerm();
// Flush all buffered docs
- if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+ if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
Position doc = null;
for(Position pos : pending) {
if (doc == null) {
@@ -376,7 +379,7 @@ public final class PulsingPostingsWriter
wrappedPostingsWriter.startDoc(doc.docID, doc.termFreq);
}
if (DEBUG) System.out.println("PW: wrapped.addPos pos=" + pos.pos);
- wrappedPostingsWriter.addPosition(pos.pos, pos.payload);
+ wrappedPostingsWriter.addPosition(pos.pos, pos.payload, -1, -1);
}
//wrappedPostingsWriter.finishDoc();
} else {
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java Sun Jan 15 23:17:45 2012
@@ -294,7 +294,18 @@ public class SepPostingsReader extends P
}
@Override
- public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
+ public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs,
+ DocsAndPositionsEnum reuse, boolean needsOffsets)
+ throws IOException {
+
+ if (fieldInfo.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+ return null;
+ }
+
+ if (needsOffsets) {
+ return null;
+ }
+
assert fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
final SepTermState termState = (SepTermState) _termState;
SepDocsAndPositionsEnum postingsEnum;
@@ -713,6 +724,16 @@ public class SepPostingsReader extends P
return position;
}
+ @Override
+ public int startOffset() {
+ return -1;
+ }
+
+ @Override
+ public int endOffset() {
+ return -1;
+ }
+
private BytesRef payload;
@Override
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java Sun Jan 15 23:17:45 2012
@@ -188,6 +188,9 @@ public final class SepPostingsWriter ext
public void setField(FieldInfo fieldInfo) {
this.fieldInfo = fieldInfo;
this.indexOptions = fieldInfo.indexOptions;
+ if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
+ throw new IllegalArgumentException("this codec cannot index offsets");
+ }
skipListWriter.setIndexOptions(indexOptions);
storePayloads = indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS && fieldInfo.storePayloads;
}
@@ -222,7 +225,7 @@ public final class SepPostingsWriter ext
/** Add a new position & payload */
@Override
- public void addPosition(int position, BytesRef payload) throws IOException {
+ public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
final int delta = position - lastPosition;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java Sun Jan 15 23:17:45 2012
@@ -103,7 +103,7 @@ public class SimpleTextFieldInfosReader
IndexOptions indexOptions = IndexOptions.valueOf(readString(INDEXOPTIONS.length, scratch));
hasVectors |= storeTermVector;
- hasProx |= isIndexed && indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
+ hasProx |= isIndexed && indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
hasFreq |= isIndexed && indexOptions != IndexOptions.DOCS_ONLY;
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java Sun Jan 15 23:17:45 2012
@@ -62,7 +62,7 @@ public class SimpleTextFieldInfosWriter
SimpleTextUtil.writeNewline(out);
for (FieldInfo fi : infos) {
- assert fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.storePayloads;
+ assert fi.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.storePayloads;
SimpleTextUtil.write(out, NAME);
SimpleTextUtil.write(out, fi.name, scratch);
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java Sun Jan 15 23:17:45 2012
@@ -50,13 +50,15 @@ class SimpleTextFieldsReader extends Fie
private final IndexInput in;
private final FieldInfos fieldInfos;
- final static BytesRef END = SimpleTextFieldsWriter.END;
- final static BytesRef FIELD = SimpleTextFieldsWriter.FIELD;
- final static BytesRef TERM = SimpleTextFieldsWriter.TERM;
- final static BytesRef DOC = SimpleTextFieldsWriter.DOC;
- final static BytesRef FREQ = SimpleTextFieldsWriter.FREQ;
- final static BytesRef POS = SimpleTextFieldsWriter.POS;
- final static BytesRef PAYLOAD = SimpleTextFieldsWriter.PAYLOAD;
+ final static BytesRef END = SimpleTextFieldsWriter.END;
+ final static BytesRef FIELD = SimpleTextFieldsWriter.FIELD;
+ final static BytesRef TERM = SimpleTextFieldsWriter.TERM;
+ final static BytesRef DOC = SimpleTextFieldsWriter.DOC;
+ final static BytesRef FREQ = SimpleTextFieldsWriter.FREQ;
+ final static BytesRef POS = SimpleTextFieldsWriter.POS;
+ final static BytesRef START_OFFSET = SimpleTextFieldsWriter.START_OFFSET;
+ final static BytesRef END_OFFSET = SimpleTextFieldsWriter.END_OFFSET;
+ final static BytesRef PAYLOAD = SimpleTextFieldsWriter.PAYLOAD;
public SimpleTextFieldsReader(SegmentReadState state) throws IOException {
in = state.dir.openInput(SimpleTextPostingsFormat.getPostingsFileName(state.segmentInfo.name, state.segmentSuffix), state.context);
@@ -204,8 +206,16 @@ class SimpleTextFieldsReader extends Fie
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
- if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+ public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+
+ if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+ // Positions were not indexed
+ return null;
+ }
+
+ if (needsOffsets &&
+ indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) {
+ // Offsets were not indexed
return null;
}
@@ -215,7 +225,7 @@ class SimpleTextFieldsReader extends Fie
} else {
docsAndPositionsEnum = new SimpleTextDocsAndPositionsEnum();
}
- return docsAndPositionsEnum.reset(docsStart, liveDocs);
+ return docsAndPositionsEnum.reset(docsStart, liveDocs, indexOptions);
}
@Override
@@ -289,6 +299,10 @@ class SimpleTextFieldsReader extends Fie
termFreq = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
} else if (StringHelper.startsWith(scratch, POS)) {
// skip termFreq++;
+ } else if (StringHelper.startsWith(scratch, START_OFFSET)) {
+ // skip
+ } else if (StringHelper.startsWith(scratch, END_OFFSET)) {
+ // skip
} else if (StringHelper.startsWith(scratch, PAYLOAD)) {
// skip
} else {
@@ -325,6 +339,10 @@ class SimpleTextFieldsReader extends Fie
private final CharsRef scratchUTF16_2 = new CharsRef(10);
private BytesRef payload;
private long nextDocStart;
+ private boolean readOffsets;
+ private boolean readPositions;
+ private int startOffset = -1;
+ private int endOffset = -1;
public SimpleTextDocsAndPositionsEnum() {
this.inStart = SimpleTextFieldsReader.this.in;
@@ -335,10 +353,12 @@ class SimpleTextFieldsReader extends Fie
return in == inStart;
}
- public SimpleTextDocsAndPositionsEnum reset(long fp, Bits liveDocs) {
+ public SimpleTextDocsAndPositionsEnum reset(long fp, Bits liveDocs, IndexOptions indexOptions) {
this.liveDocs = liveDocs;
nextDocStart = fp;
docID = -1;
+ readPositions = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+ readOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
return this;
}
@@ -360,6 +380,7 @@ class SimpleTextFieldsReader extends Fie
while(true) {
final long lineStart = in.getFilePointer();
SimpleTextUtil.readLine(in, scratch);
+ //System.out.println("NEXT DOC: " + scratch.utf8ToString());
if (StringHelper.startsWith(scratch, DOC)) {
if (!first && (liveDocs == null || liveDocs.get(docID))) {
nextDocStart = lineStart;
@@ -376,6 +397,10 @@ class SimpleTextFieldsReader extends Fie
posStart = in.getFilePointer();
} else if (StringHelper.startsWith(scratch, POS)) {
// skip
+ } else if (StringHelper.startsWith(scratch, START_OFFSET)) {
+ // skip
+ } else if (StringHelper.startsWith(scratch, END_OFFSET)) {
+ // skip
} else if (StringHelper.startsWith(scratch, PAYLOAD)) {
// skip
} else {
@@ -399,10 +424,27 @@ class SimpleTextFieldsReader extends Fie
@Override
public int nextPosition() throws IOException {
- SimpleTextUtil.readLine(in, scratch);
- assert StringHelper.startsWith(scratch, POS): "got line=" + scratch.utf8ToString();
- UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+POS.length, scratch.length-POS.length, scratchUTF16_2);
- final int pos = ArrayUtil.parseInt(scratchUTF16_2.chars, 0, scratchUTF16_2.length);
+ final int pos;
+ if (readPositions) {
+ SimpleTextUtil.readLine(in, scratch);
+ assert StringHelper.startsWith(scratch, POS): "got line=" + scratch.utf8ToString();
+ UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+POS.length, scratch.length-POS.length, scratchUTF16_2);
+ pos = ArrayUtil.parseInt(scratchUTF16_2.chars, 0, scratchUTF16_2.length);
+ } else {
+ pos = -1;
+ }
+
+ if (readOffsets) {
+ SimpleTextUtil.readLine(in, scratch);
+ assert StringHelper.startsWith(scratch, START_OFFSET): "got line=" + scratch.utf8ToString();
+ UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+START_OFFSET.length, scratch.length-START_OFFSET.length, scratchUTF16_2);
+ startOffset = ArrayUtil.parseInt(scratchUTF16_2.chars, 0, scratchUTF16_2.length);
+ SimpleTextUtil.readLine(in, scratch);
+ assert StringHelper.startsWith(scratch, END_OFFSET): "got line=" + scratch.utf8ToString();
+ UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+END_OFFSET.length, scratch.length-END_OFFSET.length, scratchUTF16_2);
+ endOffset = ArrayUtil.parseInt(scratchUTF16_2.chars, 0, scratchUTF16_2.length);
+ }
+
final long fp = in.getFilePointer();
SimpleTextUtil.readLine(in, scratch);
if (StringHelper.startsWith(scratch, PAYLOAD)) {
@@ -421,6 +463,16 @@ class SimpleTextFieldsReader extends Fie
}
@Override
+ public int startOffset() throws IOException {
+ return startOffset;
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ return endOffset;
+ }
+
+ @Override
public BytesRef getPayload() {
// Some tests rely on only being able to retrieve the
// payload once
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java Sun Jan 15 23:17:45 2012
@@ -35,13 +35,15 @@ class SimpleTextFieldsWriter extends Fie
private final IndexOutput out;
private final BytesRef scratch = new BytesRef(10);
- final static BytesRef END = new BytesRef("END");
- final static BytesRef FIELD = new BytesRef("field ");
- final static BytesRef TERM = new BytesRef(" term ");
- final static BytesRef DOC = new BytesRef(" doc ");
- final static BytesRef FREQ = new BytesRef(" freq ");
- final static BytesRef POS = new BytesRef(" pos ");
- final static BytesRef PAYLOAD = new BytesRef(" payload ");
+ final static BytesRef END = new BytesRef("END");
+ final static BytesRef FIELD = new BytesRef("field ");
+ final static BytesRef TERM = new BytesRef(" term ");
+ final static BytesRef DOC = new BytesRef(" doc ");
+ final static BytesRef FREQ = new BytesRef(" freq ");
+ final static BytesRef POS = new BytesRef(" pos ");
+ final static BytesRef START_OFFSET = new BytesRef(" startOffset ");
+ final static BytesRef END_OFFSET = new BytesRef(" endOffset ");
+ final static BytesRef PAYLOAD = new BytesRef(" payload ");
public SimpleTextFieldsWriter(SegmentWriteState state) throws IOException {
final String fileName = SimpleTextPostingsFormat.getPostingsFileName(state.segmentName, state.segmentSuffix);
@@ -97,10 +99,19 @@ class SimpleTextFieldsWriter extends Fie
private class SimpleTextPostingsWriter extends PostingsConsumer {
private BytesRef term;
private boolean wroteTerm;
- private IndexOptions indexOptions;
+ private final IndexOptions indexOptions;
+ private final boolean writePositions;
+ private final boolean writeOffsets;
+
+ // for assert:
+ private int lastEndOffset = -1;
public SimpleTextPostingsWriter(FieldInfo field) {
this.indexOptions = field.indexOptions;
+ writePositions = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+ writeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+ //System.out.println("writeOffsets=" + writeOffsets);
+ //System.out.println("writePos=" + writePositions);
}
@Override
@@ -121,10 +132,10 @@ class SimpleTextFieldsWriter extends Fie
write(Integer.toString(termDocFreq));
newline();
}
+
+ lastEndOffset = -1;
}
-
-
public PostingsConsumer reset(BytesRef term) {
this.term = term;
wroteTerm = false;
@@ -132,10 +143,25 @@ class SimpleTextFieldsWriter extends Fie
}
@Override
- public void addPosition(int position, BytesRef payload) throws IOException {
- write(POS);
- write(Integer.toString(position));
- newline();
+ public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
+ if (writePositions) {
+ write(POS);
+ write(Integer.toString(position));
+ newline();
+ }
+
+ if (writeOffsets) {
+ assert endOffset >= startOffset;
+ assert startOffset >= lastEndOffset: "startOffset=" + startOffset + " lastEndOffset=" + lastEndOffset;
+ lastEndOffset = endOffset;
+ write(START_OFFSET);
+ write(Integer.toString(startOffset));
+ newline();
+ write(END_OFFSET);
+ write(Integer.toString(endOffset));
+ newline();
+ }
+
if (payload != null && payload.length > 0) {
assert payload.length != 0;
write(PAYLOAD);
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsFormat.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsFormat.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsFormat.java Sun Jan 15 23:17:45 2012
@@ -38,7 +38,7 @@ public class SimpleTextTermVectorsFormat
@Override
public TermVectorsReader vectorsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context) throws IOException {
- return new SimpleTextTermVectorsReader(directory, segmentInfo, fieldInfos, context);
+ return new SimpleTextTermVectorsReader(directory, segmentInfo, context);
}
@Override
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java Sun Jan 15 23:17:45 2012
@@ -26,11 +26,9 @@ import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexFileNames;
@@ -63,7 +61,7 @@ public class SimpleTextTermVectorsReader
private BytesRef scratch = new BytesRef();
private CharsRef scratchUTF16 = new CharsRef();
- public SimpleTextTermVectorsReader(Directory directory, SegmentInfo si, FieldInfos fieldInfos, IOContext context) throws IOException {
+ public SimpleTextTermVectorsReader(Directory directory, SegmentInfo si, IOContext context) throws IOException {
boolean success = false;
try {
in = directory.openInput(IndexFileNames.segmentFileName(si.name, "", VECTORS_EXTENSION), context);
@@ -114,7 +112,8 @@ public class SimpleTextTermVectorsReader
for (int i = 0; i < numFields; i++) {
readLine();
assert StringHelper.startsWith(scratch, FIELD);
- int fieldNumber = parseIntAt(FIELD.length);
+ // skip fieldNumber:
+ parseIntAt(FIELD.length);
readLine();
assert StringHelper.startsWith(scratch, FIELDNAME);
@@ -373,13 +372,16 @@ public class SimpleTextTermVectorsReader
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
+ public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
SimpleTVPostings postings = current.getValue();
if (postings.positions == null && postings.startOffsets == null) {
return null;
}
+ if (needsOffsets && (postings.startOffsets == null || postings.endOffsets == null)) {
+ return null;
+ }
// TODO: reuse
- SimpleTVDocsAndPositionsEnum e = new SimpleTVDocsAndPositionsEnum(postings.startOffsets != null);
+ SimpleTVDocsAndPositionsEnum e = new SimpleTVDocsAndPositionsEnum();
e.reset(liveDocs, postings.positions, postings.startOffsets, postings.endOffsets);
return e;
}
@@ -436,7 +438,6 @@ public class SimpleTextTermVectorsReader
}
private static class SimpleTVDocsAndPositionsEnum extends DocsAndPositionsEnum {
- private final OffsetAttribute offsetAtt;
private boolean didNext;
private int doc = -1;
private int nextPos;
@@ -445,18 +446,6 @@ public class SimpleTextTermVectorsReader
private int[] startOffsets;
private int[] endOffsets;
- public SimpleTVDocsAndPositionsEnum(boolean storeOffsets) {
- if (storeOffsets) {
- offsetAtt = attributes().addAttribute(OffsetAttribute.class);
- } else {
- offsetAtt = null;
- }
- }
-
- public boolean canReuse(boolean storeOffsets) {
- return storeOffsets == (offsetAtt != null);
- }
-
@Override
public int freq() {
if (positions != null) {
@@ -495,7 +484,6 @@ public class SimpleTextTermVectorsReader
this.liveDocs = liveDocs;
this.positions = positions;
this.startOffsets = startOffsets;
- assert (offsetAtt != null) == (startOffsets != null);
this.endOffsets = endOffsets;
this.doc = -1;
didNext = false;
@@ -516,11 +504,6 @@ public class SimpleTextTermVectorsReader
public int nextPosition() {
assert (positions != null && nextPos < positions.length) ||
startOffsets != null && nextPos < startOffsets.length;
-
- if (startOffsets != null) {
- offsetAtt.setOffset(startOffsets[nextPos],
- endOffsets[nextPos]);
- }
if (positions != null) {
return positions[nextPos++];
} else {
@@ -528,5 +511,15 @@ public class SimpleTextTermVectorsReader
return -1;
}
}
+
+ @Override
+ public int startOffset() {
+ return startOffsets[nextPos-1];
+ }
+
+ @Override
+ public int endOffset() {
+ return endOffsets[nextPos-1];
+ }
}
}