You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/01/16 00:17:47 UTC
svn commit: r1231794 [2/3] - in /lucene/dev/trunk: lucene/
lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/
lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/
lucene/contrib/memory/src/java/org/apache/l...
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java Sun Jan 15 23:17:45 2012
@@ -289,6 +289,20 @@ public class CheckIndex {
infoStream = null;
}
+ private boolean crossCheckTermVectors;
+
+ /** If true, term vectors are compared against postings to
+ * make sure they are the same. This will likely
+ * drastically increase time it takes to run CheckIndex! */
+ public void setCrossCheckTermVectors(boolean v) {
+ crossCheckTermVectors = v;
+ }
+
+ /** See {@link #setCrossCheckTermVectors}. */
+ public boolean getCrossCheckTermVectors() {
+ return crossCheckTermVectors;
+ }
+
private boolean verbose;
/** Set infoStream where messages should go. If null, no
@@ -563,7 +577,7 @@ public class CheckIndex {
segInfoStat.fieldNormStatus = testFieldNorms(fieldInfos, reader);
// Test the Term Index
- segInfoStat.termIndexStatus = testTermIndex(reader);
+ segInfoStat.termIndexStatus = testPostings(reader);
// Test Stored Fields
segInfoStat.storedFieldStatus = testStoredFields(info, reader, nf);
@@ -678,7 +692,11 @@ public class CheckIndex {
/**
* Test the term index.
*/
- private Status.TermIndexStatus testTermIndex(SegmentReader reader) {
+ private Status.TermIndexStatus testPostings(SegmentReader reader) {
+
+ // TODO: we should go and verify term vectors match, if
+ // crossCheckTermVectors is on...
+
final Status.TermIndexStatus status = new Status.TermIndexStatus();
final int maxDoc = reader.maxDoc();
@@ -760,7 +778,7 @@ public class CheckIndex {
docs = termsEnum.docs(liveDocs, docs, false);
docsAndFreqs = termsEnum.docs(liveDocs, docsAndFreqs, true);
- postings = termsEnum.docsAndPositions(liveDocs, postings);
+ postings = termsEnum.docsAndPositions(liveDocs, postings, false);
if (hasOrd) {
long ord = -1;
@@ -890,7 +908,7 @@ public class CheckIndex {
if (hasPositions) {
for(int idx=0;idx<7;idx++) {
final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8);
- postings = termsEnum.docsAndPositions(liveDocs, postings);
+ postings = termsEnum.docsAndPositions(liveDocs, postings, false);
final int docID = postings.advance(skipDocID);
if (docID == DocsEnum.NO_MORE_DOCS) {
break;
@@ -1256,7 +1274,10 @@ public class CheckIndex {
private Status.TermVectorStatus testTermVectors(SegmentInfo info, SegmentReader reader, NumberFormat format) {
final Status.TermVectorStatus status = new Status.TermVectorStatus();
- TermsEnum termsEnum = null;
+ // TODO: in theory we could test that term vectors have
+ // same terms/pos/offsets as the postings, but it'd be
+ // very slow...
+
try {
if (infoStream != null) {
infoStream.print(" test: term vectors........");
@@ -1264,9 +1285,25 @@ public class CheckIndex {
// TODO: maybe we can factor out testTermIndex and reuse here?
DocsEnum docs = null;
- DocsEnum docsAndFreqs = null;
DocsAndPositionsEnum postings = null;
+
+ // Only used if crossCheckTermVectors is true:
+ DocsEnum postingsDocs = null;
+ DocsAndPositionsEnum postingsPostings = null;
+
final Bits liveDocs = reader.getLiveDocs();
+
+ final Fields postingsFields;
+ // TODO: testTermsIndex
+ if (crossCheckTermVectors) {
+ postingsFields = reader.fields();
+ } else {
+ postingsFields = null;
+ }
+
+ TermsEnum termsEnum = null;
+ TermsEnum postingsTermsEnum = null;
+
for (int j = 0; j < info.docCount; ++j) {
if (liveDocs == null || liveDocs.get(j)) {
status.docCount++;
@@ -1290,6 +1327,16 @@ public class CheckIndex {
Terms terms = tfv.terms(field);
termsEnum = terms.iterator(termsEnum);
+
+ if (crossCheckTermVectors) {
+ Terms postingsTerms = postingsFields.terms(field);
+ if (postingsTerms == null) {
+ throw new RuntimeException("vector field=" + field + " does not exist in postings; doc=" + j);
+ }
+ postingsTermsEnum = postingsTerms.iterator(postingsTermsEnum);
+ } else {
+ postingsTermsEnum = null;
+ }
long tfvComputedTermCountForField = 0;
long tfvComputedSumTotalTermFreq = 0;
@@ -1308,52 +1355,171 @@ public class CheckIndex {
throw new RuntimeException("totalTermFreq: " + totalTermFreq + " is out of bounds");
}
- postings = termsEnum.docsAndPositions(null, postings);
+ final boolean hasPositions;
+ final boolean hasOffsets;
+ final boolean hasFreqs;
+
+ // TODO: really we need a reflection/query
+ // API so we can just ask what was indexed
+ // instead of "probing"...
+
+ // Try offsets:
+ postings = termsEnum.docsAndPositions(null, postings, true);
if (postings == null) {
- docsAndFreqs = termsEnum.docs(null, docsAndFreqs, true);
- if (docsAndFreqs == null) {
- docs = termsEnum.docs(null, docs, false);
+ hasOffsets = false;
+ // Try only positions:
+ postings = termsEnum.docsAndPositions(null, postings, false);
+ if (postings == null) {
+ hasPositions = false;
+ // Try docIDs & freqs:
+ docs = termsEnum.docs(null, docs, true);
+ if (docs == null) {
+ // OK, only docIDs:
+ hasFreqs = false;
+ docs = termsEnum.docs(null, docs, false);
+ } else {
+ hasFreqs = true;
+ }
+ } else {
+ hasPositions = true;
+ hasFreqs = true;
+ }
+ } else {
+ hasOffsets = true;
+ // NOTE: may be a lie... but we accept -1 below
+ hasPositions = true;
+ hasFreqs = true;
+ }
+
+ final DocsEnum docs2;
+ if (hasPositions || hasOffsets) {
+ assert postings != null;
+ docs2 = postings;
+ } else {
+ assert docs != null;
+ docs2 = docs;
+ }
+
+ final DocsEnum postingsDocs2;
+ final boolean postingsHasFreq;
+ if (crossCheckTermVectors) {
+ if (!postingsTermsEnum.seekExact(term, true)) {
+ throw new RuntimeException("vector term=" + term + " field=" + field + " does not exist in postings; doc=" + j);
+ }
+ postingsPostings = postingsTermsEnum.docsAndPositions(null, postingsPostings, true);
+ if (postingsPostings == null) {
+ // Term vectors were indexed w/ offsets but postings were not
+ postingsPostings = postingsTermsEnum.docsAndPositions(null, postingsPostings, false);
+ if (postingsPostings == null) {
+ postingsDocs = postingsTermsEnum.docs(null, postingsDocs, true);
+ if (postingsDocs == null) {
+ postingsHasFreq = false;
+ postingsDocs = postingsTermsEnum.docs(null, postingsDocs, false);
+ if (postingsDocs == null) {
+ throw new RuntimeException("vector term=" + term + " field=" + field + " does not exist in postings; doc=" + j);
+ }
+ } else {
+ postingsHasFreq = true;
+ }
+ } else {
+ postingsHasFreq = true;
+ }
} else {
- docs = docsAndFreqs;
+ postingsHasFreq = true;
+ }
+
+ if (postingsPostings != null) {
+ postingsDocs2 = postingsPostings;
+ } else {
+ postingsDocs2 = postingsDocs;
+ }
+
+ final int advanceDoc = postingsDocs2.advance(j);
+ if (advanceDoc != j) {
+ throw new RuntimeException("vector term=" + term + " field=" + field + ": doc=" + j + " was not found in postings (got: " + advanceDoc + ")");
}
} else {
- docs = docsAndFreqs = postings;
+ postingsDocs2 = null;
+ postingsHasFreq = false;
}
- final int doc = docs.nextDoc();
+ final int doc = docs2.nextDoc();
if (doc != 0) {
throw new RuntimeException("vector for doc " + j + " didn't return docID=0: got docID=" + doc);
}
- if (docsAndFreqs != null) {
- final int tf = docsAndFreqs.freq();
+ if (hasFreqs) {
+ final int tf = docs2.freq();
if (tf <= 0) {
throw new RuntimeException("vector freq " + tf + " is out of bounds");
}
if (totalTermFreq != -1 && totalTermFreq != tf) {
throw new RuntimeException("vector totalTermFreq " + totalTermFreq + " != tf " + tf);
}
+ if (crossCheckTermVectors && postingsHasFreq) {
+ if (postingsDocs2.freq() != tf) {
+ throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": freq=" + tf + " differs from postings freq=" + postingsDocs2.freq());
+ }
+ }
tfvComputedSumTotalTermFreq += tf;
- if (postings != null) {
+ if (hasPositions || hasOffsets) {
int lastPosition = -1;
+ //int lastStartOffset = -1;
for (int i = 0; i < tf; i++) {
int pos = postings.nextPosition();
- if (pos != -1 && pos < 0) {
- throw new RuntimeException("vector position " + pos + " is out of bounds");
- }
+ if (hasPositions) {
+ if (pos != -1 && pos < 0) {
+ throw new RuntimeException("vector position " + pos + " is out of bounds");
+ }
+ if (pos < lastPosition) {
+ throw new RuntimeException("vector position " + pos + " < lastPos " + lastPosition);
+ }
- if (pos < lastPosition) {
- throw new RuntimeException("vector position " + pos + " < lastPos " + lastPosition);
+ lastPosition = pos;
+ }
+
+ if (crossCheckTermVectors && postingsPostings != null) {
+ int postingsPos = postingsPostings.nextPosition();
+ if (pos != -1 && postingsPos != -1 && pos != postingsPos) {
+ throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": pos=" + pos + " differs from postings pos=" + postingsPos);
+ }
+ }
+
+ if (hasOffsets) {
+ // Call the methods to at least make
+ // sure they don't throw exc:
+ final int startOffset = postings.startOffset();
+ final int endOffset = postings.endOffset();
+ // TODO: these are too anal...?
+ /*
+ if (endOffset < startOffset) {
+ throw new RuntimeException("vector startOffset=" + startOffset + " is > endOffset=" + endOffset);
+ }
+ if (startOffset < lastStartOffset) {
+ throw new RuntimeException("vector startOffset=" + startOffset + " is < prior startOffset=" + lastStartOffset);
+ }
+ lastStartOffset = startOffset;
+ */
+
+ if (crossCheckTermVectors && postingsPostings != null) {
+ final int postingsStartOffset = postingsPostings.startOffset();
+
+ final int postingsEndOffset = postingsPostings.endOffset();
+ if (startOffset != -1 && postingsStartOffset != -1 && startOffset != postingsStartOffset) {
+ throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": startOffset=" + startOffset + " differs from postings startOffset=" + postingsStartOffset);
+ }
+ if (endOffset != -1 && postingsEndOffset != -1 && endOffset != postingsEndOffset) {
+ throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": endOffset=" + endOffset + " differs from postings endOffset=" + postingsEndOffset);
+ }
+ }
}
-
- lastPosition = pos;
}
}
}
- if (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+ if (docs2.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
throw new RuntimeException("vector for doc " + j + " references multiple documents!");
}
}
@@ -1474,6 +1640,7 @@ public class CheckIndex {
public static void main(String[] args) throws IOException, InterruptedException {
boolean doFix = false;
+ boolean doCrossCheckTermVectors = false;
Codec codec = Codec.getDefault(); // only used when fixing
boolean verbose = false;
List<String> onlySegments = new ArrayList<String>();
@@ -1484,6 +1651,8 @@ public class CheckIndex {
String arg = args[i];
if ("-fix".equals(arg)) {
doFix = true;
+ } else if ("-crossCheckTermVectors".equals(arg)) {
+ doCrossCheckTermVectors = true;
} else if ("-codec".equals(arg)) {
if (i == args.length-1) {
System.out.println("ERROR: missing name for -codec option");
@@ -1519,9 +1688,10 @@ public class CheckIndex {
if (indexPath == null) {
System.out.println("\nERROR: index path not specified");
- System.out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y] [-dir-impl X]\n" +
+ System.out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-crossCheckTermVectors] [-segment X] [-segment Y] [-dir-impl X]\n" +
"\n" +
" -fix: actually write a new segments_N file, removing any problematic segments\n" +
+ " -crossCheckTermVectors: verifies that term vectors match postings; THIS IS VERY SLOW!\n" +
" -codec X: when fixing, codec to write the new segments_N file with\n" +
" -verbose: print additional details\n" +
" -segment X: only check the specified segments. This can be specified multiple\n" +
@@ -1570,6 +1740,7 @@ public class CheckIndex {
}
CheckIndex checker = new CheckIndex(dir);
+ checker.setCrossCheckTermVectors(doCrossCheckTermVectors);
checker.setInfoStream(System.out, verbose);
Status result = checker.checkIndex(onlySegments);
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java Sun Jan 15 23:17:45 2012
@@ -73,8 +73,9 @@ final class DocInverterPerField extends
// tokenized.
if (field.fieldType().indexed() && doInvert) {
- if (i > 0)
+ if (i > 0) {
fieldState.position += docState.analyzer == null ? 0 : docState.analyzer.getPositionIncrementGap(fieldInfo.name);
+ }
final TokenStream stream = field.tokenStream(docState.analyzer);
// reset the TokenStream to the first token
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocTermOrds.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocTermOrds.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocTermOrds.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocTermOrds.java Sun Jan 15 23:17:45 2012
@@ -655,8 +655,8 @@ public class DocTermOrds {
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
- return termsEnum.docsAndPositions(liveDocs, reuse);
+ public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+ return termsEnum.docsAndPositions(liveDocs, reuse, needsOffsets);
}
@Override
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java Sun Jan 15 23:17:45 2012
@@ -26,9 +26,20 @@ public abstract class DocsAndPositionsEn
/** Returns the next position. You should only call this
* up to {@link DocsEnum#freq()} times else
- * the behavior is not defined. */
+ * the behavior is not defined. If positions were not
+ * indexed this will return -1; this only happens if
+ * offsets were indexed and you passed needsOffsets=true
+ * when pulling the enum. */
public abstract int nextPosition() throws IOException;
+ /** Returns start offset for the current position, or -1
+ * if offsets were not indexed. */
+ public abstract int startOffset() throws IOException;
+
+ /** Returns end offset for the current position, or -1 if
+ * offsets were not indexed. */
+ public abstract int endOffset() throws IOException;
+
/** Returns the payload at this position, or null if no
* payload was indexed. Only call this once per
* position. */
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java Sun Jan 15 23:17:45 2012
@@ -38,13 +38,18 @@ public final class FieldInfo {
* @lucene.experimental
*/
public static enum IndexOptions {
+ // NOTE: order is important here; FieldInfo uses this
+ // order to merge two conflicting IndexOptions (always
+ // "downgrades" by picking the lowest).
/** only documents are indexed: term frequencies and positions are omitted */
// TODO: maybe rename to just DOCS?
DOCS_ONLY,
/** only documents and term frequencies are indexed: positions are omitted */
DOCS_AND_FREQS,
- /** full postings: documents, frequencies, and positions */
- DOCS_AND_FREQS_AND_POSITIONS
+ /** documents, frequencies and positions */
+ DOCS_AND_FREQS_AND_POSITIONS,
+ /** documents, frequencies, positions and offsets */
+ DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
};
/**
@@ -67,7 +72,7 @@ public final class FieldInfo {
this.omitNorms = false;
this.indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
}
- assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !storePayloads;
+ assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !storePayloads;
}
@Override
@@ -95,10 +100,13 @@ public final class FieldInfo {
if (this.indexOptions != indexOptions) {
// downgrade
this.indexOptions = this.indexOptions.compareTo(indexOptions) < 0 ? this.indexOptions : indexOptions;
- this.storePayloads = false;
+ if (this.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+ // cannot store payloads if we don't store positions:
+ this.storePayloads = false;
+ }
}
}
- assert this.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !this.storePayloads;
+ assert this.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !this.storePayloads;
}
void setDocValuesType(DocValues.Type v) {
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java Sun Jan 15 23:17:45 2012
@@ -185,7 +185,7 @@ public final class FieldInfos implements
}
// mutable FIs must check!
for (FieldInfo fi : this) {
- if (fi.isIndexed && fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+ if (fi.isIndexed && fi.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
return true;
}
}
@@ -430,7 +430,7 @@ public final class FieldInfos implements
FieldInfo clone = (FieldInfo) (fieldInfo).clone();
roFis.putInternal(clone);
roFis.hasVectors |= clone.storeTermVector;
- roFis.hasProx |= clone.isIndexed && clone.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
+ roFis.hasProx |= clone.isIndexed && clone.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
roFis.hasFreq |= clone.isIndexed && clone.indexOptions != IndexOptions.DOCS_ONLY;
}
return roFis;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java Sun Jan 15 23:17:45 2012
@@ -176,8 +176,8 @@ public class FilterIndexReader extends I
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
- return in.docsAndPositions(liveDocs, reuse);
+ public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+ return in.docsAndPositions(liveDocs, reuse, needsOffsets);
}
@Override
@@ -259,6 +259,16 @@ public class FilterIndexReader extends I
}
@Override
+ public int startOffset() throws IOException {
+ return in.startOffset();
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ return in.endOffset();
+ }
+
+ @Override
public BytesRef getPayload() throws IOException {
return in.getPayload();
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilteredTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilteredTermsEnum.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilteredTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilteredTermsEnum.java Sun Jan 15 23:17:45 2012
@@ -171,8 +171,8 @@ public abstract class FilteredTermsEnum
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits bits, DocsAndPositionsEnum reuse) throws IOException {
- return tenum.docsAndPositions(bits, reuse);
+ public DocsAndPositionsEnum docsAndPositions(Bits bits, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+ return tenum.docsAndPositions(bits, reuse, needsOffsets);
}
/** This enum does not support seeking!
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java Sun Jan 15 23:17:45 2012
@@ -83,7 +83,7 @@ final class FreqProxTermsWriter extends
// Aggregate the storePayload as seen by the same
// field across multiple threads
- if (fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+ if (fieldInfo.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
fieldInfo.storePayloads |= fieldWriter.hasPayloads;
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java Sun Jan 15 23:17:45 2012
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Comparator;
import java.util.Map;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.PostingsConsumer;
@@ -43,7 +44,11 @@ final class FreqProxTermsWriterPerField
final DocumentsWriterPerThread.DocState docState;
final FieldInvertState fieldState;
IndexOptions indexOptions;
+ private boolean writeFreq;
+ private boolean writeProx;
+ private boolean writeOffsets;
PayloadAttribute payloadAttribute;
+ OffsetAttribute offsetAttribute;
public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriter parent, FieldInfo fieldInfo) {
this.termsHashPerField = termsHashPerField;
@@ -51,15 +56,16 @@ final class FreqProxTermsWriterPerField
this.fieldInfo = fieldInfo;
docState = termsHashPerField.docState;
fieldState = termsHashPerField.fieldState;
- indexOptions = fieldInfo.indexOptions;
+ setIndexOptions(fieldInfo.indexOptions);
}
@Override
int getStreamCount() {
- if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
+ if (!writeProx) {
return 1;
- else
+ } else {
return 2;
+ }
}
@Override
@@ -74,13 +80,21 @@ final class FreqProxTermsWriterPerField
return fieldInfo.name.compareTo(other.fieldInfo.name);
}
+ // Called after flush
void reset() {
// Record, up front, whether our in-RAM format will be
// with or without term freqs:
- indexOptions = fieldInfo.indexOptions;
+ setIndexOptions(fieldInfo.indexOptions);
payloadAttribute = null;
}
+ private void setIndexOptions(IndexOptions indexOptions) {
+ this.indexOptions = indexOptions;
+ writeFreq = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+ writeProx = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+ writeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+ }
+
@Override
boolean start(IndexableField[] fields, int count) {
for(int i=0;i<count;i++) {
@@ -98,9 +112,16 @@ final class FreqProxTermsWriterPerField
} else {
payloadAttribute = null;
}
+ if (writeOffsets) {
+ offsetAttribute = fieldState.attributeSource.addAttribute(OffsetAttribute.class);
+ } else {
+ offsetAttribute = null;
+ }
}
void writeProx(final int termID, int proxCode) {
+ //System.out.println("writeProx termID=" + termID + " proxCode=" + proxCode);
+ assert writeProx;
final Payload payload;
if (payloadAttribute == null) {
payload = null;
@@ -113,12 +134,24 @@ final class FreqProxTermsWriterPerField
termsHashPerField.writeVInt(1, payload.length);
termsHashPerField.writeBytes(1, payload.data, payload.offset, payload.length);
hasPayloads = true;
- } else
+ } else {
termsHashPerField.writeVInt(1, proxCode<<1);
+ }
FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
postings.lastPositions[termID] = fieldState.position;
+ }
+
+ void writeOffsets(final int termID, int prevOffset) {
+ assert writeOffsets;
+ final int startOffset = offsetAttribute.startOffset();
+ final int endOffset = offsetAttribute.endOffset();
+ //System.out.println("writeOffsets termID=" + termID + " prevOffset=" + prevOffset + " startOff=" + startOffset + " endOff=" + endOffset);
+ termsHashPerField.writeVInt(1, startOffset - prevOffset);
+ termsHashPerField.writeVInt(1, endOffset - startOffset);
+ FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
+ postings.lastOffsets[termID] = startOffset;
}
@Override
@@ -129,13 +162,18 @@ final class FreqProxTermsWriterPerField
FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
postings.lastDocIDs[termID] = docState.docID;
- if (indexOptions == IndexOptions.DOCS_ONLY) {
+ if (!writeFreq) {
postings.lastDocCodes[termID] = docState.docID;
} else {
postings.lastDocCodes[termID] = docState.docID << 1;
postings.docFreqs[termID] = 1;
- if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+ if (writeProx) {
writeProx(termID, fieldState.position);
+ if (writeOffsets) {
+ writeOffsets(termID, fieldState.offset);
+ }
+ } else {
+ assert !writeOffsets;
}
}
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
@@ -149,9 +187,10 @@ final class FreqProxTermsWriterPerField
FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
- assert indexOptions == IndexOptions.DOCS_ONLY || postings.docFreqs[termID] > 0;
+ assert !writeFreq || postings.docFreqs[termID] > 0;
- if (indexOptions == IndexOptions.DOCS_ONLY) {
+ if (!writeFreq) {
+ assert postings.docFreqs == null;
if (docState.docID != postings.lastDocIDs[termID]) {
assert docState.docID > postings.lastDocIDs[termID];
termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
@@ -159,59 +198,76 @@ final class FreqProxTermsWriterPerField
postings.lastDocIDs[termID] = docState.docID;
fieldState.uniqueTermCount++;
}
- } else {
- if (docState.docID != postings.lastDocIDs[termID]) {
- assert docState.docID > postings.lastDocIDs[termID]:"id: "+docState.docID + " postings ID: "+ postings.lastDocIDs[termID] + " termID: "+termID;
- // Term not yet seen in the current doc but previously
- // seen in other doc(s) since the last flush
-
- // Now that we know doc freq for previous doc,
- // write it & lastDocCode
- if (1 == postings.docFreqs[termID])
- termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]|1);
- else {
- termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
- termsHashPerField.writeVInt(0, postings.docFreqs[termID]);
- }
- postings.docFreqs[termID] = 1;
- fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
- postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
- postings.lastDocIDs[termID] = docState.docID;
- if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
- writeProx(termID, fieldState.position);
- }
- fieldState.uniqueTermCount++;
+ } else if (docState.docID != postings.lastDocIDs[termID]) {
+ assert docState.docID > postings.lastDocIDs[termID]:"id: "+docState.docID + " postings ID: "+ postings.lastDocIDs[termID] + " termID: "+termID;
+ // Term not yet seen in the current doc but previously
+ // seen in other doc(s) since the last flush
+
+ // Now that we know doc freq for previous doc,
+ // write it & lastDocCode
+ if (1 == postings.docFreqs[termID]) {
+ termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]|1);
} else {
- fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.docFreqs[termID]);
- if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
- writeProx(termID, fieldState.position-postings.lastPositions[termID]);
+ termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
+ termsHashPerField.writeVInt(0, postings.docFreqs[termID]);
+ }
+ postings.docFreqs[termID] = 1;
+ fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
+ postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
+ postings.lastDocIDs[termID] = docState.docID;
+ if (writeProx) {
+ writeProx(termID, fieldState.position);
+ if (writeOffsets) {
+ writeOffsets(termID, fieldState.offset);
}
+ } else {
+ assert !writeOffsets;
+ }
+ fieldState.uniqueTermCount++;
+ } else {
+ fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.docFreqs[termID]);
+ if (writeProx) {
+ writeProx(termID, fieldState.position-postings.lastPositions[termID]);
+ }
+ if (writeOffsets) {
+ writeOffsets(termID, postings.lastOffsets[termID]);
}
}
}
@Override
ParallelPostingsArray createPostingsArray(int size) {
- return new FreqProxPostingsArray(size);
+ return new FreqProxPostingsArray(size, writeFreq, writeProx, writeOffsets);
}
static final class FreqProxPostingsArray extends ParallelPostingsArray {
- public FreqProxPostingsArray(int size) {
+ public FreqProxPostingsArray(int size, boolean writeFreqs, boolean writeProx, boolean writeOffsets) {
super(size);
- docFreqs = new int[size];
+ if (writeFreqs) {
+ docFreqs = new int[size];
+ }
lastDocIDs = new int[size];
lastDocCodes = new int[size];
- lastPositions = new int[size];
+ if (writeProx) {
+ lastPositions = new int[size];
+ if (writeOffsets) {
+ lastOffsets = new int[size];
+ }
+ } else {
+ assert !writeOffsets;
+ }
+ //System.out.println("PA init freqs=" + writeFreqs + " pos=" + writeProx + " offs=" + writeOffsets);
}
int docFreqs[]; // # times this term occurs in the current doc
int lastDocIDs[]; // Last docID where this term occurred
int lastDocCodes[]; // Code for prior doc
int lastPositions[]; // Last position where this term occurred
+ int lastOffsets[]; // Last endOffset where this term occurred
@Override
ParallelPostingsArray newInstance(int size) {
- return new FreqProxPostingsArray(size);
+ return new FreqProxPostingsArray(size, docFreqs != null, lastPositions != null, lastOffsets != null);
}
@Override
@@ -221,15 +277,36 @@ final class FreqProxTermsWriterPerField
super.copyTo(toArray, numToCopy);
- System.arraycopy(docFreqs, 0, to.docFreqs, 0, numToCopy);
System.arraycopy(lastDocIDs, 0, to.lastDocIDs, 0, numToCopy);
System.arraycopy(lastDocCodes, 0, to.lastDocCodes, 0, numToCopy);
- System.arraycopy(lastPositions, 0, to.lastPositions, 0, numToCopy);
+ if (lastPositions != null) {
+ assert to.lastPositions != null;
+ System.arraycopy(lastPositions, 0, to.lastPositions, 0, numToCopy);
+ }
+ if (lastOffsets != null) {
+ assert to.lastOffsets != null;
+ System.arraycopy(lastOffsets, 0, to.lastOffsets, 0, numToCopy);
+ }
+ if (docFreqs != null) {
+ assert to.docFreqs != null;
+ System.arraycopy(docFreqs, 0, to.docFreqs, 0, numToCopy);
+ }
}
@Override
int bytesPerPosting() {
- return ParallelPostingsArray.BYTES_PER_POSTING + 4 * RamUsageEstimator.NUM_BYTES_INT;
+ int bytes = ParallelPostingsArray.BYTES_PER_POSTING + 2 * RamUsageEstimator.NUM_BYTES_INT;
+ if (lastPositions != null) {
+ bytes += RamUsageEstimator.NUM_BYTES_INT;
+ }
+ if (lastOffsets != null) {
+ bytes += RamUsageEstimator.NUM_BYTES_INT;
+ }
+ if (docFreqs != null) {
+ bytes += RamUsageEstimator.NUM_BYTES_INT;
+ }
+
+ return bytes;
}
}
@@ -246,8 +323,33 @@ final class FreqProxTermsWriterPerField
final TermsConsumer termsConsumer = consumer.addField(fieldInfo);
final Comparator<BytesRef> termComp = termsConsumer.getComparator();
+ // CONFUSING: this.indexOptions holds the index options
+ // that were current when we first saw this field. But
+ // it's possible this has changed, eg when other
+ // documents are indexed that cause a "downgrade" of the
+ // IndexOptions. So we must decode the in-RAM buffer
+ // according to this.indexOptions, but then write the
+ // new segment to the directory according to
+ // currentFieldIndexOptions:
final IndexOptions currentFieldIndexOptions = fieldInfo.indexOptions;
+ final boolean writeTermFreq = currentFieldIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+ final boolean writePositions = currentFieldIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+ final boolean writeOffsets = currentFieldIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+
+ final boolean readTermFreq = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+ final boolean readPositions = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+ final boolean readOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+
+ //System.out.println("flush readTF=" + readTermFreq + " readPos=" + readPositions + " readOffs=" + readOffsets);
+
+ // Make sure FieldInfo.update is working correctly!:
+ assert !writeTermFreq || readTermFreq;
+ assert !writePositions || readPositions;
+ assert !writeOffsets || readOffsets;
+
+ assert !writeOffsets || writePositions;
+
final Map<Term,Integer> segDeletes;
if (state.segDeletes != null && state.segDeletes.terms.size() > 0) {
segDeletes = state.segDeletes.terms;
@@ -268,12 +370,13 @@ final class FreqProxTermsWriterPerField
for (int i = 0; i < numTerms; i++) {
final int termID = termIDs[i];
+ //System.out.println("term=" + termID);
// Get BytesRef
final int textStart = postings.textStarts[termID];
termsHashPerField.bytePool.setBytesRef(text, textStart);
termsHashPerField.initReader(freq, termID, 0);
- if (fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+ if (readPositions || readOffsets) {
termsHashPerField.initReader(prox, termID, 1);
}
@@ -303,15 +406,18 @@ final class FreqProxTermsWriterPerField
int numDocs = 0;
long totTF = 0;
int docID = 0;
- int termFreq = 0;
while(true) {
+ //System.out.println(" cycle");
+ final int termDocFreq;
if (freq.eof()) {
if (postings.lastDocCodes[termID] != -1) {
// Return last doc
docID = postings.lastDocIDs[termID];
- if (indexOptions != IndexOptions.DOCS_ONLY) {
- termFreq = postings.docFreqs[termID];
+ if (readTermFreq) {
+ termDocFreq = postings.docFreqs[termID];
+ } else {
+ termDocFreq = 0;
}
postings.lastDocCodes[termID] = -1;
} else {
@@ -320,14 +426,15 @@ final class FreqProxTermsWriterPerField
}
} else {
final int code = freq.readVInt();
- if (indexOptions == IndexOptions.DOCS_ONLY) {
+ if (!readTermFreq) {
docID += code;
+ termDocFreq = 0;
} else {
docID += code >>> 1;
if ((code & 1) != 0) {
- termFreq = 1;
+ termDocFreq = 1;
} else {
- termFreq = freq.readVInt();
+ termDocFreq = freq.readVInt();
}
}
@@ -336,7 +443,6 @@ final class FreqProxTermsWriterPerField
numDocs++;
assert docID < state.numDocs: "doc=" + docID + " maxDoc=" + state.numDocs;
- final int termDocFreq = termFreq;
// NOTE: we could check here if the docID was
// deleted, and skip it. However, this is somewhat
@@ -362,45 +468,54 @@ final class FreqProxTermsWriterPerField
state.liveDocs.clear(docID);
}
- if (currentFieldIndexOptions != IndexOptions.DOCS_ONLY) {
- totTF += termDocFreq;
- }
+ totTF += termDocFreq;
// Carefully copy over the prox + payload info,
// changing the format to match Lucene's segment
// format.
- if (currentFieldIndexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
- // we do write positions & payload
+ if (readPositions || readOffsets) {
+ // we did record positions (& maybe payload) and/or offsets
int position = 0;
+ int offset = 0;
for(int j=0;j<termDocFreq;j++) {
- final int code = prox.readVInt();
- position += code >> 1;
-
- final int payloadLength;
final BytesRef thisPayload;
- if ((code & 1) != 0) {
- // This position has a payload
- payloadLength = prox.readVInt();
+ if (readPositions) {
+ final int code = prox.readVInt();
+ position += code >> 1;
+
+ if ((code & 1) != 0) {
+
+ // This position has a payload
+ final int payloadLength = prox.readVInt();
+
+ if (payload == null) {
+ payload = new BytesRef();
+ payload.bytes = new byte[payloadLength];
+ } else if (payload.bytes.length < payloadLength) {
+ payload.grow(payloadLength);
+ }
+
+ prox.readBytes(payload.bytes, 0, payloadLength);
+ payload.length = payloadLength;
+ thisPayload = payload;
- if (payload == null) {
- payload = new BytesRef();
- payload.bytes = new byte[payloadLength];
- } else if (payload.bytes.length < payloadLength) {
- payload.grow(payloadLength);
+ } else {
+ thisPayload = null;
}
- prox.readBytes(payload.bytes, 0, payloadLength);
- payload.length = payloadLength;
- thisPayload = payload;
-
- } else {
- payloadLength = 0;
- thisPayload = null;
+ if (readOffsets) {
+ final int startOffset = offset + prox.readVInt();
+ final int endOffset = startOffset + prox.readVInt();
+ offset = startOffset;
+ if (writePositions) {
+ postingsConsumer.addPosition(position, thisPayload, startOffset, endOffset);
+ }
+ } else if (writePositions) {
+ postingsConsumer.addPosition(position, thisPayload, -1, -1);
+ }
}
-
- postingsConsumer.addPosition(position, thisPayload);
}
postingsConsumer.finishDoc();
@@ -413,6 +528,4 @@ final class FreqProxTermsWriterPerField
termsConsumer.finish(sumTotalTermFreq, sumDocFreq, visitedDocs.cardinality());
}
-
}
-
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java Sun Jan 15 23:17:45 2012
@@ -788,9 +788,9 @@ public abstract class IndexReader implem
/** Returns {@link DocsAndPositionsEnum} for the specified
* field & term. This may return null, if either the
- * field or term does not exist, or, positions were not
- * indexed for this field. */
- public final DocsAndPositionsEnum termPositionsEnum(Bits liveDocs, String field, BytesRef term) throws IOException {
+ * field or term does not exist, or needsOffsets is
+ * true but offsets were not indexed for this field. */
+ public final DocsAndPositionsEnum termPositionsEnum(Bits liveDocs, String field, BytesRef term, boolean needsOffsets) throws IOException {
assert field != null;
assert term != null;
final Fields fields = fields();
@@ -799,7 +799,7 @@ public abstract class IndexReader implem
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(term, true)) {
- return termsEnum.docsAndPositions(liveDocs, null);
+ return termsEnum.docsAndPositions(liveDocs, null, needsOffsets);
}
}
}
@@ -830,8 +830,9 @@ public abstract class IndexReader implem
* Returns {@link DocsAndPositionsEnum} for the specified field and
* {@link TermState}. This may return null, if either the field or the term
* does not exists, the {@link TermState} is invalid for the underlying
- * implementation, or positions were not indexed for this field. */
- public final DocsAndPositionsEnum termPositionsEnum(Bits liveDocs, String field, BytesRef term, TermState state) throws IOException {
+ * implementation, or needsOffsets is true but offsets
+ * were not indexed for this field. */
+ public final DocsAndPositionsEnum termPositionsEnum(Bits liveDocs, String field, BytesRef term, TermState state, boolean needsOffsets) throws IOException {
assert state != null;
assert field != null;
final Fields fields = fields();
@@ -840,7 +841,7 @@ public abstract class IndexReader implem
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
termsEnum.seekExact(term, state);
- return termsEnum.docsAndPositions(liveDocs, null);
+ return termsEnum.docsAndPositions(liveDocs, null, needsOffsets);
}
}
return null;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java Sun Jan 15 23:17:45 2012
@@ -126,6 +126,16 @@ public final class MultiDocsAndPositions
}
@Override
+ public int startOffset() throws IOException {
+ return current.startOffset();
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ return current.endOffset();
+ }
+
+ @Override
public boolean hasPayload() {
return current.hasPayload();
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiFields.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiFields.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiFields.java Sun Jan 15 23:17:45 2012
@@ -167,14 +167,14 @@ public final class MultiFields extends F
/** Returns {@link DocsAndPositionsEnum} for the specified
* field & term. This may return null if the term does
* not exist or positions were not indexed. */
- public static DocsAndPositionsEnum getTermPositionsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term) throws IOException {
+ public static DocsAndPositionsEnum getTermPositionsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term, boolean needsOffsets) throws IOException {
assert field != null;
assert term != null;
final Terms terms = getTerms(r, field);
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(term, true)) {
- return termsEnum.docsAndPositions(liveDocs, null);
+ return termsEnum.docsAndPositions(liveDocs, null, needsOffsets);
}
}
return null;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java Sun Jan 15 23:17:45 2012
@@ -418,7 +418,7 @@ public final class MultiTermsEnum extend
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
+ public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
MultiDocsAndPositionsEnum docsAndPositionsEnum;
// Can only reuse if incoming enum is also a MultiDocsAndPositionsEnum
if (reuse != null && reuse instanceof MultiDocsAndPositionsEnum) {
@@ -469,7 +469,7 @@ public final class MultiTermsEnum extend
}
assert entry.index < docsAndPositionsEnum.subDocsAndPositionsEnum.length: entry.index + " vs " + docsAndPositionsEnum.subDocsAndPositionsEnum.length + "; " + subs.length;
- final DocsAndPositionsEnum subPostings = entry.terms.docsAndPositions(b, docsAndPositionsEnum.subDocsAndPositionsEnum[entry.index]);
+ final DocsAndPositionsEnum subPostings = entry.terms.docsAndPositions(b, docsAndPositionsEnum.subDocsAndPositionsEnum[entry.index], needsOffsets);
if (subPostings != null) {
docsAndPositionsEnum.subDocsAndPositionsEnum[entry.index] = subPostings;
@@ -479,8 +479,8 @@ public final class MultiTermsEnum extend
} else {
if (entry.terms.docs(b, null, false) != null) {
// At least one of our subs does not store
- // positions -- we can't correctly produce a
- // MultiDocsAndPositions enum
+ // offsets or positions -- we can't correctly
+ // produce a MultiDocsAndPositions enum
return null;
}
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Term.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Term.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Term.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Term.java Sun Jan 15 23:17:45 2012
@@ -44,7 +44,7 @@ public final class Term implements Compa
field = fld;
this.bytes = bytes;
}
-
+
/** Constructs a Term with the given field and text.
* <p>Note that a null field or null text value results in undefined
* behavior for most Lucene APIs that accept a Term parameter. */
@@ -132,4 +132,8 @@ public final class Term implements Compa
@Override
public final String toString() { return field + ":" + bytes.utf8ToString(); }
+
+ public Term deepCopyOf() {
+ return new Term(field, BytesRef.deepCopyOf(bytes));
+ }
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java Sun Jan 15 23:17:45 2012
@@ -38,7 +38,7 @@ final class TermVectorsConsumerPerField
boolean doVectorOffsets;
int maxNumPostings;
- OffsetAttribute offsetAttribute = null;
+ OffsetAttribute offsetAttribute;
public TermVectorsConsumerPerField(TermsHashPerField termsHashPerField, TermVectorsConsumer termsWriter, FieldInfo fieldInfo) {
this.termsHashPerField = termsHashPerField;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsEnum.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsEnum.java Sun Jan 15 23:17:45 2012
@@ -160,12 +160,13 @@ public abstract class TermsEnum {
/** Get {@link DocsAndPositionsEnum} for the current term.
* Do not call this when the enum is unpositioned.
- * This method will only return null if positions were
- * not indexed into the postings by this codec.
+ * This method will only return null if needsOffsets is
+ * true but offsets were not indexed.
* @param liveDocs unset bits are documents that should not
* be returned
- * @param reuse pass a prior DocsAndPositionsEnum for possible reuse */
- public abstract DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException;
+ * @param reuse pass a prior DocsAndPositionsEnum for possible reuse
+ * @param needsOffsets true if offsets are required */
+ public abstract DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException;
/**
* Expert: Returns the TermsEnums internal state to position the TermsEnum
@@ -238,7 +239,7 @@ public abstract class TermsEnum {
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) {
+ public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) {
throw new IllegalStateException("this method should never be called");
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java Sun Jan 15 23:17:45 2012
@@ -293,7 +293,7 @@ final class TermsHashPerField extends In
@Override
public int[] init() {
- if(perField.postingsArray == null) {
+ if (perField.postingsArray == null) {
perField.postingsArray = perField.consumer.createPostingsArray(2);
bytesUsed.addAndGet(perField.postingsArray.size * perField.postingsArray.bytesPerPosting());
}
@@ -305,8 +305,7 @@ final class TermsHashPerField extends In
ParallelPostingsArray postingsArray = perField.postingsArray;
final int oldSize = perField.postingsArray.size;
postingsArray = perField.postingsArray = postingsArray.grow();
- bytesUsed
- .addAndGet((postingsArray.bytesPerPosting() * (postingsArray.size - oldSize)));
+ bytesUsed.addAndGet((postingsArray.bytesPerPosting() * (postingsArray.size - oldSize)));
return postingsArray.textStarts;
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java Sun Jan 15 23:17:45 2012
@@ -1043,7 +1043,7 @@ class FieldCacheImpl implements FieldCac
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
+ public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
throw new UnsupportedOperationException();
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java Sun Jan 15 23:17:45 2012
@@ -272,8 +272,8 @@ public final class FuzzyTermsEnum extend
@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs,
- DocsAndPositionsEnum reuse) throws IOException {
- return actualEnum.docsAndPositions(liveDocs, reuse);
+ DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+ return actualEnum.docsAndPositions(liveDocs, reuse, needsOffsets);
}
@Override
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java Sun Jan 15 23:17:45 2012
@@ -225,7 +225,7 @@ public class MultiPhraseQuery extends Qu
return null;
}
termsEnum.seekExact(term.bytes(), termState);
- postingsEnum = termsEnum.docsAndPositions(liveDocs, null);
+ postingsEnum = termsEnum.docsAndPositions(liveDocs, null, false);
if (postingsEnum == null) {
// term does exist, but has no positions
@@ -475,7 +475,7 @@ class UnionDocsAndPositionsEnum extends
continue;
}
termsEnum.seekExact(term.bytes(), termState);
- DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null);
+ DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null, false);
if (postings == null) {
// term does exist, but has no positions
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
@@ -528,6 +528,16 @@ class UnionDocsAndPositionsEnum extends
}
@Override
+ public int startOffset() {
+ return -1;
+ }
+
+ @Override
+ public int endOffset() {
+ return -1;
+ }
+
+ @Override
public BytesRef getPayload() {
throw new UnsupportedOperationException();
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseQuery.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseQuery.java Sun Jan 15 23:17:45 2012
@@ -239,7 +239,7 @@ public class PhraseQuery extends Query {
return null;
}
te.seekExact(t.bytes(), state);
- DocsAndPositionsEnum postingsEnum = te.docsAndPositions(liveDocs, null);
+ DocsAndPositionsEnum postingsEnum = te.docsAndPositions(liveDocs, null, false);
// PhraseQuery on a field that did not index
// positions.
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java Sun Jan 15 23:17:45 2012
@@ -120,7 +120,7 @@ public class SpanTermQuery extends SpanQ
final TermsEnum termsEnum = context.reader.terms(term.field()).iterator(null);
termsEnum.seekExact(term.bytes(), state);
- final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(acceptDocs, null);
+ final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(acceptDocs, null, false);
if (postings != null) {
return new TermSpans(postings, term);
Added: lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/analysis/CannedAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/analysis/CannedAnalyzer.java?rev=1231794&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/analysis/CannedAnalyzer.java (added)
+++ lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/analysis/CannedAnalyzer.java Sun Jan 15 23:17:45 2012
@@ -0,0 +1,73 @@
+package org.apache.lucene.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.IOException;
+
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+
+public class CannedAnalyzer extends Analyzer {
+ private final Token[] tokens;
+
+ public CannedAnalyzer(Token[] tokens) {
+ this.tokens = tokens;
+ }
+
+ @Override
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ return new TokenStreamComponents(new CannedTokenizer(tokens));
+ }
+
+ public static class CannedTokenizer extends Tokenizer {
+ private final Token[] tokens;
+ private int upto = 0;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+
+ public CannedTokenizer(Token[] tokens) {
+ this.tokens = tokens;
+ }
+
+ @Override
+ public final boolean incrementToken() throws IOException {
+ if (upto < tokens.length) {
+ final Token token = tokens[upto++];
+ // TODO: can we just capture/restoreState so
+ // we get all attrs...?
+ clearAttributes();
+ termAtt.setEmpty();
+ termAtt.append(token.toString());
+ posIncrAtt.setPositionIncrement(token.getPositionIncrement());
+ offsetAtt.setOffset(token.startOffset(), token.endOffset());
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ @Override
+ public void reset() throws IOException {
+ super.reset();
+ this.upto = 0;
+ }
+ }
+}
Modified: lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexFieldsWriter.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexFieldsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexFieldsWriter.java Sun Jan 15 23:17:45 2012
@@ -88,6 +88,9 @@ class PreFlexFieldsWriter extends Fields
@Override
public TermsConsumer addField(FieldInfo field) throws IOException {
assert field.number != -1;
+ if (field.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
+ throw new IllegalArgumentException("this codec cannot index offsets");
+ }
//System.out.println("w field=" + field.name + " storePayload=" + field.storePayloads + " number=" + field.number);
return new PreFlexTermsWriter(field);
}
@@ -157,8 +160,10 @@ class PreFlexFieldsWriter extends Fields
}
@Override
- public void addPosition(int position, BytesRef payload) throws IOException {
+ public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
assert proxOut != null;
+ assert startOffset == -1;
+ assert endOffset == -1;
//System.out.println(" w pos=" + position + " payl=" + payload);
final int delta = position - lastPosition;
Modified: lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java (original)
+++ lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java Sun Jan 15 23:17:45 2012
@@ -37,6 +37,7 @@ import org.apache.lucene.codecs.TermStat
import org.apache.lucene.codecs.TermsConsumer;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexFileNames;
@@ -197,6 +198,9 @@ public class RAMOnlyPostingsFormat exten
@Override
public TermsConsumer addField(FieldInfo field) {
+ if (field.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
+ throw new IllegalArgumentException("this codec cannot index offsets");
+ }
RAMField ramField = new RAMField(field.name);
postings.fieldToTerms.put(field.name, ramField);
termsConsumer.reset(ramField);
@@ -265,7 +269,9 @@ public class RAMOnlyPostingsFormat exten
}
@Override
- public void addPosition(int position, BytesRef payload) {
+ public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) {
+ assert startOffset == -1;
+ assert endOffset == -1;
current.positions[posUpto] = position;
if (payload != null && payload.length > 0) {
if (current.payloads == null) {
@@ -388,7 +394,10 @@ public class RAMOnlyPostingsFormat exten
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) {
+ public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) {
+ if (needsOffsets) {
+ return null;
+ }
return new RAMDocsAndPositionsEnum(ramField.termToDocs.get(current), liveDocs);
}
}
@@ -494,6 +503,16 @@ public class RAMOnlyPostingsFormat exten
}
@Override
+ public int startOffset() {
+ return -1;
+ }
+
+ @Override
+ public int endOffset() {
+ return -1;
+ }
+
+ @Override
public boolean hasPayload() {
return current.payloads != null && current.payloads[posUpto-1] != null;
}
Modified: lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/index/RandomIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/index/RandomIndexWriter.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/index/RandomIndexWriter.java (original)
+++ lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/index/RandomIndexWriter.java Sun Jan 15 23:17:45 2012
@@ -122,6 +122,10 @@ public class RandomIndexWriter implement
* @see IndexWriter#addDocument(Iterable)
*/
public <T extends IndexableField> void addDocument(final Iterable<T> doc) throws IOException {
+ addDocument(doc, w.getAnalyzer());
+ }
+
+ public <T extends IndexableField> void addDocument(final Iterable<T> doc, Analyzer a) throws IOException {
if (doDocValues && doc instanceof Document) {
randomPerDocFieldValues(r, (Document) doc);
}
@@ -157,9 +161,9 @@ public class RandomIndexWriter implement
}
};
}
- });
+ }, a);
} else {
- w.addDocument(doc);
+ w.addDocument(doc, a);
}
maybeCommit();
Modified: lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/util/LuceneTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/util/LuceneTestCase.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/util/LuceneTestCase.java (original)
+++ lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/util/LuceneTestCase.java Sun Jan 15 23:17:45 2012
@@ -1106,6 +1106,10 @@ public abstract class LuceneTestCase ext
return new Field(name, value, type);
}
+ // TODO: once all core & test codecs can index
+ // offsets, sometimes randomly turn on offsets if we are
+ // already indexing positions...
+
FieldType newType = new FieldType(type);
if (!newType.stored() && random.nextBoolean()) {
newType.setStored(true); // randomly store it
Modified: lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/util/_TestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/util/_TestUtil.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/util/_TestUtil.java (original)
+++ lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/util/_TestUtil.java Sun Jan 15 23:17:45 2012
@@ -157,6 +157,7 @@ public class _TestUtil {
public static CheckIndex.Status checkIndex(Directory dir) throws IOException {
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex checker = new CheckIndex(dir);
+ checker.setCrossCheckTermVectors(true);
checker.setInfoStream(new PrintStream(bos), false);
CheckIndex.Status indexStatus = checker.checkIndex(null);
if (indexStatus == null || indexStatus.clean == false) {
@@ -567,7 +568,10 @@ public class _TestUtil {
if (random.nextBoolean()) {
if (random.nextBoolean()) {
// TODO: cast re-use to D&PE if we can...?
- final DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(liveDocs, null);
+ DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(liveDocs, null, true);
+ if (docsAndPositions == null) {
+ docsAndPositions = termsEnum.docsAndPositions(liveDocs, null, false);
+ }
if (docsAndPositions != null) {
return docsAndPositions;
}
@@ -586,7 +590,10 @@ public class _TestUtil {
if (random.nextBoolean()) {
if (random.nextBoolean()) {
// TODO: cast re-use to D&PE if we can...?
- final DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(liveDocs, null);
+ DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(liveDocs, null, true);
+ if (docsAndPositions == null) {
+ docsAndPositions = termsEnum.docsAndPositions(liveDocs, null, false);
+ }
if (docsAndPositions != null) {
return docsAndPositions;
}
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java Sun Jan 15 23:17:45 2012
@@ -74,7 +74,8 @@ public class TestCachingTokenFilter exte
DocsAndPositionsEnum termPositions = MultiFields.getTermPositionsEnum(reader,
MultiFields.getLiveDocs(reader),
"preanalyzed",
- new BytesRef("term1"));
+ new BytesRef("term1"),
+ false);
assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS);
assertEquals(1, termPositions.freq());
assertEquals(0, termPositions.nextPosition());
@@ -82,7 +83,8 @@ public class TestCachingTokenFilter exte
termPositions = MultiFields.getTermPositionsEnum(reader,
MultiFields.getLiveDocs(reader),
"preanalyzed",
- new BytesRef("term2"));
+ new BytesRef("term2"),
+ false);
assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS);
assertEquals(2, termPositions.freq());
assertEquals(1, termPositions.nextPosition());
@@ -91,7 +93,8 @@ public class TestCachingTokenFilter exte
termPositions = MultiFields.getTermPositionsEnum(reader,
MultiFields.getLiveDocs(reader),
"preanalyzed",
- new BytesRef("term3"));
+ new BytesRef("term3"),
+ false);
assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS);
assertEquals(1, termPositions.freq());
assertEquals(2, termPositions.nextPosition());
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/codecs/pulsing/TestPulsingReuse.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/codecs/pulsing/TestPulsingReuse.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/codecs/pulsing/TestPulsingReuse.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/codecs/pulsing/TestPulsingReuse.java Sun Jan 15 23:17:45 2012
@@ -23,7 +23,6 @@ import java.util.Map;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.nestedpulsing.NestedPulsingPostingsFormat;
-import org.apache.lucene.codecs.pulsing.Pulsing40PostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
@@ -70,7 +69,7 @@ public class TestPulsingReuse extends Lu
DocsAndPositionsEnum posReuse = null;
te = segment.terms("foo").iterator(null);
while (te.next() != null) {
- posReuse = te.docsAndPositions(null, posReuse);
+ posReuse = te.docsAndPositions(null, posReuse, false);
allEnums.put(posReuse, true);
}
@@ -112,7 +111,7 @@ public class TestPulsingReuse extends Lu
DocsAndPositionsEnum posReuse = null;
te = segment.terms("foo").iterator(null);
while (te.next() != null) {
- posReuse = te.docsAndPositions(null, posReuse);
+ posReuse = te.docsAndPositions(null, posReuse, false);
allEnums.put(posReuse, true);
}
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/document/TestDocument.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/document/TestDocument.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/document/TestDocument.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/document/TestDocument.java Sun Jan 15 23:17:45 2012
@@ -347,7 +347,7 @@ public class TestDocument extends Lucene
assertEquals(2, tvs.getUniqueTermCount());
TermsEnum tvsEnum = tvs.iterator(null);
assertEquals(new BytesRef("abc"), tvsEnum.next());
- final DocsAndPositionsEnum dpEnum = tvsEnum.docsAndPositions(null, null);
+ final DocsAndPositionsEnum dpEnum = tvsEnum.docsAndPositions(null, null, false);
if (field.equals("tv")) {
assertNull(dpEnum);
} else {
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestCodecs.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestCodecs.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestCodecs.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestCodecs.java Sun Jan 15 23:17:45 2012
@@ -166,7 +166,7 @@ public class TestCodecs extends LuceneTe
totTF += positions[i].length;
for(int j=0;j<positions[i].length;j++) {
final PositionData pos = positions[i][j];
- postingsConsumer.addPosition(pos.pos, pos.payload);
+ postingsConsumer.addPosition(pos.pos, pos.payload, -1, -1);
}
postingsConsumer.finishDoc();
}
@@ -480,7 +480,7 @@ public class TestCodecs extends LuceneTe
if (field.omitTF) {
this.verifyDocs(term.docs, term.positions, _TestUtil.docs(random, termsEnum, null, null, false), false);
} else {
- this.verifyDocs(term.docs, term.positions, termsEnum.docsAndPositions(null, null), true);
+ this.verifyDocs(term.docs, term.positions, termsEnum.docsAndPositions(null, null, false), true);
}
// Test random seek by ord:
@@ -500,7 +500,7 @@ public class TestCodecs extends LuceneTe
if (field.omitTF) {
this.verifyDocs(term.docs, term.positions, _TestUtil.docs(random, termsEnum, null, null, false), false);
} else {
- this.verifyDocs(term.docs, term.positions, termsEnum.docsAndPositions(null, null), true);
+ this.verifyDocs(term.docs, term.positions, termsEnum.docsAndPositions(null, null, false), true);
}
}
@@ -552,7 +552,7 @@ public class TestCodecs extends LuceneTe
final DocsEnum docsAndFreqs;
final DocsAndPositionsEnum postings;
if (!field.omitTF) {
- postings = termsEnum.docsAndPositions(null, null);
+ postings = termsEnum.docsAndPositions(null, null, false);
if (postings != null) {
docs = docsAndFreqs = postings;
} else {
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDoc.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDoc.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDoc.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDoc.java Sun Jan 15 23:17:45 2012
@@ -234,7 +234,7 @@ public class TestDoc extends LuceneTestC
out.print(" term=" + field + ":" + tis.term());
out.println(" DF=" + tis.docFreq());
- DocsAndPositionsEnum positions = tis.docsAndPositions(reader.getLiveDocs(), null);
+ DocsAndPositionsEnum positions = tis.docsAndPositions(reader.getLiveDocs(), null, false);
while (positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
out.print(" doc=" + positions.docID());
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java Sun Jan 15 23:17:45 2012
@@ -96,7 +96,7 @@ public class TestDocsAndPositions extend
public DocsAndPositionsEnum getDocsAndPositions(IndexReader reader,
BytesRef bytes, Bits liveDocs) throws IOException {
- return reader.termPositionsEnum(null, fieldName, bytes);
+ return reader.termPositionsEnum(null, fieldName, bytes, false);
}
/**
@@ -358,7 +358,7 @@ public class TestDocsAndPositions extend
writer.addDocument(doc);
IndexReader reader = writer.getReader();
IndexReader r = getOnlySegmentReader(reader);
- DocsAndPositionsEnum disi = r.termPositionsEnum(null, "foo", new BytesRef("bar"));
+ DocsAndPositionsEnum disi = r.termPositionsEnum(null, "foo", new BytesRef("bar"), false);
int docid = disi.docID();
assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
@@ -366,7 +366,7 @@ public class TestDocsAndPositions extend
// now reuse and check again
TermsEnum te = r.terms("foo").iterator(null);
assertTrue(te.seekExact(new BytesRef("bar"), true));
- disi = te.docsAndPositions(null, disi);
+ disi = te.docsAndPositions(null, disi, false);
docid = disi.docID();
assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);