You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2012/01/18 23:28:20 UTC
svn commit: r1233096 [6/13] - in /lucene/dev/branches/solrcloud: ./
dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/lucene/contrib/
dev-tools/idea/modules/analysis/kuromoji/
dev-tools/idea/solr/contrib/analysis-extras/ dev-tools/maven/modules/a...
Modified: lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/document/StringField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/document/StringField.java?rev=1233096&r1=1233095&r2=1233096&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/document/StringField.java (original)
+++ lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/document/StringField.java Wed Jan 18 22:28:07 2012
@@ -51,12 +51,12 @@ public final class StringField extends F
TYPE_STORED.setIndexOptions(IndexOptions.DOCS_ONLY);
TYPE_STORED.freeze();
}
-
+
/** Creates a new un-stored StringField */
public StringField(String name, String value) {
super(name, value, TYPE_UNSTORED);
}
-
+
@Override
public String stringValue() {
return (fieldsData == null) ? null : fieldsData.toString();
Modified: lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/document/TextField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/document/TextField.java?rev=1233096&r1=1233095&r2=1233096&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/document/TextField.java (original)
+++ lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/document/TextField.java Wed Jan 18 22:28:07 2012
@@ -48,6 +48,8 @@ public final class TextField extends Fie
TYPE_STORED.freeze();
}
+ // TODO: add sugar for term vectors...?
+
/** Creates a new un-stored TextField */
public TextField(String name, Reader reader) {
super(name, reader, TextField.TYPE_UNSTORED);
Modified: lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/BaseMultiReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/BaseMultiReader.java?rev=1233096&r1=1233095&r2=1233096&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/BaseMultiReader.java (original)
+++ lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/BaseMultiReader.java Wed Jan 18 22:28:07 2012
@@ -18,9 +18,6 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.Set;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -56,6 +53,11 @@ abstract class BaseMultiReader<R extends
}
@Override
+ public FieldInfos getFieldInfos() {
+ throw new UnsupportedOperationException("call getFieldInfos() on each sub reader, or use ReaderUtil.getMergedFieldInfos, instead");
+ }
+
+ @Override
public Fields fields() throws IOException {
throw new UnsupportedOperationException("please use MultiFields.getFields, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level Fields");
}
@@ -128,17 +130,6 @@ abstract class BaseMultiReader<R extends
}
@Override
- public Collection<String> getFieldNames (IndexReader.FieldOption fieldNames) {
- ensureOpen();
- // maintain a unique set of field names
- final Set<String> fieldSet = new HashSet<String>();
- for (IndexReader reader : subReaders) {
- fieldSet.addAll(reader.getFieldNames(fieldNames));
- }
- return fieldSet;
- }
-
- @Override
public IndexReader[] getSequentialSubReaders() {
return subReaders;
}
Modified: lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/CheckIndex.java?rev=1233096&r1=1233095&r2=1233096&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/CheckIndex.java Wed Jan 18 22:28:07 2012
@@ -23,7 +23,6 @@ import java.io.PrintStream;
import java.lang.reflect.Array;
import java.text.NumberFormat;
import java.util.ArrayList;
-import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
@@ -290,6 +289,20 @@ public class CheckIndex {
infoStream = null;
}
+ private boolean crossCheckTermVectors;
+
+ /** If true, term vectors are compared against postings to
+ * make sure they are the same. This will likely
+ * drastically increase time it takes to run CheckIndex! */
+ public void setCrossCheckTermVectors(boolean v) {
+ crossCheckTermVectors = v;
+ }
+
+ /** See {@link #setCrossCheckTermVectors}. */
+ public boolean getCrossCheckTermVectors() {
+ return crossCheckTermVectors;
+ }
+
private boolean verbose;
/** Set infoStream where messages should go. If null, no
@@ -552,19 +565,19 @@ public class CheckIndex {
if (reader.maxDoc() != info.docCount)
throw new RuntimeException("SegmentReader.maxDoc() " + reader.maxDoc() + " != SegmentInfos.docCount " + info.docCount);
- // Test getFieldNames()
+ // Test getFieldInfos()
if (infoStream != null) {
infoStream.print(" test: fields..............");
}
- Collection<String> fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL);
- msg("OK [" + fieldNames.size() + " fields]");
- segInfoStat.numFields = fieldNames.size();
+ FieldInfos fieldInfos = reader.getFieldInfos();
+ msg("OK [" + fieldInfos.size() + " fields]");
+ segInfoStat.numFields = fieldInfos.size();
// Test Field Norms
- segInfoStat.fieldNormStatus = testFieldNorms(fieldNames, reader);
+ segInfoStat.fieldNormStatus = testFieldNorms(fieldInfos, reader);
// Test the Term Index
- segInfoStat.termIndexStatus = testTermIndex(reader);
+ segInfoStat.termIndexStatus = testPostings(reader);
// Test Stored Fields
segInfoStat.storedFieldStatus = testStoredFields(info, reader, nf);
@@ -631,7 +644,7 @@ public class CheckIndex {
/**
* Test field norms.
*/
- private Status.FieldNormStatus testFieldNorms(Collection<String> fieldNames, SegmentReader reader) {
+ private Status.FieldNormStatus testFieldNorms(FieldInfos fieldInfos, SegmentReader reader) {
final Status.FieldNormStatus status = new Status.FieldNormStatus();
try {
@@ -639,29 +652,27 @@ public class CheckIndex {
if (infoStream != null) {
infoStream.print(" test: field norms.........");
}
- FieldInfos infos = reader.fieldInfos();
DocValues dv;
- for (final String fieldName : fieldNames) {
- FieldInfo info = infos.fieldInfo(fieldName);
- if (reader.hasNorms(fieldName)) {
- dv = reader.normValues(fieldName);
+ for (FieldInfo info : fieldInfos) {
+ if (reader.hasNorms(info.name)) {
+ dv = reader.normValues(info.name);
assert dv != null;
if (dv.getSource().hasArray()) {
Object array = dv.getSource().getArray();
if (Array.getLength(array) != reader.maxDoc()) {
- throw new RuntimeException("norms for field: " + fieldName + " are of the wrong size");
+ throw new RuntimeException("norms for field: " + info.name + " are of the wrong size");
}
}
if (!info.isIndexed || info.omitNorms) {
- throw new RuntimeException("field: " + fieldName + " should omit norms but has them!");
+ throw new RuntimeException("field: " + info.name + " should omit norms but has them!");
}
++status.totFields;
} else {
- if (reader.normValues(fieldName) != null) {
- throw new RuntimeException("field: " + fieldName + " should omit norms but has them!");
+ if (reader.normValues(info.name) != null) {
+ throw new RuntimeException("field: " + info.name + " should omit norms but has them!");
}
- if (info.isIndexed && !info.omitNorms) {
- throw new RuntimeException("field: " + fieldName + " should have norms but omits them!");
+ if (info.normsPresent()) {
+ throw new RuntimeException("field: " + info.name + " should have norms but omits them!");
}
}
}
@@ -681,7 +692,11 @@ public class CheckIndex {
/**
* Test the term index.
*/
- private Status.TermIndexStatus testTermIndex(SegmentReader reader) {
+ private Status.TermIndexStatus testPostings(SegmentReader reader) {
+
+ // TODO: we should go and verify term vectors match, if
+ // crossCheckTermVectors is on...
+
final Status.TermIndexStatus status = new Status.TermIndexStatus();
final int maxDoc = reader.maxDoc();
@@ -763,7 +778,7 @@ public class CheckIndex {
docs = termsEnum.docs(liveDocs, docs, false);
docsAndFreqs = termsEnum.docs(liveDocs, docsAndFreqs, true);
- postings = termsEnum.docsAndPositions(liveDocs, postings);
+ postings = termsEnum.docsAndPositions(liveDocs, postings, false);
if (hasOrd) {
long ord = -1;
@@ -893,7 +908,7 @@ public class CheckIndex {
if (hasPositions) {
for(int idx=0;idx<7;idx++) {
final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8);
- postings = termsEnum.docsAndPositions(liveDocs, postings);
+ postings = termsEnum.docsAndPositions(liveDocs, postings, false);
final int docID = postings.advance(skipDocID);
if (docID == DocsEnum.NO_MORE_DOCS) {
break;
@@ -1259,7 +1274,10 @@ public class CheckIndex {
private Status.TermVectorStatus testTermVectors(SegmentInfo info, SegmentReader reader, NumberFormat format) {
final Status.TermVectorStatus status = new Status.TermVectorStatus();
- TermsEnum termsEnum = null;
+ // TODO: in theory we could test that term vectors have
+ // same terms/pos/offsets as the postings, but it'd be
+ // very slow...
+
try {
if (infoStream != null) {
infoStream.print(" test: term vectors........");
@@ -1267,9 +1285,25 @@ public class CheckIndex {
// TODO: maybe we can factor out testTermIndex and reuse here?
DocsEnum docs = null;
- DocsEnum docsAndFreqs = null;
DocsAndPositionsEnum postings = null;
+
+ // Only used if crossCheckTermVectors is true:
+ DocsEnum postingsDocs = null;
+ DocsAndPositionsEnum postingsPostings = null;
+
final Bits liveDocs = reader.getLiveDocs();
+
+ final Fields postingsFields;
+ // TODO: testTermsIndex
+ if (crossCheckTermVectors) {
+ postingsFields = reader.fields();
+ } else {
+ postingsFields = null;
+ }
+
+ TermsEnum termsEnum = null;
+ TermsEnum postingsTermsEnum = null;
+
for (int j = 0; j < info.docCount; ++j) {
if (liveDocs == null || liveDocs.get(j)) {
status.docCount++;
@@ -1293,14 +1327,37 @@ public class CheckIndex {
Terms terms = tfv.terms(field);
termsEnum = terms.iterator(termsEnum);
+
+ if (crossCheckTermVectors) {
+ Terms postingsTerms = postingsFields.terms(field);
+ if (postingsTerms == null) {
+ throw new RuntimeException("vector field=" + field + " does not exist in postings; doc=" + j);
+ }
+ postingsTermsEnum = postingsTerms.iterator(postingsTermsEnum);
+ } else {
+ postingsTermsEnum = null;
+ }
long tfvComputedTermCountForField = 0;
long tfvComputedSumTotalTermFreq = 0;
+ BytesRef lastTerm = null;
+ Comparator<BytesRef> termComp = terms.getComparator();
BytesRef term = null;
while ((term = termsEnum.next()) != null) {
tfvComputedTermCountForField++;
+ // make sure terms arrive in order according to
+ // the comp
+ if (lastTerm == null) {
+ lastTerm = BytesRef.deepCopyOf(term);
+ } else {
+ if (termComp.compare(lastTerm, term) >= 0) {
+ throw new RuntimeException("vector terms out of order for doc " + j + ": lastTerm=" + lastTerm + " term=" + term);
+ }
+ lastTerm.copyBytes(term);
+ }
+
if (termsEnum.docFreq() != 1) {
throw new RuntimeException("vector docFreq for doc " + j + ", field " + field + ", term" + term + " != 1");
}
@@ -1311,52 +1368,171 @@ public class CheckIndex {
throw new RuntimeException("totalTermFreq: " + totalTermFreq + " is out of bounds");
}
- postings = termsEnum.docsAndPositions(null, postings);
+ final boolean hasPositions;
+ final boolean hasOffsets;
+ final boolean hasFreqs;
+
+ // TODO: really we need a reflection/query
+ // API so we can just ask what was indexed
+ // instead of "probing"...
+
+ // Try offsets:
+ postings = termsEnum.docsAndPositions(null, postings, true);
if (postings == null) {
- docsAndFreqs = termsEnum.docs(null, docsAndFreqs, true);
- if (docsAndFreqs == null) {
- docs = termsEnum.docs(null, docs, false);
+ hasOffsets = false;
+ // Try only positions:
+ postings = termsEnum.docsAndPositions(null, postings, false);
+ if (postings == null) {
+ hasPositions = false;
+ // Try docIDs & freqs:
+ docs = termsEnum.docs(null, docs, true);
+ if (docs == null) {
+ // OK, only docIDs:
+ hasFreqs = false;
+ docs = termsEnum.docs(null, docs, false);
+ } else {
+ hasFreqs = true;
+ }
} else {
- docs = docsAndFreqs;
+ hasPositions = true;
+ hasFreqs = true;
}
} else {
- docs = docsAndFreqs = postings;
+ hasOffsets = true;
+ // NOTE: may be a lie... but we accept -1 below
+ hasPositions = true;
+ hasFreqs = true;
+ }
+
+ final DocsEnum docs2;
+ if (hasPositions || hasOffsets) {
+ assert postings != null;
+ docs2 = postings;
+ } else {
+ assert docs != null;
+ docs2 = docs;
}
- final int doc = docs.nextDoc();
+ final DocsEnum postingsDocs2;
+ final boolean postingsHasFreq;
+ if (crossCheckTermVectors) {
+ if (!postingsTermsEnum.seekExact(term, true)) {
+ throw new RuntimeException("vector term=" + term + " field=" + field + " does not exist in postings; doc=" + j);
+ }
+ postingsPostings = postingsTermsEnum.docsAndPositions(null, postingsPostings, true);
+ if (postingsPostings == null) {
+ // Term vectors were indexed w/ offsets but postings were not
+ postingsPostings = postingsTermsEnum.docsAndPositions(null, postingsPostings, false);
+ if (postingsPostings == null) {
+ postingsDocs = postingsTermsEnum.docs(null, postingsDocs, true);
+ if (postingsDocs == null) {
+ postingsHasFreq = false;
+ postingsDocs = postingsTermsEnum.docs(null, postingsDocs, false);
+ if (postingsDocs == null) {
+ throw new RuntimeException("vector term=" + term + " field=" + field + " does not exist in postings; doc=" + j);
+ }
+ } else {
+ postingsHasFreq = true;
+ }
+ } else {
+ postingsHasFreq = true;
+ }
+ } else {
+ postingsHasFreq = true;
+ }
+
+ if (postingsPostings != null) {
+ postingsDocs2 = postingsPostings;
+ } else {
+ postingsDocs2 = postingsDocs;
+ }
+
+ final int advanceDoc = postingsDocs2.advance(j);
+ if (advanceDoc != j) {
+ throw new RuntimeException("vector term=" + term + " field=" + field + ": doc=" + j + " was not found in postings (got: " + advanceDoc + ")");
+ }
+ } else {
+ postingsDocs2 = null;
+ postingsHasFreq = false;
+ }
+
+ final int doc = docs2.nextDoc();
if (doc != 0) {
throw new RuntimeException("vector for doc " + j + " didn't return docID=0: got docID=" + doc);
}
- if (docsAndFreqs != null) {
- final int tf = docsAndFreqs.freq();
+ if (hasFreqs) {
+ final int tf = docs2.freq();
if (tf <= 0) {
throw new RuntimeException("vector freq " + tf + " is out of bounds");
}
if (totalTermFreq != -1 && totalTermFreq != tf) {
throw new RuntimeException("vector totalTermFreq " + totalTermFreq + " != tf " + tf);
}
+ if (crossCheckTermVectors && postingsHasFreq) {
+ if (postingsDocs2.freq() != tf) {
+ throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": freq=" + tf + " differs from postings freq=" + postingsDocs2.freq());
+ }
+ }
tfvComputedSumTotalTermFreq += tf;
- if (postings != null) {
+ if (hasPositions || hasOffsets) {
int lastPosition = -1;
+ //int lastStartOffset = -1;
for (int i = 0; i < tf; i++) {
int pos = postings.nextPosition();
- if (pos != -1 && pos < 0) {
- throw new RuntimeException("vector position " + pos + " is out of bounds");
- }
+ if (hasPositions) {
+ if (pos != -1 && pos < 0) {
+ throw new RuntimeException("vector position " + pos + " is out of bounds");
+ }
+ if (pos < lastPosition) {
+ throw new RuntimeException("vector position " + pos + " < lastPos " + lastPosition);
+ }
- if (pos < lastPosition) {
- throw new RuntimeException("vector position " + pos + " < lastPos " + lastPosition);
+ lastPosition = pos;
+ }
+
+ if (crossCheckTermVectors && postingsPostings != null) {
+ int postingsPos = postingsPostings.nextPosition();
+ if (pos != -1 && postingsPos != -1 && pos != postingsPos) {
+ throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": pos=" + pos + " differs from postings pos=" + postingsPos);
+ }
+ }
+
+ if (hasOffsets) {
+ // Call the methods to at least make
+ // sure they don't throw exc:
+ final int startOffset = postings.startOffset();
+ final int endOffset = postings.endOffset();
+ // TODO: these are too anal...?
+ /*
+ if (endOffset < startOffset) {
+ throw new RuntimeException("vector startOffset=" + startOffset + " is > endOffset=" + endOffset);
+ }
+ if (startOffset < lastStartOffset) {
+ throw new RuntimeException("vector startOffset=" + startOffset + " is < prior startOffset=" + lastStartOffset);
+ }
+ lastStartOffset = startOffset;
+ */
+
+ if (crossCheckTermVectors && postingsPostings != null) {
+ final int postingsStartOffset = postingsPostings.startOffset();
+
+ final int postingsEndOffset = postingsPostings.endOffset();
+ if (startOffset != -1 && postingsStartOffset != -1 && startOffset != postingsStartOffset) {
+ throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": startOffset=" + startOffset + " differs from postings startOffset=" + postingsStartOffset);
+ }
+ if (endOffset != -1 && postingsEndOffset != -1 && endOffset != postingsEndOffset) {
+ throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": endOffset=" + endOffset + " differs from postings endOffset=" + postingsEndOffset);
+ }
+ }
}
-
- lastPosition = pos;
}
}
}
- if (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+ if (docs2.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
throw new RuntimeException("vector for doc " + j + " references multiple documents!");
}
}
@@ -1477,6 +1653,7 @@ public class CheckIndex {
public static void main(String[] args) throws IOException, InterruptedException {
boolean doFix = false;
+ boolean doCrossCheckTermVectors = false;
Codec codec = Codec.getDefault(); // only used when fixing
boolean verbose = false;
List<String> onlySegments = new ArrayList<String>();
@@ -1487,6 +1664,8 @@ public class CheckIndex {
String arg = args[i];
if ("-fix".equals(arg)) {
doFix = true;
+ } else if ("-crossCheckTermVectors".equals(arg)) {
+ doCrossCheckTermVectors = true;
} else if ("-codec".equals(arg)) {
if (i == args.length-1) {
System.out.println("ERROR: missing name for -codec option");
@@ -1522,9 +1701,10 @@ public class CheckIndex {
if (indexPath == null) {
System.out.println("\nERROR: index path not specified");
- System.out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y] [-dir-impl X]\n" +
+ System.out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-crossCheckTermVectors] [-segment X] [-segment Y] [-dir-impl X]\n" +
"\n" +
" -fix: actually write a new segments_N file, removing any problematic segments\n" +
+ " -crossCheckTermVectors: verifies that term vectors match postings; THIS IS VERY SLOW!\n" +
" -codec X: when fixing, codec to write the new segments_N file with\n" +
" -verbose: print additional details\n" +
" -segment X: only check the specified segments. This can be specified multiple\n" +
@@ -1573,6 +1753,7 @@ public class CheckIndex {
}
CheckIndex checker = new CheckIndex(dir);
+ checker.setCrossCheckTermVectors(doCrossCheckTermVectors);
checker.setInfoStream(System.out, verbose);
Status result = checker.checkIndex(onlySegments);
Modified: lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java?rev=1233096&r1=1233095&r2=1233096&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java (original)
+++ lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java Wed Jan 18 22:28:07 2012
@@ -26,11 +26,9 @@ import java.util.Map;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesConsumer;
-import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosWriter;
import org.apache.lucene.codecs.PerDocConsumer;
import org.apache.lucene.index.DocumentsWriterPerThread.DocState;
-import org.apache.lucene.index.DocValues;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.IOUtils;
@@ -82,17 +80,19 @@ final class DocFieldProcessor extends Do
fieldsWriter.flush(state);
consumer.flush(childFields, state);
+ for (DocValuesConsumerAndDocID consumer : docValues.values()) {
+ consumer.docValuesConsumer.finish(state.numDocs);
+ }
+
// Important to save after asking consumer to flush so
// consumer can alter the FieldInfo* if necessary. EG,
// FreqProxTermsWriter does this with
// FieldInfo.storePayload.
FieldInfosWriter infosWriter = codec.fieldInfosFormat().getFieldInfosWriter();
infosWriter.write(state.directory, state.segmentName, state.fieldInfos, IOContext.DEFAULT);
- for (DocValuesConsumerAndDocID consumers : docValues.values()) {
- consumers.docValuesConsumer.finish(state.numDocs);
- }
+
// close perDocConsumer during flush to ensure all files are flushed due to PerCodec CFS
- IOUtils.close(perDocConsumers.values());
+ IOUtils.close(perDocConsumer);
}
@Override
@@ -112,7 +112,7 @@ final class DocFieldProcessor extends Do
field = next;
}
}
- IOUtils.closeWhileHandlingException(perDocConsumers.values());
+ IOUtils.closeWhileHandlingException(perDocConsumer);
// TODO add abort to PerDocConsumer!
try {
@@ -132,7 +132,6 @@ final class DocFieldProcessor extends Do
}
try {
- PerDocConsumer perDocConsumer = perDocConsumers.get(0);
if (perDocConsumer != null) {
perDocConsumer.abort();
}
@@ -176,7 +175,7 @@ final class DocFieldProcessor extends Do
fieldHash = new DocFieldProcessorPerField[2];
hashMask = 1;
totalFieldCount = 0;
- perDocConsumers.clear();
+ perDocConsumer = null;
docValues.clear();
}
@@ -270,9 +269,9 @@ final class DocFieldProcessor extends Do
if (field.fieldType().stored()) {
fieldsWriter.addField(field, fp.fieldInfo);
}
- final DocValue docValue = field.docValue();
- if (docValue != null) {
- docValuesConsumer(field.docValueType(), docState, fp.fieldInfo).add(docState.docID, docValue);
+ final DocValues.Type dvType = field.fieldType().docValueType();
+ if (dvType != null) {
+ docValuesConsumer(dvType, docState, fp.fieldInfo).add(docState.docID, field);
}
}
@@ -310,6 +309,8 @@ final class DocFieldProcessor extends Do
}
private static class DocValuesConsumerAndDocID {
+ // Only used to enforce that same DV field name is never
+ // added more than once per doc:
public int docID;
final DocValuesConsumer docValuesConsumer;
@@ -319,7 +320,7 @@ final class DocFieldProcessor extends Do
}
final private Map<String, DocValuesConsumerAndDocID> docValues = new HashMap<String, DocValuesConsumerAndDocID>();
- final private Map<Integer, PerDocConsumer> perDocConsumers = new HashMap<Integer, PerDocConsumer>();
+ private PerDocConsumer perDocConsumer;
DocValuesConsumer docValuesConsumer(DocValues.Type valueType, DocState docState, FieldInfo fieldInfo)
throws IOException {
@@ -333,15 +334,12 @@ final class DocFieldProcessor extends Do
return docValuesConsumerAndDocID.docValuesConsumer;
}
- PerDocConsumer perDocConsumer = perDocConsumers.get(0);
if (perDocConsumer == null) {
PerDocWriteState perDocWriteState = docState.docWriter.newPerDocWriteState("");
- DocValuesFormat dvFormat = docState.docWriter.codec.docValuesFormat();
- perDocConsumer = dvFormat.docsConsumer(perDocWriteState);
- perDocConsumers.put(0, perDocConsumer);
+ perDocConsumer = docState.docWriter.codec.docValuesFormat().docsConsumer(perDocWriteState);
}
DocValuesConsumer docValuesConsumer = perDocConsumer.addValuesField(valueType, fieldInfo);
- fieldInfo.setDocValuesType(valueType);
+ fieldInfo.setDocValuesType(valueType, false);
docValuesConsumerAndDocID = new DocValuesConsumerAndDocID(docValuesConsumer);
docValuesConsumerAndDocID.docID = docState.docID;
Modified: lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java?rev=1233096&r1=1233095&r2=1233096&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java (original)
+++ lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java Wed Jan 18 22:28:07 2012
@@ -73,8 +73,9 @@ final class DocInverterPerField extends
// tokenized.
if (field.fieldType().indexed() && doInvert) {
- if (i > 0)
+ if (i > 0) {
fieldState.position += docState.analyzer == null ? 0 : docState.analyzer.getPositionIncrementGap(fieldInfo.name);
+ }
final TokenStream stream = field.tokenStream(docState.analyzer);
// reset the TokenStream to the first token
Modified: lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/DocTermOrds.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/DocTermOrds.java?rev=1233096&r1=1233095&r2=1233096&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/DocTermOrds.java (original)
+++ lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/DocTermOrds.java Wed Jan 18 22:28:07 2012
@@ -655,8 +655,8 @@ public class DocTermOrds {
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
- return termsEnum.docsAndPositions(liveDocs, reuse);
+ public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+ return termsEnum.docsAndPositions(liveDocs, reuse, needsOffsets);
}
@Override
Modified: lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/DocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/DocValues.java?rev=1233096&r1=1233095&r2=1233096&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/DocValues.java (original)
+++ lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/DocValues.java Wed Jan 18 22:28:07 2012
@@ -493,6 +493,7 @@ public abstract class DocValues implemen
* </p>
*/
FIXED_INTS_64,
+
/**
* A 32 bit floating point value. By default there is no compression
* applied. To fit custom float values into less than 32bit either a custom
@@ -507,6 +508,7 @@ public abstract class DocValues implemen
* </p>
*/
FLOAT_32,
+
/**
*
* A 64 bit floating point value. By default there is no compression
@@ -613,7 +615,6 @@ public abstract class DocValues implemen
* @see SortedSource
*/
BYTES_FIXED_SORTED
-
}
/**
Modified: lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java?rev=1233096&r1=1233095&r2=1233096&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java (original)
+++ lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java Wed Jan 18 22:28:07 2012
@@ -26,9 +26,20 @@ public abstract class DocsAndPositionsEn
/** Returns the next position. You should only call this
* up to {@link DocsEnum#freq()} times else
- * the behavior is not defined. */
+ * the behavior is not defined. If positions were not
+ * indexed this will return -1; this only happens if
+ * offsets were indexed and you passed needsOffset=true
+ * when pulling the enum. */
public abstract int nextPosition() throws IOException;
+ /** Returns start offset for the current position, or -1
+ * if offsets were not indexed. */
+ public abstract int startOffset() throws IOException;
+
+ /** Returns end offset for the current position, or -1 if
+ * offsets were not indexed. */
+ public abstract int endOffset() throws IOException;
+
/** Returns the payload at this position, or null if no
* payload was indexed. Only call this once per
* position. */
Modified: lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1233096&r1=1233095&r2=1233096&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java Wed Jan 18 22:28:07 2012
@@ -20,13 +20,12 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Collection;
import java.util.Iterator;
-import java.util.LinkedList;
import java.util.List;
-import java.util.Queue;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.index.DocumentsWriterFlushQueue.SegmentFlushTicket;
import org.apache.lucene.index.DocumentsWriterPerThread.FlushedSegment;
import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
@@ -117,7 +116,7 @@ final class DocumentsWriter {
// TODO: cut over to BytesRefHash in BufferedDeletes
volatile DocumentsWriterDeleteQueue deleteQueue = new DocumentsWriterDeleteQueue();
- private final TicketQueue ticketQueue = new TicketQueue();
+ private final DocumentsWriterFlushQueue ticketQueue = new DocumentsWriterFlushQueue();
/*
* we preserve changes during a full flush since IW might not checkout before
* we release all changes. NRT Readers otherwise suddenly return true from
@@ -177,12 +176,7 @@ final class DocumentsWriter {
private void applyAllDeletes(DocumentsWriterDeleteQueue deleteQueue) throws IOException {
if (deleteQueue != null && !flushControl.isFullFlush()) {
- synchronized (ticketQueue) {
- ticketQueue.incTicketCount();// first inc the ticket count - freeze opens a window for #anyChanges to fail
- // Freeze and insert the delete flush ticket in the queue
- ticketQueue.add(new FlushTicket(deleteQueue.freezeGlobalBuffer(null), false));
- applyFlushTickets();
- }
+ ticketQueue.addDeletesAndPurge(this, deleteQueue);
}
indexWriter.applyAllDeletes();
indexWriter.flushCount.incrementAndGet();
@@ -401,7 +395,7 @@ final class DocumentsWriter {
while (flushingDWPT != null) {
maybeMerge = true;
boolean success = false;
- FlushTicket ticket = null;
+ SegmentFlushTicket ticket = null;
try {
assert currentFullFlushDelQueue == null
|| flushingDWPT.deleteQueue == currentFullFlushDelQueue : "expected: "
@@ -422,34 +416,27 @@ final class DocumentsWriter {
* might miss to deletes documents in 'A'.
*/
try {
- synchronized (ticketQueue) {
- // Each flush is assigned a ticket in the order they acquire the ticketQueue lock
- ticket = new FlushTicket(flushingDWPT.prepareFlush(), true);
- ticketQueue.incrementAndAdd(ticket);
- }
+ // Each flush is assigned a ticket in the order they acquire the ticketQueue lock
+ ticket = ticketQueue.addFlushTicket(flushingDWPT);
// flush concurrently without locking
final FlushedSegment newSegment = flushingDWPT.flush();
- synchronized (ticketQueue) {
- ticket.segment = newSegment;
- }
+ ticketQueue.addSegment(ticket, newSegment);
// flush was successful once we reached this point - new seg. has been assigned to the ticket!
success = true;
} finally {
if (!success && ticket != null) {
- synchronized (ticketQueue) {
- // In the case of a failure make sure we are making progress and
- // apply all the deletes since the segment flush failed since the flush
- // ticket could hold global deletes see FlushTicket#canPublish()
- ticket.isSegmentFlush = false;
- }
+ // In the case of a failure make sure we are making progress and
+ // apply all the deletes since the segment flush failed since the flush
+ // ticket could hold global deletes; see FlushTicket#canPublish()
+ ticketQueue.markTicketFailed(ticket);
}
}
/*
* Now we are done and try to flush the ticket queue if the head of the
* queue has already finished the flush.
*/
- applyFlushTickets();
+ ticketQueue.tryPurge(this);
} finally {
flushControl.doAfterFlush(flushingDWPT);
flushingDWPT.checkAndResetHasAborted();
@@ -476,25 +463,7 @@ final class DocumentsWriter {
return maybeMerge;
}
- private void applyFlushTickets() throws IOException {
- synchronized (ticketQueue) {
- while (true) {
- // Keep publishing eligible flushed segments:
- final FlushTicket head = ticketQueue.peek();
- if (head != null && head.canPublish()) {
- try {
- finishFlush(head.segment, head.frozenDeletes);
- } finally {
- ticketQueue.poll();
- }
- } else {
- break;
- }
- }
- }
- }
-
- private void finishFlush(FlushedSegment newSegment, FrozenBufferedDeletes bufferedDeletes)
+ void finishFlush(FlushedSegment newSegment, FrozenBufferedDeletes bufferedDeletes)
throws IOException {
// Finish the flushed segment and publish it to IndexWriter
if (newSegment == null) {
@@ -590,13 +559,11 @@ final class DocumentsWriter {
if (infoStream.isEnabled("DW")) {
infoStream.message("DW", Thread.currentThread().getName() + ": flush naked frozen global deletes");
}
- synchronized (ticketQueue) {
- ticketQueue.incTicketCount(); // first inc the ticket count - freeze opens a window for #anyChanges to fail
- ticketQueue.add(new FlushTicket(flushingDeleteQueue.freezeGlobalBuffer(null), false));
- }
- applyFlushTickets();
+ ticketQueue.addDeletesAndPurge(this, flushingDeleteQueue);
+ } else {
+ ticketQueue.forcePurge(this);
}
- assert !flushingDeleteQueue.anyChanges();
+ assert !flushingDeleteQueue.anyChanges() && !ticketQueue.hasTickets();
} finally {
assert flushingDeleteQueue == currentFullFlushDelQueue;
}
@@ -621,61 +588,8 @@ final class DocumentsWriter {
}
- static final class FlushTicket {
- final FrozenBufferedDeletes frozenDeletes;
- /* access to non-final members must be synchronized on DW#ticketQueue */
- FlushedSegment segment;
- boolean isSegmentFlush;
-
- FlushTicket(FrozenBufferedDeletes frozenDeletes, boolean isSegmentFlush) {
- this.frozenDeletes = frozenDeletes;
- this.isSegmentFlush = isSegmentFlush;
- }
-
- boolean canPublish() {
- return (!isSegmentFlush || segment != null);
- }
- }
- static final class TicketQueue {
- private final Queue<FlushTicket> queue = new LinkedList<FlushTicket>();
- final AtomicInteger ticketCount = new AtomicInteger();
-
- void incTicketCount() {
- ticketCount.incrementAndGet();
- }
-
- public boolean hasTickets() {
- assert ticketCount.get() >= 0;
- return ticketCount.get() != 0;
- }
-
- void incrementAndAdd(FlushTicket ticket) {
- incTicketCount();
- add(ticket);
- }
-
- void add(FlushTicket ticket) {
- queue.add(ticket);
- }
-
- FlushTicket peek() {
- return queue.peek();
- }
-
- FlushTicket poll() {
- try {
- return queue.poll();
- } finally {
- ticketCount.decrementAndGet();
- }
- }
-
- void clear() {
- queue.clear();
- ticketCount.set(0);
- }
- }
+
// used by IW during close to assert all DWPTs are inactive after the final flush
boolean assertNoActiveDWPT() {
Modified: lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/FieldInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/FieldInfo.java?rev=1233096&r1=1233095&r2=1233096&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/FieldInfo.java (original)
+++ lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/FieldInfo.java Wed Jan 18 22:28:07 2012
@@ -1,6 +1,7 @@
package org.apache.lucene.index;
-import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValues.Type;
+
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -25,17 +26,14 @@ public final class FieldInfo {
public final int number;
public boolean isIndexed;
- private DocValues.Type docValues;
-
+ private DocValues.Type docValueType;
- // true if term vector for this field should be stored
+ // True if any document indexed term vectors
public boolean storeTermVector;
- public boolean storeOffsetWithTermVector;
- public boolean storePositionWithTermVector;
+ private DocValues.Type normType;
public boolean omitNorms; // omit norms associated with indexed fields
public IndexOptions indexOptions;
-
public boolean storePayloads; // whether this field stores payloads together with term positions
/**
@@ -43,53 +41,53 @@ public final class FieldInfo {
* @lucene.experimental
*/
public static enum IndexOptions {
+ // NOTE: order is important here; FieldInfo uses this
+ // order to merge two conflicting IndexOptions (always
+ // "downgrades" by picking the lowest).
/** only documents are indexed: term frequencies and positions are omitted */
// TODO: maybe rename to just DOCS?
DOCS_ONLY,
/** only documents and term frequencies are indexed: positions are omitted */
DOCS_AND_FREQS,
- /** full postings: documents, frequencies, and positions */
- DOCS_AND_FREQS_AND_POSITIONS
+ /** documents, frequencies and positions */
+ DOCS_AND_FREQS_AND_POSITIONS,
+ /** documents, frequencies, positions and offsets */
+ DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
};
/**
* @lucene.experimental
*/
public FieldInfo(String name, boolean isIndexed, int number, boolean storeTermVector,
- boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
- boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, DocValues.Type docValues) {
+ boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, DocValues.Type docValues, DocValues.Type normsType) {
this.name = name;
this.isIndexed = isIndexed;
this.number = number;
- this.docValues = docValues;
+ this.docValueType = docValues;
if (isIndexed) {
this.storeTermVector = storeTermVector;
- this.storeOffsetWithTermVector = storeOffsetWithTermVector;
- this.storePositionWithTermVector = storePositionWithTermVector;
this.storePayloads = storePayloads;
this.omitNorms = omitNorms;
this.indexOptions = indexOptions;
+ this.normType = !omitNorms ? normsType : null;
} else { // for non-indexed fields, leave defaults
this.storeTermVector = false;
- this.storeOffsetWithTermVector = false;
- this.storePositionWithTermVector = false;
this.storePayloads = false;
this.omitNorms = false;
this.indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
+ this.normType = null;
}
- assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !storePayloads;
+ assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !storePayloads;
}
@Override
public Object clone() {
- FieldInfo clone = new FieldInfo(name, isIndexed, number, storeTermVector, storePositionWithTermVector,
- storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions, docValues);
- return clone;
+ return new FieldInfo(name, isIndexed, number, storeTermVector,
+ omitNorms, storePayloads, indexOptions, docValueType, normType);
}
// should only be called by FieldInfos#addOrUpdate
- void update(boolean isIndexed, boolean storeTermVector, boolean storePositionWithTermVector,
- boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, IndexOptions indexOptions) {
+ void update(boolean isIndexed, boolean storeTermVector, boolean omitNorms, boolean storePayloads, IndexOptions indexOptions) {
if (this.isIndexed != isIndexed) {
this.isIndexed = true; // once indexed, always index
@@ -98,12 +96,6 @@ public final class FieldInfo {
if (this.storeTermVector != storeTermVector) {
this.storeTermVector = true; // once vector, always vector
}
- if (this.storePositionWithTermVector != storePositionWithTermVector) {
- this.storePositionWithTermVector = true; // once vector, always vector
- }
- if (this.storeOffsetWithTermVector != storeOffsetWithTermVector) {
- this.storeOffsetWithTermVector = true; // once vector, always vector
- }
if (this.storePayloads != storePayloads) {
this.storePayloads = true;
}
@@ -113,35 +105,53 @@ public final class FieldInfo {
if (this.indexOptions != indexOptions) {
// downgrade
this.indexOptions = this.indexOptions.compareTo(indexOptions) < 0 ? this.indexOptions : indexOptions;
- this.storePayloads = false;
+ if (this.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+ // cannot store payloads if we don't store positions:
+ this.storePayloads = false;
+ }
}
}
- assert this.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !this.storePayloads;
+ assert this.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !this.storePayloads;
}
- void setDocValuesType(DocValues.Type v) {
- if (docValues == null) {
- docValues = v;
- }
- }
-
- public void resetDocValuesType(DocValues.Type v) {
- if (docValues != null) {
- docValues = v;
+ void setDocValuesType(DocValues.Type type, boolean force) {
+ if (docValueType == null || force) {
+ docValueType = type;
+ } else if (type != docValueType) {
+ throw new IllegalArgumentException("DocValues type already set to " + docValueType + " but was: " + type);
}
}
public boolean hasDocValues() {
- return docValues != null;
+ return docValueType != null;
}
public DocValues.Type getDocValuesType() {
- return docValues;
+ return docValueType;
+ }
+
+ public DocValues.Type getNormType() {
+ return normType;
}
- public void setStoreTermVectors(boolean withPositions, boolean withOffsets) {
+ public void setStoreTermVectors() {
storeTermVector = true;
- storePositionWithTermVector |= withPositions;
- storeOffsetWithTermVector |= withOffsets;
}
+
+ public void setNormValueType(Type type, boolean force) {
+ if (normType == null || force) {
+ normType = type;
+ } else if (type != normType) {
+ throw new IllegalArgumentException("Norm type already set to " + normType);
+ }
+ }
+
+ public boolean omitNorms() {
+ return omitNorms;
+ }
+
+ public boolean normsPresent() {
+ return isIndexed && !omitNorms && normType != null;
+ }
+
}
Modified: lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/FieldInfos.java?rev=1233096&r1=1233095&r2=1233096&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/FieldInfos.java (original)
+++ lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/FieldInfos.java Wed Jan 18 22:28:07 2012
@@ -25,7 +25,6 @@ import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.lucene.index.FieldInfo.IndexOptions;
-import org.apache.lucene.index.DocValues;
/** Access to the Field Info file that describes document fields and whether or
* not they are indexed. Each segment has a separate Field Info file. Objects
@@ -122,6 +121,16 @@ public final class FieldInfos implements
}
}
+ public FieldInfos() {
+ this(new FieldNumberBiMap());
+ }
+
+ public void add(FieldInfos other) {
+ for(FieldInfo fieldInfo : other){
+ add(fieldInfo);
+ }
+ }
+
/**
* Creates a new FieldInfos instance with the given {@link FieldNumberBiMap}.
* If the {@link FieldNumberBiMap} is <code>null</code> this instance will be read-only.
@@ -176,7 +185,7 @@ public final class FieldInfos implements
}
// mutable FIs must check!
for (FieldInfo fi : this) {
- if (fi.isIndexed && fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+ if (fi.isIndexed && fi.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
return true;
}
}
@@ -202,13 +211,10 @@ public final class FieldInfos implements
*
* @param names The names of the fields
* @param storeTermVectors Whether the fields store term vectors or not
- * @param storePositionWithTermVector true if positions should be stored.
- * @param storeOffsetWithTermVector true if offsets should be stored
*/
- synchronized public void addOrUpdateIndexed(Collection<String> names, boolean storeTermVectors, boolean storePositionWithTermVector,
- boolean storeOffsetWithTermVector) {
+ synchronized public void addOrUpdateIndexed(Collection<String> names, boolean storeTermVectors) {
for (String name : names) {
- addOrUpdate(name, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector);
+ addOrUpdate(name, true, storeTermVectors);
}
}
@@ -231,23 +237,12 @@ public final class FieldInfos implements
*
* @param name The name of the IndexableField
* @param isIndexed true if the field is indexed
- * @see #addOrUpdate(String, boolean, boolean, boolean, boolean)
+ * @see #addOrUpdate(String, boolean, boolean)
*/
synchronized public void addOrUpdate(String name, boolean isIndexed) {
- addOrUpdate(name, isIndexed, false, false, false, false);
+ addOrUpdate(name, isIndexed, false, false);
}
- /**
- * Calls 5 parameter add with false for term vector positions and offsets.
- *
- * @param name The name of the field
- * @param isIndexed true if the field is indexed
- * @param storeTermVector true if the term vector should be stored
- */
- synchronized public void addOrUpdate(String name, boolean isIndexed, boolean storeTermVector){
- addOrUpdate(name, isIndexed, storeTermVector, false, false, false);
- }
-
/** If the field is not yet known, adds it. If it is known, checks to make
* sure that the isIndexed flag is the same as was given previously for this
* field. If not - marks it as being indexed. Same goes for the TermVector
@@ -256,13 +251,9 @@ public final class FieldInfos implements
* @param name The name of the field
* @param isIndexed true if the field is indexed
* @param storeTermVector true if the term vector should be stored
- * @param storePositionWithTermVector true if the term vector with positions should be stored
- * @param storeOffsetWithTermVector true if the term vector with offsets should be stored
*/
- synchronized public void addOrUpdate(String name, boolean isIndexed, boolean storeTermVector,
- boolean storePositionWithTermVector, boolean storeOffsetWithTermVector) {
-
- addOrUpdate(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, false);
+ synchronized public void addOrUpdate(String name, boolean isIndexed, boolean storeTermVector) {
+ addOrUpdate(name, isIndexed, storeTermVector, false);
}
/** If the field is not yet known, adds it. If it is known, checks to make
@@ -273,14 +264,11 @@ public final class FieldInfos implements
* @param name The name of the field
* @param isIndexed true if the field is indexed
* @param storeTermVector true if the term vector should be stored
- * @param storePositionWithTermVector true if the term vector with positions should be stored
- * @param storeOffsetWithTermVector true if the term vector with offsets should be stored
* @param omitNorms true if the norms for the indexed field should be omitted
*/
synchronized public void addOrUpdate(String name, boolean isIndexed, boolean storeTermVector,
- boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) {
- addOrUpdate(name, isIndexed, storeTermVector, storePositionWithTermVector,
- storeOffsetWithTermVector, omitNorms, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, null);
+ boolean omitNorms) {
+ addOrUpdate(name, isIndexed, storeTermVector, omitNorms, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, null, null);
}
/** If the field is not yet known, adds it. If it is known, checks to make
@@ -291,17 +279,13 @@ public final class FieldInfos implements
* @param name The name of the field
* @param isIndexed true if the field is indexed
* @param storeTermVector true if the term vector should be stored
- * @param storePositionWithTermVector true if the term vector with positions should be stored
- * @param storeOffsetWithTermVector true if the term vector with offsets should be stored
* @param omitNorms true if the norms for the indexed field should be omitted
* @param storePayloads true if payloads should be stored for this field
* @param indexOptions if term freqs should be omitted for this field
*/
synchronized public FieldInfo addOrUpdate(String name, boolean isIndexed, boolean storeTermVector,
- boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
- boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, DocValues.Type docValues) {
- return addOrUpdateInternal(name, -1, isIndexed, storeTermVector, storePositionWithTermVector,
- storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions, docValues);
+ boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, DocValues.Type docValues, DocValues.Type normType) {
+ return addOrUpdateInternal(name, -1, isIndexed, storeTermVector, omitNorms, storePayloads, indexOptions, docValues, normType);
}
// NOTE: this method does not carry over termVector
@@ -315,49 +299,52 @@ public final class FieldInfos implements
// rather, each component in the chain should update
// what it "owns". EG fieldType.indexOptions() should
// be updated by maybe FreqProxTermsWriterPerField:
- return addOrUpdateInternal(name, -1, fieldType.indexed(), false, false, false,
+ return addOrUpdateInternal(name, -1, fieldType.indexed(), false,
fieldType.omitNorms(), false,
- fieldType.indexOptions(), null);
+ fieldType.indexOptions(), null, null);
}
synchronized private FieldInfo addOrUpdateInternal(String name, int preferredFieldNumber, boolean isIndexed,
- boolean storeTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
- boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, DocValues.Type docValues) {
+ boolean storeTermVector,
+ boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, DocValues.Type docValues, DocValues.Type normType) {
if (globalFieldNumbers == null) {
throw new IllegalStateException("FieldInfos are read-only, create a new instance with a global field map to make modifications to FieldInfos");
}
FieldInfo fi = fieldInfo(name);
if (fi == null) {
final int fieldNumber = nextFieldNumber(name, preferredFieldNumber);
- fi = addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions, docValues);
+ fi = addInternal(name, fieldNumber, isIndexed, storeTermVector, omitNorms, storePayloads, indexOptions, docValues, normType);
} else {
- fi.update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions);
- fi.setDocValuesType(docValues);
+ fi.update(isIndexed, storeTermVector, omitNorms, storePayloads, indexOptions);
+ if (docValues != null) {
+ fi.setDocValuesType(docValues, true);
+ }
+ if (normType != null) {
+ fi.setNormValueType(normType, true);
+ }
}
version++;
return fi;
}
-
+
synchronized public FieldInfo add(FieldInfo fi) {
// IMPORTANT - reuse the field number if possible for consistent field numbers across segments
return addOrUpdateInternal(fi.name, fi.number, fi.isIndexed, fi.storeTermVector,
- fi.storePositionWithTermVector, fi.storeOffsetWithTermVector,
fi.omitNorms, fi.storePayloads,
- fi.indexOptions, fi.getDocValuesType());
+ fi.indexOptions, fi.getDocValuesType(), fi.getNormType());
}
/*
* NOTE: if you call this method from a public method make sure you check if we are modifiable and throw an exception otherwise
*/
private FieldInfo addInternal(String name, int fieldNumber, boolean isIndexed,
- boolean storeTermVector, boolean storePositionWithTermVector,
- boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, DocValues.Type docValuesType) {
+ boolean storeTermVector, boolean omitNorms, boolean storePayloads,
+ IndexOptions indexOptions, DocValues.Type docValuesType, DocValues.Type normType) {
// don't check modifiable here since we use that to initially build up FIs
if (globalFieldNumbers != null) {
globalFieldNumbers.setIfNotSet(fieldNumber, name);
}
- final FieldInfo fi = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, storePositionWithTermVector,
- storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions, docValuesType);
+ final FieldInfo fi = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValuesType, normType);
putInternal(fi);
return fi;
}
@@ -390,7 +377,7 @@ public final class FieldInfos implements
* doesn't exist.
*/
public FieldInfo fieldInfo(int fieldNumber) {
- return (fieldNumber >= 0) ? byNumber.get(fieldNumber) : null;
+ return (fieldNumber >= 0) ? byNumber.get(fieldNumber) : null;
}
public Iterator<FieldInfo> iterator() {
@@ -448,7 +435,7 @@ public final class FieldInfos implements
FieldInfo clone = (FieldInfo) (fieldInfo).clone();
roFis.putInternal(clone);
roFis.hasVectors |= clone.storeTermVector;
- roFis.hasProx |= clone.isIndexed && clone.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
+ roFis.hasProx |= clone.isIndexed && clone.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
roFis.hasFreq |= clone.isIndexed && clone.indexOptions != IndexOptions.DOCS_ONLY;
}
return roFis;
Modified: lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java?rev=1233096&r1=1233095&r2=1233096&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (original)
+++ lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java Wed Jan 18 22:28:07 2012
@@ -22,7 +22,6 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
-import java.util.Collection;
import java.util.Map;
import java.util.Comparator;
@@ -177,8 +176,8 @@ public class FilterIndexReader extends I
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
- return in.docsAndPositions(liveDocs, reuse);
+ public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+ return in.docsAndPositions(liveDocs, reuse, needsOffsets);
}
@Override
@@ -260,6 +259,16 @@ public class FilterIndexReader extends I
}
@Override
+ public int startOffset() throws IOException {
+ return in.startOffset();
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ return in.endOffset();
+ }
+
+ @Override
public BytesRef getPayload() throws IOException {
return in.getPayload();
}
@@ -295,6 +304,11 @@ public class FilterIndexReader extends I
}
@Override
+ public FieldInfos getFieldInfos() {
+ return in.getFieldInfos();
+ }
+
+ @Override
public Fields getTermVectors(int docID)
throws IOException {
ensureOpen();
@@ -343,12 +357,6 @@ public class FilterIndexReader extends I
}
@Override
- public Collection<String> getFieldNames(IndexReader.FieldOption fieldNames) {
- ensureOpen();
- return in.getFieldNames(fieldNames);
- }
-
- @Override
public long getVersion() {
ensureOpen();
return in.getVersion();
Modified: lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/FilteredTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/FilteredTermsEnum.java?rev=1233096&r1=1233095&r2=1233096&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/FilteredTermsEnum.java (original)
+++ lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/FilteredTermsEnum.java Wed Jan 18 22:28:07 2012
@@ -171,8 +171,8 @@ public abstract class FilteredTermsEnum
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits bits, DocsAndPositionsEnum reuse) throws IOException {
- return tenum.docsAndPositions(bits, reuse);
+ public DocsAndPositionsEnum docsAndPositions(Bits bits, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+ return tenum.docsAndPositions(bits, reuse, needsOffsets);
}
/** This enum does not support seeking!
Modified: lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java?rev=1233096&r1=1233095&r2=1233096&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java (original)
+++ lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java Wed Jan 18 22:28:07 2012
@@ -83,7 +83,7 @@ final class FreqProxTermsWriter extends
// Aggregate the storePayload as seen by the same
// field across multiple threads
- if (fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+ if (fieldInfo.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
fieldInfo.storePayloads |= fieldWriter.hasPayloads;
}
Modified: lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java?rev=1233096&r1=1233095&r2=1233096&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java (original)
+++ lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java Wed Jan 18 22:28:07 2012
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Comparator;
import java.util.Map;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.PostingsConsumer;
@@ -43,7 +44,11 @@ final class FreqProxTermsWriterPerField
final DocumentsWriterPerThread.DocState docState;
final FieldInvertState fieldState;
IndexOptions indexOptions;
+ private boolean writeFreq;
+ private boolean writeProx;
+ private boolean writeOffsets;
PayloadAttribute payloadAttribute;
+ OffsetAttribute offsetAttribute;
public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriter parent, FieldInfo fieldInfo) {
this.termsHashPerField = termsHashPerField;
@@ -51,15 +56,16 @@ final class FreqProxTermsWriterPerField
this.fieldInfo = fieldInfo;
docState = termsHashPerField.docState;
fieldState = termsHashPerField.fieldState;
- indexOptions = fieldInfo.indexOptions;
+ setIndexOptions(fieldInfo.indexOptions);
}
@Override
int getStreamCount() {
- if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
+ if (!writeProx) {
return 1;
- else
+ } else {
return 2;
+ }
}
@Override
@@ -74,13 +80,21 @@ final class FreqProxTermsWriterPerField
return fieldInfo.name.compareTo(other.fieldInfo.name);
}
+ // Called after flush
void reset() {
// Record, up front, whether our in-RAM format will be
// with or without term freqs:
- indexOptions = fieldInfo.indexOptions;
+ setIndexOptions(fieldInfo.indexOptions);
payloadAttribute = null;
}
+ private void setIndexOptions(IndexOptions indexOptions) {
+ this.indexOptions = indexOptions;
+ writeFreq = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+ writeProx = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+ writeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+ }
+
@Override
boolean start(IndexableField[] fields, int count) {
for(int i=0;i<count;i++) {
@@ -98,9 +112,16 @@ final class FreqProxTermsWriterPerField
} else {
payloadAttribute = null;
}
+ if (writeOffsets) {
+ offsetAttribute = fieldState.attributeSource.addAttribute(OffsetAttribute.class);
+ } else {
+ offsetAttribute = null;
+ }
}
void writeProx(final int termID, int proxCode) {
+ //System.out.println("writeProx termID=" + termID + " proxCode=" + proxCode);
+ assert writeProx;
final Payload payload;
if (payloadAttribute == null) {
payload = null;
@@ -113,12 +134,24 @@ final class FreqProxTermsWriterPerField
termsHashPerField.writeVInt(1, payload.length);
termsHashPerField.writeBytes(1, payload.data, payload.offset, payload.length);
hasPayloads = true;
- } else
+ } else {
termsHashPerField.writeVInt(1, proxCode<<1);
+ }
FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
postings.lastPositions[termID] = fieldState.position;
+ }
+
+ void writeOffsets(final int termID, int prevOffset) {
+ assert writeOffsets;
+ final int startOffset = offsetAttribute.startOffset();
+ final int endOffset = offsetAttribute.endOffset();
+ //System.out.println("writeOffsets termID=" + termID + " prevOffset=" + prevOffset + " startOff=" + startOffset + " endOff=" + endOffset);
+ termsHashPerField.writeVInt(1, startOffset - prevOffset);
+ termsHashPerField.writeVInt(1, endOffset - startOffset);
+ FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
+ postings.lastOffsets[termID] = startOffset;
}
@Override
@@ -129,13 +162,18 @@ final class FreqProxTermsWriterPerField
FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
postings.lastDocIDs[termID] = docState.docID;
- if (indexOptions == IndexOptions.DOCS_ONLY) {
+ if (!writeFreq) {
postings.lastDocCodes[termID] = docState.docID;
} else {
postings.lastDocCodes[termID] = docState.docID << 1;
postings.docFreqs[termID] = 1;
- if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+ if (writeProx) {
writeProx(termID, fieldState.position);
+ if (writeOffsets) {
+ writeOffsets(termID, fieldState.offset);
+ }
+ } else {
+ assert !writeOffsets;
}
}
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
@@ -149,9 +187,10 @@ final class FreqProxTermsWriterPerField
FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
- assert indexOptions == IndexOptions.DOCS_ONLY || postings.docFreqs[termID] > 0;
+ assert !writeFreq || postings.docFreqs[termID] > 0;
- if (indexOptions == IndexOptions.DOCS_ONLY) {
+ if (!writeFreq) {
+ assert postings.docFreqs == null;
if (docState.docID != postings.lastDocIDs[termID]) {
assert docState.docID > postings.lastDocIDs[termID];
termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
@@ -159,59 +198,76 @@ final class FreqProxTermsWriterPerField
postings.lastDocIDs[termID] = docState.docID;
fieldState.uniqueTermCount++;
}
- } else {
- if (docState.docID != postings.lastDocIDs[termID]) {
- assert docState.docID > postings.lastDocIDs[termID]:"id: "+docState.docID + " postings ID: "+ postings.lastDocIDs[termID] + " termID: "+termID;
- // Term not yet seen in the current doc but previously
- // seen in other doc(s) since the last flush
-
- // Now that we know doc freq for previous doc,
- // write it & lastDocCode
- if (1 == postings.docFreqs[termID])
- termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]|1);
- else {
- termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
- termsHashPerField.writeVInt(0, postings.docFreqs[termID]);
- }
- postings.docFreqs[termID] = 1;
- fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
- postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
- postings.lastDocIDs[termID] = docState.docID;
- if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
- writeProx(termID, fieldState.position);
- }
- fieldState.uniqueTermCount++;
+ } else if (docState.docID != postings.lastDocIDs[termID]) {
+ assert docState.docID > postings.lastDocIDs[termID]:"id: "+docState.docID + " postings ID: "+ postings.lastDocIDs[termID] + " termID: "+termID;
+ // Term not yet seen in the current doc but previously
+ // seen in other doc(s) since the last flush
+
+ // Now that we know doc freq for previous doc,
+ // write it & lastDocCode
+ if (1 == postings.docFreqs[termID]) {
+ termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]|1);
} else {
- fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.docFreqs[termID]);
- if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
- writeProx(termID, fieldState.position-postings.lastPositions[termID]);
+ termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
+ termsHashPerField.writeVInt(0, postings.docFreqs[termID]);
+ }
+ postings.docFreqs[termID] = 1;
+ fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
+ postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
+ postings.lastDocIDs[termID] = docState.docID;
+ if (writeProx) {
+ writeProx(termID, fieldState.position);
+ if (writeOffsets) {
+ writeOffsets(termID, fieldState.offset);
}
+ } else {
+ assert !writeOffsets;
+ }
+ fieldState.uniqueTermCount++;
+ } else {
+ fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.docFreqs[termID]);
+ if (writeProx) {
+ writeProx(termID, fieldState.position-postings.lastPositions[termID]);
+ }
+ if (writeOffsets) {
+ writeOffsets(termID, postings.lastOffsets[termID]);
}
}
}
@Override
ParallelPostingsArray createPostingsArray(int size) {
- return new FreqProxPostingsArray(size);
+ return new FreqProxPostingsArray(size, writeFreq, writeProx, writeOffsets);
}
static final class FreqProxPostingsArray extends ParallelPostingsArray {
- public FreqProxPostingsArray(int size) {
+ public FreqProxPostingsArray(int size, boolean writeFreqs, boolean writeProx, boolean writeOffsets) {
super(size);
- docFreqs = new int[size];
+ if (writeFreqs) {
+ docFreqs = new int[size];
+ }
lastDocIDs = new int[size];
lastDocCodes = new int[size];
- lastPositions = new int[size];
+ if (writeProx) {
+ lastPositions = new int[size];
+ if (writeOffsets) {
+ lastOffsets = new int[size];
+ }
+ } else {
+ assert !writeOffsets;
+ }
+ //System.out.println("PA init freqs=" + writeFreqs + " pos=" + writeProx + " offs=" + writeOffsets);
}
int docFreqs[]; // # times this term occurs in the current doc
int lastDocIDs[]; // Last docID where this term occurred
int lastDocCodes[]; // Code for prior doc
int lastPositions[]; // Last position where this term occurred
+ int lastOffsets[]; // Last startOffset where this term occurred (base for offset deltas)
@Override
ParallelPostingsArray newInstance(int size) {
- return new FreqProxPostingsArray(size);
+ return new FreqProxPostingsArray(size, docFreqs != null, lastPositions != null, lastOffsets != null);
}
@Override
@@ -221,15 +277,36 @@ final class FreqProxTermsWriterPerField
super.copyTo(toArray, numToCopy);
- System.arraycopy(docFreqs, 0, to.docFreqs, 0, numToCopy);
System.arraycopy(lastDocIDs, 0, to.lastDocIDs, 0, numToCopy);
System.arraycopy(lastDocCodes, 0, to.lastDocCodes, 0, numToCopy);
- System.arraycopy(lastPositions, 0, to.lastPositions, 0, numToCopy);
+ if (lastPositions != null) {
+ assert to.lastPositions != null;
+ System.arraycopy(lastPositions, 0, to.lastPositions, 0, numToCopy);
+ }
+ if (lastOffsets != null) {
+ assert to.lastOffsets != null;
+ System.arraycopy(lastOffsets, 0, to.lastOffsets, 0, numToCopy);
+ }
+ if (docFreqs != null) {
+ assert to.docFreqs != null;
+ System.arraycopy(docFreqs, 0, to.docFreqs, 0, numToCopy);
+ }
}
@Override
int bytesPerPosting() {
- return ParallelPostingsArray.BYTES_PER_POSTING + 4 * RamUsageEstimator.NUM_BYTES_INT;
+ int bytes = ParallelPostingsArray.BYTES_PER_POSTING + 2 * RamUsageEstimator.NUM_BYTES_INT;
+ if (lastPositions != null) {
+ bytes += RamUsageEstimator.NUM_BYTES_INT;
+ }
+ if (lastOffsets != null) {
+ bytes += RamUsageEstimator.NUM_BYTES_INT;
+ }
+ if (docFreqs != null) {
+ bytes += RamUsageEstimator.NUM_BYTES_INT;
+ }
+
+ return bytes;
}
}
@@ -246,8 +323,33 @@ final class FreqProxTermsWriterPerField
final TermsConsumer termsConsumer = consumer.addField(fieldInfo);
final Comparator<BytesRef> termComp = termsConsumer.getComparator();
+ // CONFUSING: this.indexOptions holds the index options
+ // that were current when we first saw this field. But
+ // it's possible this has changed, eg when other
+ // documents are indexed that cause a "downgrade" of the
+ // IndexOptions. So we must decode the in-RAM buffer
+ // according to this.indexOptions, but then write the
+ // new segment to the directory according to
+ // currentFieldIndexOptions:
final IndexOptions currentFieldIndexOptions = fieldInfo.indexOptions;
+ final boolean writeTermFreq = currentFieldIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+ final boolean writePositions = currentFieldIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+ final boolean writeOffsets = currentFieldIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+
+ final boolean readTermFreq = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+ final boolean readPositions = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+ final boolean readOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+
+ //System.out.println("flush readTF=" + readTermFreq + " readPos=" + readPositions + " readOffs=" + readOffsets);
+
+ // Make sure FieldInfo.update is working correctly!:
+ assert !writeTermFreq || readTermFreq;
+ assert !writePositions || readPositions;
+ assert !writeOffsets || readOffsets;
+
+ assert !writeOffsets || writePositions;
+
final Map<Term,Integer> segDeletes;
if (state.segDeletes != null && state.segDeletes.terms.size() > 0) {
segDeletes = state.segDeletes.terms;
@@ -268,12 +370,13 @@ final class FreqProxTermsWriterPerField
for (int i = 0; i < numTerms; i++) {
final int termID = termIDs[i];
+ //System.out.println("term=" + termID);
// Get BytesRef
final int textStart = postings.textStarts[termID];
termsHashPerField.bytePool.setBytesRef(text, textStart);
termsHashPerField.initReader(freq, termID, 0);
- if (fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+ if (readPositions || readOffsets) {
termsHashPerField.initReader(prox, termID, 1);
}
@@ -303,15 +406,18 @@ final class FreqProxTermsWriterPerField
int numDocs = 0;
long totTF = 0;
int docID = 0;
- int termFreq = 0;
while(true) {
+ //System.out.println(" cycle");
+ final int termDocFreq;
if (freq.eof()) {
if (postings.lastDocCodes[termID] != -1) {
// Return last doc
docID = postings.lastDocIDs[termID];
- if (indexOptions != IndexOptions.DOCS_ONLY) {
- termFreq = postings.docFreqs[termID];
+ if (readTermFreq) {
+ termDocFreq = postings.docFreqs[termID];
+ } else {
+ termDocFreq = 0;
}
postings.lastDocCodes[termID] = -1;
} else {
@@ -320,14 +426,15 @@ final class FreqProxTermsWriterPerField
}
} else {
final int code = freq.readVInt();
- if (indexOptions == IndexOptions.DOCS_ONLY) {
+ if (!readTermFreq) {
docID += code;
+ termDocFreq = 0;
} else {
docID += code >>> 1;
if ((code & 1) != 0) {
- termFreq = 1;
+ termDocFreq = 1;
} else {
- termFreq = freq.readVInt();
+ termDocFreq = freq.readVInt();
}
}
@@ -336,7 +443,6 @@ final class FreqProxTermsWriterPerField
numDocs++;
assert docID < state.numDocs: "doc=" + docID + " maxDoc=" + state.numDocs;
- final int termDocFreq = termFreq;
// NOTE: we could check here if the docID was
// deleted, and skip it. However, this is somewhat
@@ -362,45 +468,54 @@ final class FreqProxTermsWriterPerField
state.liveDocs.clear(docID);
}
- if (currentFieldIndexOptions != IndexOptions.DOCS_ONLY) {
- totTF += termDocFreq;
- }
+ totTF += termDocFreq;
// Carefully copy over the prox + payload info,
// changing the format to match Lucene's segment
// format.
- if (currentFieldIndexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
- // we do write positions & payload
+ if (readPositions || readOffsets) {
+ // we did record positions (& maybe payload) and/or offsets
int position = 0;
+ int offset = 0;
for(int j=0;j<termDocFreq;j++) {
- final int code = prox.readVInt();
- position += code >> 1;
-
- final int payloadLength;
final BytesRef thisPayload;
- if ((code & 1) != 0) {
- // This position has a payload
- payloadLength = prox.readVInt();
+ if (readPositions) {
+ final int code = prox.readVInt();
+ position += code >> 1;
+
+ if ((code & 1) != 0) {
+
+ // This position has a payload
+ final int payloadLength = prox.readVInt();
+
+ if (payload == null) {
+ payload = new BytesRef();
+ payload.bytes = new byte[payloadLength];
+ } else if (payload.bytes.length < payloadLength) {
+ payload.grow(payloadLength);
+ }
+
+ prox.readBytes(payload.bytes, 0, payloadLength);
+ payload.length = payloadLength;
+ thisPayload = payload;
- if (payload == null) {
- payload = new BytesRef();
- payload.bytes = new byte[payloadLength];
- } else if (payload.bytes.length < payloadLength) {
- payload.grow(payloadLength);
+ } else {
+ thisPayload = null;
}
- prox.readBytes(payload.bytes, 0, payloadLength);
- payload.length = payloadLength;
- thisPayload = payload;
-
- } else {
- payloadLength = 0;
- thisPayload = null;
+ if (readOffsets) {
+ final int startOffset = offset + prox.readVInt();
+ final int endOffset = startOffset + prox.readVInt();
+ offset = startOffset;
+ if (writePositions) {
+ postingsConsumer.addPosition(position, thisPayload, startOffset, endOffset);
+ }
+ } else if (writePositions) {
+ postingsConsumer.addPosition(position, thisPayload, -1, -1);
+ }
}
-
- postingsConsumer.addPosition(position, thisPayload);
}
postingsConsumer.finishDoc();
@@ -413,6 +528,4 @@ final class FreqProxTermsWriterPerField
termsConsumer.finish(sumTotalTermFreq, sumDocFreq, visitedDocs.cardinality());
}
-
}
-