You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/10/21 20:58:44 UTC
svn commit: r1534320 [25/39] - in /lucene/dev/branches/lucene4956: ./
dev-tools/ dev-tools/idea/.idea/ dev-tools/idea/lucene/expressions/
dev-tools/idea/solr/contrib/velocity/ dev-tools/maven/
dev-tools/maven/lucene/ dev-tools/maven/lucene/expressions/...
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastDecompressionCompressingCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastDecompressionCompressingCodec.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastDecompressionCompressingCodec.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastDecompressionCompressingCodec.java Mon Oct 21 18:58:24 2013
@@ -1,8 +1,6 @@
package org.apache.lucene.codecs.compressing;
-import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.NormsFormat;
-import org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat;
import org.apache.lucene.codecs.lucene42.Lucene42NormsFormat;
import org.apache.lucene.util.packed.PackedInts;
@@ -42,9 +40,4 @@ public class FastDecompressionCompressin
public NormsFormat normsFormat() {
return new Lucene42NormsFormat(PackedInts.DEFAULT);
}
-
- @Override
- public DocValuesFormat docValuesFormat() {
- return new Lucene42DocValuesFormat(PackedInts.DEFAULT);
- }
}
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java Mon Oct 21 18:58:24 2013
@@ -24,7 +24,9 @@ import java.util.TreeSet;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.codecs.MissingOrdRemapper;
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosReader.LegacyDocValuesType;
+import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
@@ -54,7 +56,7 @@ class Lucene40DocValuesWriter extends Do
long minValue = Long.MAX_VALUE;
long maxValue = Long.MIN_VALUE;
for (Number n : values) {
- long v = n.longValue();
+ long v = n == null ? 0 : n.longValue();
minValue = Math.min(minValue, v);
maxValue = Math.max(maxValue, v);
}
@@ -92,7 +94,7 @@ class Lucene40DocValuesWriter extends Do
Lucene40DocValuesFormat.INTS_VERSION_CURRENT);
output.writeInt(1); // size
for (Number n : values) {
- output.writeByte(n.byteValue());
+ output.writeByte(n == null ? 0 : n.byteValue());
}
}
@@ -103,7 +105,7 @@ class Lucene40DocValuesWriter extends Do
Lucene40DocValuesFormat.INTS_VERSION_CURRENT);
output.writeInt(2); // size
for (Number n : values) {
- output.writeShort(n.shortValue());
+ output.writeShort(n == null ? 0 : n.shortValue());
}
}
@@ -114,7 +116,7 @@ class Lucene40DocValuesWriter extends Do
Lucene40DocValuesFormat.INTS_VERSION_CURRENT);
output.writeInt(4); // size
for (Number n : values) {
- output.writeInt(n.intValue());
+ output.writeInt(n == null ? 0 : n.intValue());
}
}
@@ -131,7 +133,7 @@ class Lucene40DocValuesWriter extends Do
// writes longs
output.writeByte(Lucene40DocValuesFormat.VAR_INTS_FIXED_64);
for (Number n : values) {
- output.writeLong(n.longValue());
+ output.writeLong(n == null ? 0 : n.longValue());
}
} else {
// writes packed ints
@@ -143,7 +145,8 @@ class Lucene40DocValuesWriter extends Do
PackedInts.bitsRequired(delta),
PackedInts.DEFAULT);
for (Number n : values) {
- writer.add(n.longValue() - minValue);
+ long v = n == null ? 0 : n.longValue();
+ writer.add(v - minValue);
}
writer.finish();
}
@@ -156,6 +159,12 @@ class Lucene40DocValuesWriter extends Do
int minLength = Integer.MAX_VALUE;
int maxLength = Integer.MIN_VALUE;
for (BytesRef b : values) {
+ if (b == null) {
+ b = new BytesRef(); // 4.0 doesn't distinguish missing from empty
+ }
+ if (b.length > Lucene40DocValuesFormat.MAX_BINARY_FIELD_LENGTH) {
+ throw new IllegalArgumentException("DocValuesField \"" + field.name + "\" is too large, must be <= " + Lucene40DocValuesFormat.MAX_BINARY_FIELD_LENGTH);
+ }
minLength = Math.min(minLength, b.length);
maxLength = Math.max(maxLength, b.length);
if (uniqueValues != null) {
@@ -243,7 +252,9 @@ class Lucene40DocValuesWriter extends Do
output.writeInt(length);
for (BytesRef v : values) {
- output.writeBytes(v.bytes, v.offset, v.length);
+ if (v != null) {
+ output.writeBytes(v.bytes, v.offset, v.length);
+ }
}
}
@@ -264,7 +275,9 @@ class Lucene40DocValuesWriter extends Do
final long startPos = data.getFilePointer();
for (BytesRef v : values) {
- data.writeBytes(v.bytes, v.offset, v.length);
+ if (v != null) {
+ data.writeBytes(v.bytes, v.offset, v.length);
+ }
}
/* addresses */
@@ -279,7 +292,9 @@ class Lucene40DocValuesWriter extends Do
long currentPosition = 0;
for (BytesRef v : values) {
w.add(currentPosition);
- currentPosition += v.length;
+ if (v != null) {
+ currentPosition += v.length;
+ }
}
// write sentinel
assert currentPosition == maxAddress;
@@ -301,7 +316,7 @@ class Lucene40DocValuesWriter extends Do
// deduplicate
TreeSet<BytesRef> dictionary = new TreeSet<BytesRef>();
for (BytesRef v : values) {
- dictionary.add(BytesRef.deepCopyOf(v));
+ dictionary.add(v == null ? new BytesRef() : BytesRef.deepCopyOf(v));
}
/* values */
@@ -318,6 +333,9 @@ class Lucene40DocValuesWriter extends Do
final PackedInts.Writer w = PackedInts.getWriter(index, maxDoc, PackedInts.bitsRequired(valueCount-1), PackedInts.DEFAULT);
for (BytesRef v : values) {
+ if (v == null) {
+ v = new BytesRef();
+ }
int ord = dictionary.headSet(v).size();
w.add(ord);
}
@@ -338,7 +356,7 @@ class Lucene40DocValuesWriter extends Do
// deduplicate
TreeSet<BytesRef> dictionary = new TreeSet<BytesRef>();
for (BytesRef v : values) {
- dictionary.add(BytesRef.deepCopyOf(v));
+ dictionary.add(v == null ? new BytesRef() : BytesRef.deepCopyOf(v));
}
/* values */
@@ -359,7 +377,7 @@ class Lucene40DocValuesWriter extends Do
final PackedInts.Writer w = PackedInts.getWriter(index, maxDoc, PackedInts.bitsRequired(currentAddress), PackedInts.DEFAULT);
for (BytesRef v : values) {
- w.add(valueToAddress.get(v));
+ w.add(valueToAddress.get(v == null ? new BytesRef() : v));
}
w.finish();
}
@@ -385,6 +403,15 @@ class Lucene40DocValuesWriter extends Do
maxLength = Math.max(maxLength, b.length);
}
+ // but don't use fixed if there are missing values (we are simulating how lucene40 wrote dv...)
+ boolean anyMissing = false;
+ for (Number n : docToOrd) {
+ if (n.longValue() == -1) {
+ anyMissing = true;
+ break;
+ }
+ }
+
boolean success = false;
IndexOutput data = null;
IndexOutput index = null;
@@ -394,12 +421,22 @@ class Lucene40DocValuesWriter extends Do
try {
data = dir.createOutput(dataName, state.context);
index = dir.createOutput(indexName, state.context);
- if (minLength == maxLength) {
+ if (minLength == maxLength && !anyMissing) {
// fixed byte[]
addFixedSortedBytesField(field, data, index, values, docToOrd, minLength);
} else {
// var byte[]
- addVarSortedBytesField(field, data, index, values, docToOrd);
+ // three cases for simulating the old writer:
+ // 1. no missing
+ // 2. missing (and empty string in use): remap ord=-1 -> ord=0
+ // 3. missing (and empty string not in use): remap all ords +1, insert empty string into values
+ if (!anyMissing) {
+ addVarSortedBytesField(field, data, index, values, docToOrd);
+ } else if (minLength == 0) {
+ addVarSortedBytesField(field, data, index, values, MissingOrdRemapper.mapMissingToOrd0(docToOrd));
+ } else {
+ addVarSortedBytesField(field, data, index, MissingOrdRemapper.insertEmptyValue(values), MissingOrdRemapper.mapAllOrds(docToOrd));
+ }
}
success = true;
} finally {
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java Mon Oct 21 18:58:24 2013
@@ -37,6 +37,7 @@ import org.apache.lucene.util.IOUtils;
* @see Lucene40FieldInfosFormat
* @lucene.experimental
*/
+@Deprecated
public class Lucene40FieldInfosWriter extends FieldInfosWriter {
/** Sole constructor. */
@@ -44,7 +45,7 @@ public class Lucene40FieldInfosWriter ex
}
@Override
- public void write(Directory directory, String segmentName, FieldInfos infos, IOContext context) throws IOException {
+ public void write(Directory directory, String segmentName, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segmentName, "", Lucene40FieldInfosFormat.FIELD_INFOS_EXTENSION);
IndexOutput output = directory.createOutput(fileName, context);
boolean success = false;
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java Mon Oct 21 18:58:24 2013
@@ -21,20 +21,18 @@ package org.apache.lucene.codecs.lucene4
* index file format */
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
+import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.codecs.PostingsWriterBase;
-import org.apache.lucene.codecs.TermStats;
+import org.apache.lucene.codecs.PushPostingsWriterBase;
import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.DocsEnum; // javadocs
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
@@ -44,7 +42,7 @@ import org.apache.lucene.util.IOUtils;
* @see Lucene40PostingsFormat
* @lucene.experimental
*/
-public final class Lucene40PostingsWriter extends PostingsWriterBase {
+public final class Lucene40PostingsWriter extends PushPostingsWriterBase {
final IndexOutput freqOut;
final IndexOutput proxOut;
@@ -67,20 +65,18 @@ public final class Lucene40PostingsWrite
*/
final int maxSkipLevels = 10;
final int totalNumDocs;
- IndexOutput termsOut;
- IndexOptions indexOptions;
- boolean storePayloads;
- boolean storeOffsets;
// Starts a new term
long freqStart;
long proxStart;
- FieldInfo fieldInfo;
int lastPayloadLength;
int lastOffsetLength;
int lastPosition;
int lastOffset;
+ final static StandardTermState emptyState = new StandardTermState();
+ StandardTermState lastState;
+
// private String segment;
/** Creates a {@link Lucene40PostingsWriter}, with the
@@ -134,8 +130,7 @@ public final class Lucene40PostingsWrite
}
@Override
- public void start(IndexOutput termsOut) throws IOException {
- this.termsOut = termsOut;
+ public void init(IndexOutput termsOut) throws IOException {
CodecUtil.writeHeader(termsOut, Lucene40PostingsReader.TERMS_CODEC, Lucene40PostingsReader.VERSION_CURRENT);
termsOut.writeInt(skipInterval); // write skipInterval
termsOut.writeInt(maxSkipLevels); // write maxSkipLevels
@@ -143,6 +138,11 @@ public final class Lucene40PostingsWrite
}
@Override
+ public BlockTermState newTermState() {
+ return new StandardTermState();
+ }
+
+ @Override
public void startTerm() {
freqStart = freqOut.getFilePointer();
//if (DEBUG) System.out.println("SPW: startTerm freqOut.fp=" + freqStart);
@@ -159,7 +159,8 @@ public final class Lucene40PostingsWrite
// Currently, this instance is re-used across fields, so
// our parent calls setField whenever the field changes
@Override
- public void setField(FieldInfo fieldInfo) {
+ public int setField(FieldInfo fieldInfo) {
+ super.setField(fieldInfo);
//System.out.println("SPW: setField");
/*
if (BlockTreeTermsWriter.DEBUG && fieldInfo.name.equals("id")) {
@@ -168,18 +169,16 @@ public final class Lucene40PostingsWrite
DEBUG = false;
}
*/
- this.fieldInfo = fieldInfo;
- indexOptions = fieldInfo.getIndexOptions();
-
- storeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
- storePayloads = fieldInfo.hasPayloads();
+
+ lastState = emptyState;
//System.out.println(" set init blockFreqStart=" + freqStart);
//System.out.println(" set init blockProxStart=" + proxStart);
+ return 0;
}
int lastDocID;
int df;
-
+
@Override
public void startDoc(int docID, int termDocFreq) throws IOException {
// if (DEBUG) System.out.println("SPW: startDoc seg=" + segment + " docID=" + docID + " tf=" + termDocFreq + " freqOut.fp=" + freqOut.getFilePointer());
@@ -191,7 +190,7 @@ public final class Lucene40PostingsWrite
}
if ((++df % skipInterval) == 0) {
- skipListWriter.setSkipData(lastDocID, storePayloads, lastPayloadLength, storeOffsets, lastOffsetLength);
+ skipListWriter.setSkipData(lastDocID, writePayloads, lastPayloadLength, writeOffsets, lastOffsetLength);
skipListWriter.bufferSkip(df);
}
@@ -226,7 +225,7 @@ public final class Lucene40PostingsWrite
int payloadLength = 0;
- if (storePayloads) {
+ if (writePayloads) {
payloadLength = payload == null ? 0 : payload.length;
if (payloadLength != lastPayloadLength) {
@@ -240,7 +239,7 @@ public final class Lucene40PostingsWrite
proxOut.writeVInt(delta);
}
- if (storeOffsets) {
+ if (writeOffsets) {
// don't use startOffset - lastEndOffset, because this creates lots of negative vints for synonyms,
// and the numbers aren't that much smaller anyways.
int offsetDelta = startOffset - lastOffset;
@@ -265,94 +264,48 @@ public final class Lucene40PostingsWrite
public void finishDoc() {
}
- private static class PendingTerm {
- public final long freqStart;
- public final long proxStart;
- public final long skipOffset;
-
- public PendingTerm(long freqStart, long proxStart, long skipOffset) {
- this.freqStart = freqStart;
- this.proxStart = proxStart;
- this.skipOffset = skipOffset;
- }
+ private static class StandardTermState extends BlockTermState {
+ public long freqStart;
+ public long proxStart;
+ public long skipOffset;
}
- private final List<PendingTerm> pendingTerms = new ArrayList<PendingTerm>();
-
/** Called when we are done adding docs to this term */
@Override
- public void finishTerm(TermStats stats) throws IOException {
-
+ public void finishTerm(BlockTermState _state) throws IOException {
+ StandardTermState state = (StandardTermState) _state;
// if (DEBUG) System.out.println("SPW: finishTerm seg=" + segment + " freqStart=" + freqStart);
- assert stats.docFreq > 0;
+ assert state.docFreq > 0;
// TODO: wasteful we are counting this (counting # docs
// for this term) in two places?
- assert stats.docFreq == df;
-
- final long skipOffset;
+ assert state.docFreq == df;
+ state.freqStart = freqStart;
+ state.proxStart = proxStart;
if (df >= skipMinimum) {
- skipOffset = skipListWriter.writeSkip(freqOut)-freqStart;
+ state.skipOffset = skipListWriter.writeSkip(freqOut)-freqStart;
} else {
- skipOffset = -1;
+ state.skipOffset = -1;
}
-
- pendingTerms.add(new PendingTerm(freqStart, proxStart, skipOffset));
-
lastDocID = 0;
df = 0;
}
- private final RAMOutputStream bytesWriter = new RAMOutputStream();
-
@Override
- public void flushTermsBlock(int start, int count) throws IOException {
- //if (DEBUG) System.out.println("SPW: flushTermsBlock start=" + start + " count=" + count + " left=" + (pendingTerms.size()-count) + " pendingTerms.size()=" + pendingTerms.size());
-
- if (count == 0) {
- termsOut.writeByte((byte) 0);
- return;
- }
-
- assert start <= pendingTerms.size();
- assert count <= start;
-
- final int limit = pendingTerms.size() - start + count;
- final PendingTerm firstTerm = pendingTerms.get(limit - count);
- // First term in block is abs coded:
- bytesWriter.writeVLong(firstTerm.freqStart);
-
- if (firstTerm.skipOffset != -1) {
- assert firstTerm.skipOffset > 0;
- bytesWriter.writeVLong(firstTerm.skipOffset);
+ public void encodeTerm(long[] empty, DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
+ StandardTermState state = (StandardTermState)_state;
+ if (absolute) {
+ lastState = emptyState;
+ }
+ out.writeVLong(state.freqStart - lastState.freqStart);
+ if (state.skipOffset != -1) {
+ assert state.skipOffset > 0;
+ out.writeVLong(state.skipOffset);
}
if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
- bytesWriter.writeVLong(firstTerm.proxStart);
- }
- long lastFreqStart = firstTerm.freqStart;
- long lastProxStart = firstTerm.proxStart;
- for(int idx=limit-count+1; idx<limit; idx++) {
- final PendingTerm term = pendingTerms.get(idx);
- //if (DEBUG) System.out.println(" write term freqStart=" + term.freqStart);
- // The rest of the terms term are delta coded:
- bytesWriter.writeVLong(term.freqStart - lastFreqStart);
- lastFreqStart = term.freqStart;
- if (term.skipOffset != -1) {
- assert term.skipOffset > 0;
- bytesWriter.writeVLong(term.skipOffset);
- }
- if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
- bytesWriter.writeVLong(term.proxStart - lastProxStart);
- lastProxStart = term.proxStart;
- }
+ out.writeVLong(state.proxStart - lastState.proxStart);
}
-
- termsOut.writeVInt((int) bytesWriter.getFilePointer());
- bytesWriter.writeTo(termsOut);
- bytesWriter.reset();
-
- // Remove the terms we just wrote:
- pendingTerms.subList(limit-count, limit).clear();
+ lastState = state;
}
@Override
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWCodec.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWCodec.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWCodec.java Mon Oct 21 18:58:24 2013
@@ -6,6 +6,8 @@ import org.apache.lucene.codecs.DocValue
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FieldInfosWriter;
import org.apache.lucene.codecs.NormsFormat;
+import org.apache.lucene.codecs.SegmentInfoFormat;
+import org.apache.lucene.util.LuceneTestCase;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -25,14 +27,31 @@ import org.apache.lucene.codecs.NormsFor
*/
/** Read-write version of Lucene40Codec for testing */
+@SuppressWarnings("deprecation")
public final class Lucene40RWCodec extends Lucene40Codec {
+
private final FieldInfosFormat fieldInfos = new Lucene40FieldInfosFormat() {
@Override
public FieldInfosWriter getFieldInfosWriter() throws IOException {
- return new Lucene40FieldInfosWriter();
+ if (!LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) {
+ return super.getFieldInfosWriter();
+ } else {
+ return new Lucene40FieldInfosWriter();
+ }
}
};
+ private final SegmentInfoFormat infosFormat = new Lucene40SegmentInfoFormat() {
+ @Override
+ public org.apache.lucene.codecs.SegmentInfoWriter getSegmentInfoWriter() {
+ if (!LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) {
+ return super.getSegmentInfoWriter();
+ } else {
+ return new Lucene40SegmentInfoWriter();
+ }
+ }
+ };
+
private final DocValuesFormat docValues = new Lucene40RWDocValuesFormat();
private final NormsFormat norms = new Lucene40RWNormsFormat();
@@ -50,4 +69,10 @@ public final class Lucene40RWCodec exten
public NormsFormat normsFormat() {
return norms;
}
+
+ @Override
+ public SegmentInfoFormat segmentInfoFormat() {
+ return infosFormat;
+ }
+
}
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWDocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWDocValuesFormat.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWDocValuesFormat.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWDocValuesFormat.java Mon Oct 21 18:58:24 2013
@@ -22,15 +22,21 @@ import java.io.IOException;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.util.LuceneTestCase;
/** Read-write version of {@link Lucene40DocValuesFormat} for testing */
+@SuppressWarnings("deprecation")
public class Lucene40RWDocValuesFormat extends Lucene40DocValuesFormat {
@Override
public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
- String filename = IndexFileNames.segmentFileName(state.segmentInfo.name,
- "dv",
- IndexFileNames.COMPOUND_FILE_EXTENSION);
- return new Lucene40DocValuesWriter(state, filename, Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY);
+ if (!LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) {
+ return super.fieldsConsumer(state);
+ } else {
+ String filename = IndexFileNames.segmentFileName(state.segmentInfo.name,
+ "dv",
+ IndexFileNames.COMPOUND_FILE_EXTENSION);
+ return new Lucene40DocValuesWriter(state, filename, Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY);
+ }
}
}
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWNormsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWNormsFormat.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWNormsFormat.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWNormsFormat.java Mon Oct 21 18:58:24 2013
@@ -22,15 +22,21 @@ import java.io.IOException;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.util.LuceneTestCase;
/** Read-write version of {@link Lucene40NormsFormat} for testing */
+@SuppressWarnings("deprecation")
public class Lucene40RWNormsFormat extends Lucene40NormsFormat {
@Override
public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException {
- String filename = IndexFileNames.segmentFileName(state.segmentInfo.name,
- "nrm",
- IndexFileNames.COMPOUND_FILE_EXTENSION);
- return new Lucene40DocValuesWriter(state, filename, Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY);
+ if (!LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) {
+ return super.normsConsumer(state);
+ } else {
+ String filename = IndexFileNames.segmentFileName(state.segmentInfo.name,
+ "nrm",
+ IndexFileNames.COMPOUND_FILE_EXTENSION);
+ return new Lucene40DocValuesWriter(state, filename, Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY);
+ }
}
}
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWPostingsFormat.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWPostingsFormat.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWPostingsFormat.java Mon Oct 21 18:58:24 2013
@@ -1,12 +1,5 @@
package org.apache.lucene.codecs.lucene40;
-import java.io.IOException;
-
-import org.apache.lucene.codecs.BlockTreeTermsWriter;
-import org.apache.lucene.codecs.FieldsConsumer;
-import org.apache.lucene.codecs.PostingsWriterBase;
-import org.apache.lucene.index.SegmentWriteState;
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -24,26 +17,40 @@ import org.apache.lucene.index.SegmentWr
* limitations under the License.
*/
+import java.io.IOException;
+
+import org.apache.lucene.codecs.BlockTreeTermsWriter;
+import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.PostingsWriterBase;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.util.LuceneTestCase;
+
/**
* Read-write version of {@link Lucene40PostingsFormat} for testing.
*/
+@SuppressWarnings("deprecation")
public class Lucene40RWPostingsFormat extends Lucene40PostingsFormat {
+
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
- PostingsWriterBase docs = new Lucene40PostingsWriter(state);
-
- // TODO: should we make the terms index more easily
- // pluggable? Ie so that this codec would record which
- // index impl was used, and switch on loading?
- // Or... you must make a new Codec for this?
- boolean success = false;
- try {
- FieldsConsumer ret = new BlockTreeTermsWriter(state, docs, minBlockSize, maxBlockSize);
- success = true;
- return ret;
- } finally {
- if (!success) {
- docs.close();
+ if (!LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) {
+ return super.fieldsConsumer(state);
+ } else {
+ PostingsWriterBase docs = new Lucene40PostingsWriter(state);
+
+ // TODO: should we make the terms index more easily
+ // pluggable? Ie so that this codec would record which
+ // index impl was used, and switch on loading?
+ // Or... you must make a new Codec for this?
+ boolean success = false;
+ try {
+ FieldsConsumer ret = new BlockTreeTermsWriter(state, docs, minBlockSize, maxBlockSize);
+ success = true;
+ return ret;
+ } finally {
+ if (!success) {
+ docs.close();
+ }
}
}
}
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41/Lucene41RWCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41/Lucene41RWCodec.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41/Lucene41RWCodec.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41/Lucene41RWCodec.java Mon Oct 21 18:58:24 2013
@@ -6,11 +6,15 @@ import org.apache.lucene.codecs.DocValue
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FieldInfosWriter;
import org.apache.lucene.codecs.NormsFormat;
+import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat;
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosWriter;
import org.apache.lucene.codecs.lucene40.Lucene40RWDocValuesFormat;
import org.apache.lucene.codecs.lucene40.Lucene40RWNormsFormat;
+import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat;
+import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoWriter;
+import org.apache.lucene.util.LuceneTestCase;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -32,18 +36,34 @@ import org.apache.lucene.codecs.lucene40
/**
* Read-write version of {@link Lucene41Codec} for testing.
*/
+@SuppressWarnings("deprecation")
public class Lucene41RWCodec extends Lucene41Codec {
private final StoredFieldsFormat fieldsFormat = new Lucene41StoredFieldsFormat();
private final FieldInfosFormat fieldInfos = new Lucene40FieldInfosFormat() {
@Override
public FieldInfosWriter getFieldInfosWriter() throws IOException {
- return new Lucene40FieldInfosWriter();
+ if (!LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) {
+ return super.getFieldInfosWriter();
+ } else {
+ return new Lucene40FieldInfosWriter();
+ }
}
};
private final DocValuesFormat docValues = new Lucene40RWDocValuesFormat();
private final NormsFormat norms = new Lucene40RWNormsFormat();
+ private final SegmentInfoFormat segmentInfosFormat = new Lucene40SegmentInfoFormat() {
+ @Override
+ public org.apache.lucene.codecs.SegmentInfoWriter getSegmentInfoWriter() {
+ if (!LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) {
+ return super.getSegmentInfoWriter();
+ } else {
+ return new Lucene40SegmentInfoWriter();
+ }
+ }
+ };
+
@Override
public FieldInfosFormat fieldInfosFormat() {
return fieldInfos;
@@ -63,4 +83,10 @@ public class Lucene41RWCodec extends Luc
public NormsFormat normsFormat() {
return norms;
}
+
+ @Override
+ public SegmentInfoFormat segmentInfoFormat() {
+ return segmentInfosFormat;
+ }
+
}
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java Mon Oct 21 18:58:24 2013
@@ -37,8 +37,8 @@ import org.apache.lucene.codecs.sep.IntI
import org.apache.lucene.codecs.sep.IntStreamFactory;
import org.apache.lucene.codecs.sep.SepPostingsReader;
import org.apache.lucene.codecs.sep.SepPostingsWriter;
-import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockVariableIntBlockPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockVariableIntBlockPostingsFormat.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockVariableIntBlockPostingsFormat.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockVariableIntBlockPostingsFormat.java Mon Oct 21 18:58:24 2013
@@ -37,8 +37,8 @@ import org.apache.lucene.codecs.sep.IntI
import org.apache.lucene.codecs.sep.IntStreamFactory;
import org.apache.lucene.codecs.sep.SepPostingsReader;
import org.apache.lucene.codecs.sep.SepPostingsWriter;
-import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java Mon Oct 21 18:58:24 2013
@@ -40,6 +40,10 @@ import org.apache.lucene.codecs.blockter
import org.apache.lucene.codecs.blockterms.VariableGapTermsIndexWriter;
import org.apache.lucene.codecs.lucene41.Lucene41PostingsReader;
import org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter;
+import org.apache.lucene.codecs.memory.FSTOrdTermsReader;
+import org.apache.lucene.codecs.memory.FSTOrdTermsWriter;
+import org.apache.lucene.codecs.memory.FSTTermsReader;
+import org.apache.lucene.codecs.memory.FSTTermsWriter;
import org.apache.lucene.codecs.mockintblock.MockFixedIntBlockPostingsFormat;
import org.apache.lucene.codecs.mockintblock.MockVariableIntBlockPostingsFormat;
import org.apache.lucene.codecs.mocksep.MockSingleIntFactory;
@@ -183,12 +187,33 @@ public final class MockRandomPostingsFor
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: writing pulsing postings with totTFCutoff=" + totTFCutoff);
}
- postingsWriter = new PulsingPostingsWriter(totTFCutoff, postingsWriter);
+ postingsWriter = new PulsingPostingsWriter(state, totTFCutoff, postingsWriter);
}
final FieldsConsumer fields;
+ final int t1 = random.nextInt(4);
- if (random.nextBoolean()) {
+ if (t1 == 0) {
+ boolean success = false;
+ try {
+ fields = new FSTTermsWriter(state, postingsWriter);
+ success = true;
+ } finally {
+ if (!success) {
+ postingsWriter.close();
+ }
+ }
+ } else if (t1 == 1) {
+ boolean success = false;
+ try {
+ fields = new FSTOrdTermsWriter(state, postingsWriter);
+ success = true;
+ } finally {
+ if (!success) {
+ postingsWriter.close();
+ }
+ }
+ } else if (t1 == 2) {
// Use BlockTree terms dict
if (LuceneTestCase.VERBOSE) {
@@ -322,12 +347,32 @@ public final class MockRandomPostingsFor
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: reading pulsing postings with totTFCutoff=" + totTFCutoff);
}
- postingsReader = new PulsingPostingsReader(postingsReader);
+ postingsReader = new PulsingPostingsReader(state, postingsReader);
}
final FieldsProducer fields;
-
- if (random.nextBoolean()) {
+ final int t1 = random.nextInt(4);
+ if (t1 == 0) {
+ boolean success = false;
+ try {
+ fields = new FSTTermsReader(state, postingsReader);
+ success = true;
+ } finally {
+ if (!success) {
+ postingsReader.close();
+ }
+ }
+ } else if (t1 == 1) {
+ boolean success = false;
+ try {
+ fields = new FSTOrdTermsReader(state, postingsReader);
+ success = true;
+ } finally {
+ if (!success) {
+ postingsReader.close();
+ }
+ }
+ } else if (t1 == 2) {
// Use BlockTree terms dict
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: reading BlockTree terms dict");
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSepPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSepPostingsFormat.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSepPostingsFormat.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSepPostingsFormat.java Mon Oct 21 18:58:24 2013
@@ -32,8 +32,8 @@ import org.apache.lucene.codecs.blockter
import org.apache.lucene.codecs.blockterms.TermsIndexWriterBase;
import org.apache.lucene.codecs.sep.SepPostingsReader;
import org.apache.lucene.codecs.sep.SepPostingsWriter;
-import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.BytesRef;
/**
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntIndexOutput.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntIndexOutput.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntIndexOutput.java Mon Oct 21 18:58:24 2013
@@ -18,6 +18,7 @@ package org.apache.lucene.codecs.mocksep
*/
import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
@@ -86,7 +87,7 @@ public class MockSingleIntIndexOutput ex
}
}
@Override
- public void write(IndexOutput indexOut, boolean absolute)
+ public void write(DataOutput indexOut, boolean absolute)
throws IOException {
if (absolute) {
indexOut.writeVLong(fp);
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java Mon Oct 21 18:58:24 2013
@@ -57,8 +57,8 @@ public final class NestedPulsingPostings
try {
docsWriter = new Lucene41PostingsWriter(state);
- pulsingWriterInner = new PulsingPostingsWriter(2, docsWriter);
- pulsingWriter = new PulsingPostingsWriter(1, pulsingWriterInner);
+ pulsingWriterInner = new PulsingPostingsWriter(state, 2, docsWriter);
+ pulsingWriter = new PulsingPostingsWriter(state, 1, pulsingWriterInner);
FieldsConsumer ret = new BlockTreeTermsWriter(state, pulsingWriter,
BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
success = true;
@@ -78,8 +78,8 @@ public final class NestedPulsingPostings
boolean success = false;
try {
docsReader = new Lucene41PostingsReader(state.directory, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix);
- pulsingReaderInner = new PulsingPostingsReader(docsReader);
- pulsingReader = new PulsingPostingsReader(pulsingReaderInner);
+ pulsingReaderInner = new PulsingPostingsReader(state, docsReader);
+ pulsingReader = new PulsingPostingsReader(state, pulsingReaderInner);
FieldsProducer ret = new BlockTreeTermsReader(
state.directory, state.fieldInfos, state.segmentInfo,
pulsingReader,
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java Mon Oct 21 18:58:24 2013
@@ -20,7 +20,6 @@ package org.apache.lucene.codecs.ramonly
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
-import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
@@ -32,14 +31,13 @@ import java.util.concurrent.atomic.Atomi
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
-import org.apache.lucene.codecs.PostingsConsumer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.TermStats;
-import org.apache.lucene.codecs.TermsConsumer;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
@@ -49,7 +47,9 @@ import org.apache.lucene.store.IndexInpu
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.RamUsageEstimator;
/** Stores all postings data in RAM, but writes a small
* token (header + single int) to identify which "slot" the
@@ -59,41 +59,6 @@ import org.apache.lucene.util.IOUtils;
public final class RAMOnlyPostingsFormat extends PostingsFormat {
- // For fun, test that we can override how terms are
- // sorted, and basic things still work -- this comparator
- // sorts in reversed unicode code point order:
- private static final Comparator<BytesRef> reverseUnicodeComparator = new Comparator<BytesRef>() {
- @Override
- public int compare(BytesRef t1, BytesRef t2) {
- byte[] b1 = t1.bytes;
- byte[] b2 = t2.bytes;
- int b1Stop;
- int b1Upto = t1.offset;
- int b2Upto = t2.offset;
- if (t1.length < t2.length) {
- b1Stop = t1.offset + t1.length;
- } else {
- b1Stop = t1.offset + t2.length;
- }
- while(b1Upto < b1Stop) {
- final int bb1 = b1[b1Upto++] & 0xff;
- final int bb2 = b2[b2Upto++] & 0xff;
- if (bb1 != bb2) {
- //System.out.println("cmp 1=" + t1 + " 2=" + t2 + " return " + (bb2-bb1));
- return bb2 - bb1;
- }
- }
-
- // One is prefix of another, or they are equal
- return t2.length-t1.length;
- }
-
- @Override
- public boolean equals(Object other) {
- return this == other;
- }
- };
-
public RAMOnlyPostingsFormat() {
super("RAMOnly");
}
@@ -120,6 +85,15 @@ public final class RAMOnlyPostingsFormat
@Override
public void close() {
}
+
+ @Override
+ public long ramBytesUsed() {
+ long sizeInBytes = 0;
+ for(RAMField field : fieldToTerms.values()) {
+ sizeInBytes += field.ramBytesUsed();
+ }
+ return sizeInBytes;
+ }
}
static class RAMField extends Terms {
@@ -135,6 +109,15 @@ public final class RAMOnlyPostingsFormat
this.info = info;
}
+ /** Returns approximate RAM bytes used */
+ public long ramBytesUsed() {
+ long sizeInBytes = 0;
+ for(RAMTerm term : termToDocs.values()) {
+ sizeInBytes += term.ramBytesUsed();
+ }
+ return sizeInBytes;
+ }
+
@Override
public long size() {
return termToDocs.size();
@@ -161,8 +144,8 @@ public final class RAMOnlyPostingsFormat
}
@Override
- public Comparator<BytesRef> getComparator() {
- return reverseUnicodeComparator;
+ public boolean hasFreqs() {
+ return info.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
}
@Override
@@ -188,6 +171,15 @@ public final class RAMOnlyPostingsFormat
public RAMTerm(String term) {
this.term = term;
}
+
+ /** Returns approximate RAM bytes used */
+ public long ramBytesUsed() {
+ long sizeInBytes = 0;
+ for(RAMDoc rDoc : docs) {
+ sizeInBytes += rDoc.ramBytesUsed();
+ }
+ return sizeInBytes;
+ }
}
static class RAMDoc {
@@ -199,6 +191,19 @@ public final class RAMOnlyPostingsFormat
this.docID = docID;
positions = new int[freq];
}
+
+ /** Returns approximate RAM bytes used */
+ public long ramBytesUsed() {
+ long sizeInBytes = 0;
+ sizeInBytes += (positions!=null) ? RamUsageEstimator.sizeOf(positions) : 0;
+
+ if (payloads != null) {
+ for(byte[] payload: payloads) {
+ sizeInBytes += (payload!=null) ? RamUsageEstimator.sizeOf(payload) : 0;
+ }
+ }
+ return sizeInBytes;
+ }
}
// Classes for writing to the postings state
@@ -206,29 +211,128 @@ public final class RAMOnlyPostingsFormat
private final RAMPostings postings;
private final RAMTermsConsumer termsConsumer = new RAMTermsConsumer();
+ private final SegmentWriteState state;
- public RAMFieldsConsumer(RAMPostings postings) {
+ public RAMFieldsConsumer(SegmentWriteState writeState, RAMPostings postings) {
this.postings = postings;
+ this.state = writeState;
}
@Override
- public TermsConsumer addField(FieldInfo field) {
- if (field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
- throw new UnsupportedOperationException("this codec cannot index offsets");
- }
- RAMField ramField = new RAMField(field.name, field);
- postings.fieldToTerms.put(field.name, ramField);
- termsConsumer.reset(ramField);
- return termsConsumer;
- }
+ public void write(Fields fields) throws IOException {
+ for(String field : fields) {
- @Override
- public void close() {
- // TODO: finalize stuff
+ Terms terms = fields.terms(field);
+ if (terms == null) {
+ continue;
+ }
+
+ TermsEnum termsEnum = terms.iterator(null);
+
+ FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
+ if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
+ throw new UnsupportedOperationException("this codec cannot index offsets");
+ }
+
+ RAMField ramField = new RAMField(field, fieldInfo);
+ postings.fieldToTerms.put(field, ramField);
+ termsConsumer.reset(ramField);
+
+ FixedBitSet docsSeen = new FixedBitSet(state.segmentInfo.getDocCount());
+ long sumTotalTermFreq = 0;
+ long sumDocFreq = 0;
+ DocsEnum docsEnum = null;
+ DocsAndPositionsEnum posEnum = null;
+ int enumFlags;
+
+ IndexOptions indexOptions = fieldInfo.getIndexOptions();
+ boolean writeFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+ boolean writePositions = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+ boolean writeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+ boolean writePayloads = fieldInfo.hasPayloads();
+
+ if (writeFreqs == false) {
+ enumFlags = 0;
+ } else if (writePositions == false) {
+ enumFlags = DocsEnum.FLAG_FREQS;
+ } else if (writeOffsets == false) {
+ if (writePayloads) {
+ enumFlags = DocsAndPositionsEnum.FLAG_PAYLOADS;
+ } else {
+ enumFlags = 0;
+ }
+ } else {
+ if (writePayloads) {
+ enumFlags = DocsAndPositionsEnum.FLAG_PAYLOADS | DocsAndPositionsEnum.FLAG_OFFSETS;
+ } else {
+ enumFlags = DocsAndPositionsEnum.FLAG_OFFSETS;
+ }
+ }
+
+ while (true) {
+ BytesRef term = termsEnum.next();
+ if (term == null) {
+ break;
+ }
+ RAMPostingsWriterImpl postingsWriter = termsConsumer.startTerm(term);
+
+ if (writePositions) {
+ posEnum = termsEnum.docsAndPositions(null, posEnum, enumFlags);
+ docsEnum = posEnum;
+ } else {
+ docsEnum = termsEnum.docs(null, docsEnum, enumFlags);
+ posEnum = null;
+ }
+
+ int docFreq = 0;
+ long totalTermFreq = 0;
+ while (true) {
+ int docID = docsEnum.nextDoc();
+ if (docID == DocsEnum.NO_MORE_DOCS) {
+ break;
+ }
+ docsSeen.set(docID);
+ docFreq++;
+
+ int freq;
+ if (writeFreqs) {
+ freq = docsEnum.freq();
+ totalTermFreq += freq;
+ } else {
+ freq = -1;
+ }
+
+ postingsWriter.startDoc(docID, freq);
+ if (writePositions) {
+ for (int i=0;i<freq;i++) {
+ int pos = posEnum.nextPosition();
+ BytesRef payload = writePayloads ? posEnum.getPayload() : null;
+ int startOffset;
+ int endOffset;
+ if (writeOffsets) {
+ startOffset = posEnum.startOffset();
+ endOffset = posEnum.endOffset();
+ } else {
+ startOffset = -1;
+ endOffset = -1;
+ }
+ postingsWriter.addPosition(pos, payload, startOffset, endOffset);
+ }
+ }
+
+ postingsWriter.finishDoc();
+ }
+ termsConsumer.finishTerm(term, new TermStats(docFreq, totalTermFreq));
+ sumDocFreq += docFreq;
+ sumTotalTermFreq += totalTermFreq;
+ }
+
+ termsConsumer.finish(sumTotalTermFreq, sumDocFreq, docsSeen.cardinality());
+ }
}
}
- private static class RAMTermsConsumer extends TermsConsumer {
+ private static class RAMTermsConsumer {
private RAMField field;
private final RAMPostingsWriterImpl postingsWriter = new RAMPostingsWriterImpl();
RAMTerm current;
@@ -237,21 +341,13 @@ public final class RAMOnlyPostingsFormat
this.field = field;
}
- @Override
- public PostingsConsumer startTerm(BytesRef text) {
+ public RAMPostingsWriterImpl startTerm(BytesRef text) {
final String term = text.utf8ToString();
current = new RAMTerm(term);
postingsWriter.reset(current);
return postingsWriter;
}
-
- @Override
- public Comparator<BytesRef> getComparator() {
- return BytesRef.getUTF8SortedAsUnicodeComparator();
- }
-
- @Override
public void finishTerm(BytesRef text, TermStats stats) {
assert stats.docFreq > 0;
assert stats.docFreq == current.docs.size();
@@ -259,7 +355,6 @@ public final class RAMOnlyPostingsFormat
field.termToDocs.put(current.term, current);
}
- @Override
public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) {
field.sumTotalTermFreq = sumTotalTermFreq;
field.sumDocFreq = sumDocFreq;
@@ -267,7 +362,7 @@ public final class RAMOnlyPostingsFormat
}
}
- static class RAMPostingsWriterImpl extends PostingsConsumer {
+ static class RAMPostingsWriterImpl {
private RAMTerm term;
private RAMDoc current;
private int posUpto = 0;
@@ -276,14 +371,12 @@ public final class RAMOnlyPostingsFormat
this.term = term;
}
- @Override
public void startDoc(int docID, int freq) {
current = new RAMDoc(docID, freq);
term.docs.add(current);
posUpto = 0;
}
- @Override
public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) {
assert startOffset == -1;
assert endOffset == -1;
@@ -298,7 +391,6 @@ public final class RAMOnlyPostingsFormat
posUpto++;
}
- @Override
public void finishDoc() {
assert posUpto == current.positions.length;
}
@@ -314,11 +406,6 @@ public final class RAMOnlyPostingsFormat
}
@Override
- public Comparator<BytesRef> getComparator() {
- return BytesRef.getUTF8SortedAsUnicodeComparator();
- }
-
- @Override
public BytesRef next() {
if (it == null) {
if (current == null) {
@@ -545,7 +632,7 @@ public final class RAMOnlyPostingsFormat
}
final RAMPostings postings = new RAMPostings();
- final RAMFieldsConsumer consumer = new RAMFieldsConsumer(postings);
+ final RAMFieldsConsumer consumer = new RAMFieldsConsumer(writeState, postings);
synchronized(state) {
state.put(id, postings);
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java Mon Oct 21 18:58:24 2013
@@ -438,14 +438,14 @@ public class AssertingAtomicReader exten
this.in = in;
this.maxDoc = maxDoc;
this.valueCount = in.getValueCount();
- assert valueCount >= 1 && valueCount <= maxDoc;
+ assert valueCount >= 0 && valueCount <= maxDoc;
}
@Override
public int getOrd(int docID) {
assert docID >= 0 && docID < maxDoc;
int ord = in.getOrd(docID);
- assert ord >= 0 && ord < valueCount;
+ assert ord >= -1 && ord < valueCount;
return ord;
}
@@ -607,6 +607,54 @@ public class AssertingAtomicReader exten
return null;
}
}
+
+ /** Wraps a Bits but with additional asserts */
+ public static class AssertingBits implements Bits {
+ final Bits in;
+
+ public AssertingBits(Bits in) {
+ this.in = in;
+ }
+
+ @Override
+ public boolean get(int index) {
+ assert index >= 0 && index < length();
+ return in.get(index);
+ }
+
+ @Override
+ public int length() {
+ return in.length();
+ }
+ }
+
+ @Override
+ public Bits getLiveDocs() {
+ Bits liveDocs = super.getLiveDocs();
+ if (liveDocs != null) {
+ assert maxDoc() == liveDocs.length();
+ liveDocs = new AssertingBits(liveDocs);
+ } else {
+ assert maxDoc() == numDocs();
+ assert !hasDeletions();
+ }
+ return liveDocs;
+ }
+
+ @Override
+ public Bits getDocsWithField(String field) throws IOException {
+ Bits docsWithField = super.getDocsWithField(field);
+ FieldInfo fi = getFieldInfos().fieldInfo(field);
+ if (docsWithField != null) {
+ assert fi != null;
+ assert fi.hasDocValues();
+ assert maxDoc() == docsWithField.length();
+ docsWithField = new AssertingBits(docsWithField);
+ } else {
+ assert fi == null || fi.hasDocValues() == false;
+ }
+ return docsWithField;
+ }
// this is the same hack as FCInvisible
@Override