You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/01/24 18:01:10 UTC
svn commit: r1438072 - in /lucene/dev/branches/lucene4547/lucene:
core/src/java/org/apache/lucene/codecs/lucene40/
core/src/java/org/apache/lucene/codecs/lucene41/
test-framework/src/java/org/apache/lucene/codecs/lucene40/
test-framework/src/java/org/a...
Author: rmuir
Date: Thu Jan 24 17:01:10 2013
New Revision: 1438072
URL: http://svn.apache.org/viewvc?rev=1438072&view=rev
Log:
4.0 sortedbytes
Removed:
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40LyingDocValuesFormat.java
lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40LyingRWDocValuesFormat.java
Modified:
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java
lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java
lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWCodec.java
lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41/Lucene41RWCodec.java
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java?rev=1438072&r1=1438071&r2=1438072&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java Thu Jan 24 17:01:10 2013
@@ -87,7 +87,7 @@ public class Lucene40Codec extends Codec
return infosFormat;
}
- private final DocValuesFormat defaultDVFormat = new Lucene40LyingDocValuesFormat();
+ private final DocValuesFormat defaultDVFormat = new Lucene40DocValuesFormat();
@Override
public DocValuesFormat docValuesFormat() {
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java?rev=1438072&r1=1438071&r2=1438072&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java Thu Jan 24 17:01:10 2013
@@ -51,7 +51,6 @@ public class Lucene40DocValuesFormat ext
static final String VAR_INTS_CODEC_NAME = "PackedInts";
static final int VAR_INTS_VERSION_START = 0;
static final int VAR_INTS_VERSION_CURRENT = VAR_INTS_VERSION_START;
-
static final byte VAR_INTS_PACKED = 0x00;
static final byte VAR_INTS_FIXED_64 = 0x01;
@@ -75,4 +74,17 @@ public class Lucene40DocValuesFormat ext
static final String BYTES_VAR_STRAIGHT_CODEC_NAME_DAT = "VarStraightBytesDat";
static final int BYTES_VAR_STRAIGHT_VERSION_START = 0;
static final int BYTES_VAR_STRAIGHT_VERSION_CURRENT = BYTES_VAR_STRAIGHT_VERSION_START;
+
+ // constants for BYTES_FIXED_SORTED
+ static final String BYTES_FIXED_SORTED_CODEC_NAME_IDX = "FixedSortedBytesIdx";
+ static final String BYTES_FIXED_SORTED_CODEC_NAME_DAT = "FixedSortedBytesDat";
+ static final int BYTES_FIXED_SORTED_VERSION_START = 0;
+ static final int BYTES_FIXED_SORTED_VERSION_CURRENT = BYTES_FIXED_SORTED_VERSION_START;
+
+ // constants for BYTES_VAR_SORTED
+ // NOTE: THIS IS NOT A BUG! 4.0 actually screwed this up (VAR_SORTED and VAR_DEREF have the same codec header), so the "VarDeref" names below are intentional
+ static final String BYTES_VAR_SORTED_CODEC_NAME_IDX = "VarDerefBytesIdx";
+ static final String BYTES_VAR_SORTED_CODEC_NAME_DAT = "VarDerefBytesDat";
+ static final int BYTES_VAR_SORTED_VERSION_START = 0;
+ static final int BYTES_VAR_SORTED_VERSION_CURRENT = BYTES_VAR_SORTED_VERSION_START;
}
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java?rev=1438072&r1=1438071&r2=1438072&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java Thu Jan 24 17:01:10 2013
@@ -276,7 +276,7 @@ class Lucene40DocValuesReader extends Do
instance = loadBytesVarStraight(field);
break;
default:
- throw new AssertionError();
+ throw new AssertionError(); // nocommit
}
binaryInstances.put(field.number, instance);
}
@@ -355,7 +355,113 @@ class Lucene40DocValuesReader extends Do
@Override
public synchronized SortedDocValues getSorted(FieldInfo field) throws IOException { // returns the field's SortedDocValues, loading and caching it on first request
- throw new AssertionError();
+ SortedDocValues instance = sortedInstances.get(field.number); // previously loaded instance for this field number, if any
+ if (instance == null) {
+ String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, Integer.toString(field.number), "dat"); // per-field data file, named by field number
+ String indexName = IndexFileNames.segmentFileName(state.segmentInfo.name, Integer.toString(field.number), "idx"); // per-field index file, named by field number
+ IndexInput data = null;
+ IndexInput index = null;
+ boolean success = false; // set only after a loader returned without throwing
+ try {
+ data = dir.openInput(dataName, state.context);
+ index = dir.openInput(indexName, state.context);
+ switch(LegacyDocValuesType.valueOf(field.getAttribute(legacyKey))) { // the legacy type stored on the field selects which layout to decode
+ case BYTES_FIXED_SORTED:
+ instance = loadBytesFixedSorted(field, data, index);
+ break;
+ case BYTES_VAR_SORTED:
+ instance = loadBytesVarSorted(field, data, index);
+ break;
+ default:
+ throw new AssertionError(); // no other legacy type is handled by getSorted
+ }
+ success = true;
+ } finally { // both inputs are closed before returning, on success and on failure alike
+ if (success) {
+ IOUtils.close(data, index);
+ } else {
+ IOUtils.closeWhileHandlingException(data, index); // failure path: close without masking the exception already in flight
+ }
+ }
+ sortedInstances.put(field.number, instance); // cache so later calls skip the file reads
+ }
+ return instance;
+ }
+
+ private SortedDocValues loadBytesFixedSorted(FieldInfo field, IndexInput data, IndexInput index) throws IOException { // decodes a BYTES_FIXED_SORTED field, where every value has the same length
+ CodecUtil.checkHeader(data, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_DAT,
+ Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_START,
+ Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);
+ CodecUtil.checkHeader(index, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_IDX,
+ Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_START,
+ Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);
+
+ final int fixedLength = data.readInt(); // byte length shared by every value
+ final int valueCount = index.readInt(); // number of values stored in the data file
+
+ // nocommit? can the current impl even handle > 2G? (the int product below overflows once fixedLength*valueCount exceeds Integer.MAX_VALUE)
+ final byte[] bytes = new byte[fixedLength*valueCount];
+ data.readBytes(bytes, 0, bytes.length); // all values are read into one array, back to back
+ final PackedInts.Reader reader = PackedInts.getReader(index); // packed ords follow valueCount in the index file; looked up by docID below
+
+ return new SortedDocValues() {
+ @Override
+ public int getOrd(int docID) {
+ return (int) reader.get(docID);
+ }
+
+ @Override
+ public void lookupOrd(int ord, BytesRef result) {
+ result.bytes = bytes; // result points into the shared array; nothing is copied
+ result.offset = ord * fixedLength; // values are fixedLength bytes each, so the ord maps directly to an offset
+ result.length = fixedLength;
+ }
+
+ @Override
+ public int getValueCount() {
+ return valueCount;
+ }
+ };
+ }
+
+ private SortedDocValues loadBytesVarSorted(FieldInfo field, IndexInput data, IndexInput index) throws IOException { // decodes a BYTES_VAR_SORTED field, where values are located through an address table
+ CodecUtil.checkHeader(data, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT,
+ Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_START,
+ Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);
+ CodecUtil.checkHeader(index, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX,
+ Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_START,
+ Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);
+
+ long maxAddress = index.readLong(); // number of value bytes to read from the data file
+ // nocommit? can the current impl even handle > 2G? (the int cast below truncates a maxAddress above Integer.MAX_VALUE)
+ final byte[] bytes = new byte[(int)maxAddress];
+ data.readBytes(bytes, 0, bytes.length);
+
+ final PackedInts.Reader addressReader = PackedInts.getReader(index); // start offset of each value, plus one trailing entry
+ final PackedInts.Reader ordsReader = PackedInts.getReader(index); // second packed block in the index file; looked up by docID below
+
+ final int valueCount = addressReader.size() - 1; // the address table has one more entry than there are values
+
+ return new SortedDocValues() {
+ @Override
+ public int getOrd(int docID) {
+ return (int)ordsReader.get(docID);
+ }
+
+ @Override
+ public void lookupOrd(int ord, BytesRef result) {
+ long startAddress = addressReader.get(ord);
+ long endAddress = addressReader.get(ord+1); // the next entry marks where this value ends
+ result.bytes = bytes; // result points into the shared array; nothing is copied
+ result.offset = (int)startAddress;
+ result.length = (int)(endAddress - startAddress);
+ }
+
+ @Override
+ public int getValueCount() {
+ return valueCount;
+ }
+ };
}
@Override
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java?rev=1438072&r1=1438071&r2=1438072&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java Thu Jan 24 17:01:10 2013
@@ -32,7 +32,7 @@ import org.apache.lucene.codecs.StoredFi
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.compressing.CompressingStoredFieldsFormat;
import org.apache.lucene.codecs.compressing.CompressionMode;
-import org.apache.lucene.codecs.lucene40.Lucene40LyingDocValuesFormat;
+import org.apache.lucene.codecs.lucene40.Lucene40DocValuesFormat;
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat;
import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40NormsFormat;
@@ -125,7 +125,7 @@ public class Lucene41Codec extends Codec
}
private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41");
- private final DocValuesFormat dvFormat = new Lucene40LyingDocValuesFormat();
+ private final DocValuesFormat dvFormat = new Lucene40DocValuesFormat();
private final NormsFormat normsFormat = new Lucene40NormsFormat();
@Override
Modified: lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java?rev=1438072&r1=1438071&r2=1438072&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java (original)
+++ lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java Thu Jan 24 17:01:10 2013
@@ -219,12 +219,16 @@ class Lucene40DocValuesWriter extends Do
Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_IDX,
Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);
+ /* values */
+
final long startPos = data.getFilePointer();
for (BytesRef v : values) {
data.writeBytes(v.bytes, v.offset, v.length);
}
+ /* addresses */
+
final long maxAddress = data.getFilePointer() - startPos;
index.writeVLong(maxAddress);
@@ -245,8 +249,121 @@ class Lucene40DocValuesWriter extends Do
@Override
public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException { // writes a sorted field using either the fixed-length or the variable-length layout
- assert false;
- }
+ // examine the value lengths to choose between the fixed and var layouts
+ int minLength = Integer.MAX_VALUE;
+ int maxLength = Integer.MIN_VALUE;
+ for (BytesRef b : values) {
+ minLength = Math.min(minLength, b.length);
+ maxLength = Math.max(maxLength, b.length);
+ }
+
+ boolean success = false; // set only after a writer helper returned without throwing
+ IndexOutput data = null;
+ IndexOutput index = null;
+ String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, Integer.toString(field.number), "dat"); // per-field data file, named by field number
+ String indexName = IndexFileNames.segmentFileName(state.segmentInfo.name, Integer.toString(field.number), "idx"); // per-field index file, named by field number
+
+ try {
+ data = dir.createOutput(dataName, state.context);
+ index = dir.createOutput(indexName, state.context);
+ if (minLength == maxLength) {
+ // fixed byte[] (every value has the same length)
+ addFixedSortedBytesField(field, data, index, values, docToOrd, minLength);
+ } else {
+ // var byte[] (also reached when values is empty, since the initial min and max differ)
+ addVarSortedBytesField(field, data, index, values, docToOrd);
+ }
+ success = true;
+ } finally { // both outputs are closed on success and on failure alike
+ if (success) {
+ IOUtils.close(data, index);
+ } else {
+ IOUtils.closeWhileHandlingException(data, index); // failure path: close without masking the exception already in flight
+ }
+ }
+ }
+
+ private void addFixedSortedBytesField(FieldInfo field, IndexOutput data, IndexOutput index, Iterable<BytesRef> values, Iterable<Number> docToOrd, int length) throws IOException { // writes the BYTES_FIXED_SORTED layout: values of one shared length plus packed per-document ords
+ field.putAttribute(legacyKey, LegacyDocValuesType.BYTES_FIXED_SORTED.name()); // record which legacy layout this field was written with
+
+ CodecUtil.writeHeader(data,
+ Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_DAT,
+ Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);
+
+ CodecUtil.writeHeader(index,
+ Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_IDX,
+ Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);
+
+ /* values */
+
+ data.writeInt(length); // data file: the shared value length, then the raw value bytes
+ int valueCount = 0;
+ for (BytesRef v : values) {
+ data.writeBytes(v.bytes, v.offset, v.length);
+ valueCount++;
+ }
+
+ /* ordinals */
+
+ index.writeInt(valueCount); // index file: the value count, then one packed entry per docToOrd element
+ int maxDoc = state.segmentInfo.getDocCount();
+ assert valueCount > 0; // bitsRequired below is given valueCount-1
+ final PackedInts.Writer w = PackedInts.getWriter(index, maxDoc, PackedInts.bitsRequired(valueCount-1), PackedInts.DEFAULT); // bit width sized for valueCount-1, presumably the largest ord
+ for (Number n : docToOrd) {
+ w.add(n.longValue());
+ }
+ w.finish();
+ }
+
+ private void addVarSortedBytesField(FieldInfo field, IndexOutput data, IndexOutput index, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException { // writes the BYTES_VAR_SORTED layout: raw values, an address table, then packed per-document ords
+ field.putAttribute(legacyKey, LegacyDocValuesType.BYTES_VAR_SORTED.name()); // record which legacy layout this field was written with
+
+ CodecUtil.writeHeader(data,
+ Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT,
+ Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);
+
+ CodecUtil.writeHeader(index,
+ Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX,
+ Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);
+
+ /* values */
+
+ final long startPos = data.getFilePointer(); // position just past the codec header, where the values begin
+
+ int valueCount = 0;
+ for (BytesRef v : values) {
+ data.writeBytes(v.bytes, v.offset, v.length);
+ valueCount++;
+ }
+
+ /* addresses */
+
+ final long maxAddress = data.getFilePointer() - startPos; // total bytes written for all values
+ index.writeLong(maxAddress);
+
+ assert valueCount != Integer.MAX_VALUE; // unsupported by the 4.0 impl
+
+ final PackedInts.Writer w = PackedInts.getWriter(index, valueCount+1, PackedInts.bitsRequired(maxAddress), PackedInts.DEFAULT); // valueCount+1 entries: one start offset per value plus the sentinel
+ long currentPosition = 0;
+ for (BytesRef v : values) { // second pass over values, accumulating each value's start offset
+ w.add(currentPosition);
+ currentPosition += v.length;
+ }
+ // write sentinel: the end offset of the last value
+ assert currentPosition == maxAddress;
+ w.add(currentPosition);
+ w.finish();
+
+ /* ordinals */
+
+ final int maxDoc = state.segmentInfo.getDocCount();
+ assert valueCount > 0; // NOTE(review): checked only after the address table above has already been written
+ final PackedInts.Writer ords = PackedInts.getWriter(index, maxDoc, PackedInts.bitsRequired(valueCount-1), PackedInts.DEFAULT); // bit width sized for valueCount-1, presumably the largest ord
+ for (Number n : docToOrd) {
+ ords.add(n.longValue());
+ }
+ ords.finish();
+ }
@Override
public void close() throws IOException {
Modified: lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWCodec.java?rev=1438072&r1=1438071&r2=1438072&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWCodec.java (original)
+++ lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWCodec.java Thu Jan 24 17:01:10 2013
@@ -33,8 +33,7 @@ public final class Lucene40RWCodec exten
}
};
- //private final DocValuesFormat docValues = new Lucene40RWDocValuesFormat();
- private final DocValuesFormat docValues = new Lucene40LyingRWDocValuesFormat();
+ private final DocValuesFormat docValues = new Lucene40RWDocValuesFormat();
private final NormsFormat norms = new Lucene40RWNormsFormat();
@Override
Modified: lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41/Lucene41RWCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41/Lucene41RWCodec.java?rev=1438072&r1=1438071&r2=1438072&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41/Lucene41RWCodec.java (original)
+++ lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41/Lucene41RWCodec.java Thu Jan 24 17:01:10 2013
@@ -9,7 +9,7 @@ import org.apache.lucene.codecs.NormsFor
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat;
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosWriter;
-import org.apache.lucene.codecs.lucene40.Lucene40LyingRWDocValuesFormat;
+import org.apache.lucene.codecs.lucene40.Lucene40RWDocValuesFormat;
import org.apache.lucene.codecs.lucene40.Lucene40RWNormsFormat;
/*
@@ -41,7 +41,7 @@ public class Lucene41RWCodec extends Luc
}
};
- private final DocValuesFormat docValues = new Lucene40LyingRWDocValuesFormat();
+ private final DocValuesFormat docValues = new Lucene40RWDocValuesFormat();
private final NormsFormat norms = new Lucene40RWNormsFormat();
@Override