You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/01/08 03:13:06 UTC
svn commit: r1430114 -
/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesFormat.java
Author: rmuir
Date: Tue Jan 8 02:13:06 2013
New Revision: 1430114
URL: http://svn.apache.org/viewvc?rev=1430114&view=rev
Log:
variable length byte[]
Modified:
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesFormat.java
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesFormat.java?rev=1430114&r1=1430113&r2=1430114&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesFormat.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesFormat.java Tue Jan 8 02:13:06 2013
@@ -19,6 +19,7 @@ package org.apache.lucene.codecs.lucene4
import java.io.IOException;
import java.util.HashMap;
+import java.util.Iterator;
import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
@@ -106,23 +107,49 @@ public class Lucene41SimpleDocValuesForm
}
@Override
- public void addBinaryField(FieldInfo field, Iterable<BytesRef> values) throws IOException {
+ public void addBinaryField(FieldInfo field, final Iterable<BytesRef> values) throws IOException {
+ // write the byte[] data
meta.writeVInt(field.number);
- // nocommit handle var length too!!
- int length = -1;
+ int minLength = Integer.MAX_VALUE;
+ int maxLength = Integer.MIN_VALUE;
+ final long startFP = data.getFilePointer();
for(BytesRef v : values) {
- if (length == -1) {
- length = v.length;
- } else if (length != v.length) {
- throw new UnsupportedOperationException();
- }
+ minLength = Math.min(minLength, v.length);
+ maxLength = Math.max(maxLength, v.length);
+ data.writeBytes(v.bytes, v.offset, v.length);
}
- // nocommit don't hardwire fixedLength to 1:
- meta.writeByte((byte) 1);
- meta.writeVInt(length);
- meta.writeLong(data.getFilePointer());
- for(BytesRef value : values) {
- data.writeBytes(value.bytes, value.offset, value.length);
+ meta.writeVInt(minLength);
+ meta.writeVInt(maxLength);
+ meta.writeLong(startFP);
+
+ // if minLength == maxLength, its a fixed-length byte[], we are done (the addresses are implicit)
+ // otherwise, we need to record the length fields...
+ // TODO: make this more efficient. this is just as inefficient as 4.0 codec.... we can do much better.
+ if (minLength != maxLength) {
+ addNumericField(field, new Iterable<Number>() {
+ @Override
+ public Iterator<Number> iterator() {
+ final Iterator<BytesRef> inner = values.iterator();
+ return new Iterator<Number>() {
+ long addr = 0;
+
+ @Override
+ public boolean hasNext() {
+ return inner.hasNext();
+ }
+
+ @Override
+ public Number next() {
+ BytesRef b = inner.next();
+ addr += b.length;
+ return addr; // nocommit don't box
+ }
+
+ @Override
+ public void remove() { throw new UnsupportedOperationException(); }
+ };
+ }
+ });
}
}
@@ -150,7 +177,7 @@ public class Lucene41SimpleDocValuesForm
static class BinaryEntry {
long offset;
- boolean fixedLength;
+ int minLength;
int maxLength;
}
@@ -200,10 +227,10 @@ public class Lucene41SimpleDocValuesForm
} else if (DocValues.isBytes(type)) {
BinaryEntry b = readBinaryField(meta);
binaries.put(fieldNumber, b);
- if (!b.fixedLength) {
- throw new AssertionError();
- // here we will read a numerics entry for the field, too.
- // it contains the addresses as ints.
+ if (b.minLength != b.maxLength) {
+ fieldNumber = meta.readVInt(); // waste
+ // variable length byte[]: read addresses as a numeric dv field
+ numerics.put(fieldNumber, readNumericField(meta));
}
}
fieldNumber = meta.readVInt();
@@ -220,7 +247,7 @@ public class Lucene41SimpleDocValuesForm
static BinaryEntry readBinaryField(IndexInput meta) throws IOException {
BinaryEntry entry = new BinaryEntry();
- entry.fixedLength = meta.readByte() != 0;
+ entry.minLength = meta.readVInt();
entry.maxLength = meta.readVInt();
entry.offset = meta.readLong();
return entry;
@@ -228,7 +255,11 @@ public class Lucene41SimpleDocValuesForm
@Override
public NumericDocValues getNumeric(FieldInfo field) throws IOException {
+ // nocommit: user can currently get back a numericDV of the addresses...
final NumericEntry entry = numerics.get(field.number);
+ // nocommit: what are we doing with clone?!
+ final IndexInput data = this.data.clone();
+ data.seek(entry.offset);
final PackedInts.Reader reader = PackedInts.getDirectReaderNoHeader(data, entry.header);
return new NumericDocValues() {
@Override
@@ -245,20 +276,59 @@ public class Lucene41SimpleDocValuesForm
@Override
public BinaryDocValues getBinary(FieldInfo field) throws IOException {
- final BinaryEntry entry = binaries.get(field.number);
- assert entry.fixedLength;
+ BinaryEntry bytes = binaries.get(field.number);
+ if (bytes.minLength == bytes.maxLength) {
+ return getFixedBinary(field, bytes);
+ } else {
+ return getVariableBinary(field, bytes);
+ }
+ }
+
+ private BinaryDocValues getFixedBinary(FieldInfo field, final BinaryEntry bytes) {
+ // nocommit: what are we doing with clone?!
+ final IndexInput data = this.data.clone();
return new BinaryDocValues() {
@Override
public void get(int docID, BytesRef result) {
- long address = entry.offset + docID * (long)entry.maxLength;
+ long address = bytes.offset + docID * (long)bytes.maxLength;
try {
data.seek(address);
- if (result.length < entry.maxLength) {
+ if (result.length < bytes.maxLength) {
+ result.offset = 0;
+ result.bytes = new byte[bytes.maxLength];
+ }
+ data.readBytes(result.bytes, result.offset, bytes.maxLength);
+ result.length = bytes.maxLength;
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Override
+ public int size() {
+ return maxDoc;
+ }
+ };
+ }
+
+ private BinaryDocValues getVariableBinary(FieldInfo field, final BinaryEntry bytes) throws IOException {
+ // nocommit: what are we doing with clone?!
+ final IndexInput data = this.data.clone();
+ final NumericDocValues addresses = getNumeric(field);
+ return new BinaryDocValues() {
+ @Override
+ public void get(int docID, BytesRef result) {
+ long startAddress = docID == 0 ? bytes.offset : bytes.offset + addresses.get(docID-1);
+ long endAddress = bytes.offset + addresses.get(docID);
+ int length = (int) (endAddress - startAddress);
+ try {
+ data.seek(startAddress);
+ if (result.length < length) {
result.offset = 0;
- result.bytes = new byte[entry.maxLength];
+ result.bytes = new byte[length];
}
- data.readBytes(result.bytes, result.offset, entry.maxLength);
- result.length = entry.maxLength;
+ data.readBytes(result.bytes, result.offset, length);
+ result.length = length;
} catch (IOException e) {
throw new RuntimeException(e);
}
@@ -276,6 +346,7 @@ public class Lucene41SimpleDocValuesForm
return null;
}
+ // nocommit: is this not needed anymore? we can probably nuke some ctors and clean up
@Override
public SimpleDVProducer clone() {
return new Lucene41SimpleDocValuesProducer(data.clone(), numerics, binaries, maxDoc);