You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2012/07/04 10:28:07 UTC
svn commit: r1357166 [1/2] - in
/lucene/dev/branches/branch_4x/lucene/core/src:
java/org/apache/lucene/codecs/
java/org/apache/lucene/codecs/lucene40/values/ java/org/apache/lucene/util/
java/org/apache/lucene/util/packed/ test/org/apache/lucene/util/packed/
Author: jpountz
Date: Wed Jul 4 08:28:04 2012
New Revision: 1357166
URL: http://svn.apache.org/viewvc?rev=1357166&view=rev
Log:
LUCENE-4161: Make packed int arrays usable by codecs (merged from r1357159).
Added:
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/LongsRef.java
- copied unchanged from r1357159, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/LongsRef.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/BulkOperation.java
- copied unchanged from r1357159, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/BulkOperation.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/gen_BulkOperation.py
- copied unchanged from r1357159, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/gen_BulkOperation.py
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/gen_Direct.py
- copied unchanged from r1357159, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/gen_Direct.py
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/gen_PackedThreeBlocks.py
- copied unchanged from r1357159, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/gen_PackedThreeBlocks.py
Removed:
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Packed64SingleBlockReaderIterator.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Packed64SingleBlockWriter.java
Modified:
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/FixedGapTermsIndexReader.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Direct16.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Direct32.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Direct64.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Direct8.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Packed16ThreeBlocks.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Packed64.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Packed64SingleBlock.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Packed8ThreeBlocks.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedWriter.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/gen_Packed64SingleBlock.py
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/package.html
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/FixedGapTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/FixedGapTermsIndexReader.java?rev=1357166&r1=1357165&r2=1357166&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/FixedGapTermsIndexReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/FixedGapTermsIndexReader.java Wed Jul 4 08:28:04 2012
@@ -315,10 +315,10 @@ public class FixedGapTermsIndexReader ex
try {
// Subsample the index terms
clone1.seek(packedIndexStart);
- final PackedInts.ReaderIterator termsDictOffsetsIter = PackedInts.getReaderIterator(clone1);
+ final PackedInts.ReaderIterator termsDictOffsetsIter = PackedInts.getReaderIterator(clone1, PackedInts.DEFAULT_BUFFER_SIZE);
clone2.seek(packedOffsetsStart);
- final PackedInts.ReaderIterator termOffsetsIter = PackedInts.getReaderIterator(clone2);
+ final PackedInts.ReaderIterator termOffsetsIter = PackedInts.getReaderIterator(clone2, PackedInts.DEFAULT_BUFFER_SIZE);
// TODO: often we can get by w/ fewer bits per
// value, below.. .but this'd be more complex:
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java?rev=1357166&r1=1357165&r2=1357166&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java Wed Jul 4 08:28:04 2012
@@ -121,7 +121,7 @@ class VarStraightBytesImpl {
final IndexInput cloneIdx = reader.cloneIndex();
try {
numDataBytes = cloneIdx.readVLong();
- final ReaderIterator iter = PackedInts.getReaderIterator(cloneIdx);
+ final ReaderIterator iter = PackedInts.getReaderIterator(cloneIdx, PackedInts.DEFAULT_BUFFER_SIZE);
for (int i = 0; i < maxDocs; i++) {
long offset = iter.next();
++lastDocID;
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Direct16.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Direct16.java?rev=1357166&r1=1357165&r2=1357166&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Direct16.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Direct16.java Wed Jul 4 08:28:04 2012
@@ -1,3 +1,5 @@
+// This file has been automatically generated, DO NOT EDIT
+
package org.apache.lucene.util.packed;
/*
@@ -24,62 +26,37 @@ import java.io.IOException;
import java.util.Arrays;
/**
- * Direct wrapping of 16 bit values to a backing array of shorts.
+ * Direct wrapping of 16-bits values to a backing array.
* @lucene.internal
*/
+final class Direct16 extends PackedInts.MutableImpl {
+ final short[] values;
-class Direct16 extends PackedInts.MutableImpl {
- private final short[] values;
- private static final int BITS_PER_VALUE = 16;
-
- public Direct16(int valueCount) {
- super(valueCount, BITS_PER_VALUE);
+ Direct16(int valueCount) {
+ super(valueCount, 16);
values = new short[valueCount];
}
- public Direct16(DataInput in, int valueCount) throws IOException {
- super(valueCount, BITS_PER_VALUE);
- short[] values = new short[valueCount];
- for(int i=0;i<valueCount;i++) {
+ Direct16(DataInput in, int valueCount) throws IOException {
+ this(valueCount);
+ for (int i = 0; i < valueCount; ++i) {
values[i] = in.readShort();
}
final int mod = valueCount % 4;
if (mod != 0) {
- final int pad = 4-mod;
- // round out long
- for(int i=0;i<pad;i++) {
+ for (int i = mod; i < 4; ++i) {
in.readShort();
}
}
-
- this.values = values;
- }
-
- /**
- * Creates an array backed by the given values.
- * </p><p>
- * Note: The values are used directly, so changes to the values will
- * affect the structure.
- * @param values used as the internal backing array.
- */
- public Direct16(short[] values) {
- super(values.length, BITS_PER_VALUE);
- this.values = values;
}
+ @Override
public long get(final int index) {
- assert index >= 0 && index < size();
- return 0xFFFFL & values[index];
+ return values[index] & 0xFFFFL;
}
public void set(final int index, final long value) {
- values[index] = (short)(value & 0xFFFF);
- }
-
- @Override
- public void fill(int fromIndex, int toIndex, long val) {
- assert (val & 0xffffL) == val;
- Arrays.fill(values, fromIndex, toIndex, (short) val);
+ values[index] = (short) (value);
}
public long ramBytesUsed() {
@@ -87,7 +64,7 @@ class Direct16 extends PackedInts.Mutabl
}
public void clear() {
- Arrays.fill(values, (short)0);
+ Arrays.fill(values, (short) 0L);
}
@Override
@@ -99,4 +76,35 @@ class Direct16 extends PackedInts.Mutabl
public boolean hasArray() {
return true;
}
+
+ @Override
+ public int get(int index, long[] arr, int off, int len) {
+ assert len > 0 : "len must be > 0 (got " + len + ")";
+ assert index >= 0 && index < valueCount;
+ assert off + len <= arr.length;
+
+ final int gets = Math.min(valueCount - index, len);
+ for (int i = index, o = off, end = index + gets; i < end; ++i, ++o) {
+ arr[o] = values[i] & 0xFFFFL;
+ }
+ return gets;
+ }
+
+ public int set(int index, long[] arr, int off, int len) {
+ assert len > 0 : "len must be > 0 (got " + len + ")";
+ assert index >= 0 && index < valueCount;
+ assert off + len <= arr.length;
+
+ final int sets = Math.min(valueCount - index, len);
+ for (int i = index, o = off, end = index + sets; i < end; ++i, ++o) {
+ values[i] = (short) arr[o];
+ }
+ return sets;
+ }
+
+ @Override
+ public void fill(int fromIndex, int toIndex, long val) {
+ assert val == (val & 0xFFFFL);
+ Arrays.fill(values, fromIndex, toIndex, (short) val);
+ }
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Direct32.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Direct32.java?rev=1357166&r1=1357165&r2=1357166&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Direct32.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Direct32.java Wed Jul 4 08:28:04 2012
@@ -1,3 +1,5 @@
+// This file has been automatically generated, DO NOT EDIT
+
package org.apache.lucene.util.packed;
/*
@@ -24,58 +26,37 @@ import java.io.IOException;
import java.util.Arrays;
/**
- * Direct wrapping of 32 bit values to a backing array of ints.
+ * Direct wrapping of 32-bits values to a backing array.
* @lucene.internal
*/
+final class Direct32 extends PackedInts.MutableImpl {
+ final int[] values;
-class Direct32 extends PackedInts.MutableImpl {
- private final int[] values;
- private static final int BITS_PER_VALUE = 32;
-
- public Direct32(int valueCount) {
- super(valueCount, BITS_PER_VALUE);
+ Direct32(int valueCount) {
+ super(valueCount, 32);
values = new int[valueCount];
}
- public Direct32(DataInput in, int valueCount) throws IOException {
- super(valueCount, BITS_PER_VALUE);
- int[] values = new int[valueCount];
- for(int i=0;i<valueCount;i++) {
+ Direct32(DataInput in, int valueCount) throws IOException {
+ this(valueCount);
+ for (int i = 0; i < valueCount; ++i) {
values[i] = in.readInt();
}
final int mod = valueCount % 2;
if (mod != 0) {
- in.readInt();
+ for (int i = mod; i < 2; ++i) {
+ in.readInt();
+ }
}
-
- this.values = values;
- }
-
- /**
- * Creates an array backed by the given values.
- * </p><p>
- * Note: The values are used directly, so changes to the given values will
- * affect the structure.
- * @param values used as the internal backing array.
- */
- public Direct32(int[] values) {
- super(values.length, BITS_PER_VALUE);
- this.values = values;
}
+ @Override
public long get(final int index) {
- assert index >= 0 && index < size();
- return 0xFFFFFFFFL & values[index];
+ return values[index] & 0xFFFFFFFFL;
}
public void set(final int index, final long value) {
- values[index] = (int)(value & 0xFFFFFFFF);
- }
-
- @Override
- public void fill(int fromIndex, int toIndex, long val) {
- assert (val & 0xffffffffL) == val;
- Arrays.fill(values, fromIndex, toIndex, (int) val);
+ values[index] = (int) (value);
}
public long ramBytesUsed() {
@@ -83,11 +64,11 @@ class Direct32 extends PackedInts.Mutabl
}
public void clear() {
- Arrays.fill(values, 0);
+ Arrays.fill(values, (int) 0L);
}
-
+
@Override
- public int[] getArray() {
+ public Object getArray() {
return values;
}
@@ -95,4 +76,35 @@ class Direct32 extends PackedInts.Mutabl
public boolean hasArray() {
return true;
}
+
+ @Override
+ public int get(int index, long[] arr, int off, int len) {
+ assert len > 0 : "len must be > 0 (got " + len + ")";
+ assert index >= 0 && index < valueCount;
+ assert off + len <= arr.length;
+
+ final int gets = Math.min(valueCount - index, len);
+ for (int i = index, o = off, end = index + gets; i < end; ++i, ++o) {
+ arr[o] = values[i] & 0xFFFFFFFFL;
+ }
+ return gets;
+ }
+
+ public int set(int index, long[] arr, int off, int len) {
+ assert len > 0 : "len must be > 0 (got " + len + ")";
+ assert index >= 0 && index < valueCount;
+ assert off + len <= arr.length;
+
+ final int sets = Math.min(valueCount - index, len);
+ for (int i = index, o = off, end = index + sets; i < end; ++i, ++o) {
+ values[i] = (int) arr[o];
+ }
+ return sets;
+ }
+
+ @Override
+ public void fill(int fromIndex, int toIndex, long val) {
+ assert val == (val & 0xFFFFFFFFL);
+ Arrays.fill(values, fromIndex, toIndex, (int) val);
+ }
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Direct64.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Direct64.java?rev=1357166&r1=1357165&r2=1357166&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Direct64.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Direct64.java Wed Jul 4 08:28:04 2012
@@ -1,3 +1,5 @@
+// This file has been automatically generated, DO NOT EDIT
+
package org.apache.lucene.util.packed;
/*
@@ -24,62 +26,67 @@ import java.io.IOException;
import java.util.Arrays;
/**
- * Direct wrapping of 64 bit values to a backing array of longs.
+ * Direct wrapping of 64-bits values to a backing array.
* @lucene.internal
*/
-class Direct64 extends PackedInts.MutableImpl {
- private final long[] values;
- private static final int BITS_PER_VALUE = 64;
+final class Direct64 extends PackedInts.MutableImpl {
+ final long[] values;
- public Direct64(int valueCount) {
- super(valueCount, BITS_PER_VALUE);
+ Direct64(int valueCount) {
+ super(valueCount, 64);
values = new long[valueCount];
}
- public Direct64(DataInput in, int valueCount) throws IOException {
- super(valueCount, BITS_PER_VALUE);
- long[] values = new long[valueCount];
- for(int i=0;i<valueCount;i++) {
+ Direct64(DataInput in, int valueCount) throws IOException {
+ this(valueCount);
+ for (int i = 0; i < valueCount; ++i) {
values[i] = in.readLong();
}
+ }
+
+ @Override
+ public long get(final int index) {
+ return values[index];
+ }
- this.values = values;
+ public void set(final int index, final long value) {
+ values[index] = (value);
}
- /**
- * Creates an array backed by the given values.
- * </p><p>
- * Note: The values are used directly, so changes to the given values will
- * affect the structure.
- * @param values used as the internal backing array.
- */
- public Direct64(long[] values) {
- super(values.length, BITS_PER_VALUE);
- this.values = values;
+ public long ramBytesUsed() {
+ return RamUsageEstimator.sizeOf(values);
}
- public long get(final int index) {
- assert index >= 0 && index < size();
- return values[index];
+ public void clear() {
+ Arrays.fill(values, 0L);
+ }
+
+ @Override
+ public Object getArray() {
+ return values;
+ }
+
+ @Override
+ public boolean hasArray() {
+ return true;
}
@Override
public int get(int index, long[] arr, int off, int len) {
+ assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < valueCount;
assert off + len <= arr.length;
+
final int gets = Math.min(valueCount - index, len);
System.arraycopy(values, index, arr, off, gets);
return gets;
}
- public void set(final int index, final long value) {
- values[index] = value;
- }
-
- @Override
public int set(int index, long[] arr, int off, int len) {
+ assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < valueCount;
assert off + len <= arr.length;
+
final int sets = Math.min(valueCount - index, len);
System.arraycopy(arr, off, values, index, sets);
return sets;
@@ -89,23 +96,4 @@ class Direct64 extends PackedInts.Mutabl
public void fill(int fromIndex, int toIndex, long val) {
Arrays.fill(values, fromIndex, toIndex, val);
}
-
- public long ramBytesUsed() {
- return RamUsageEstimator.sizeOf(values);
- }
-
- public void clear() {
- Arrays.fill(values, 0L);
- }
-
- @Override
- public long[] getArray() {
- return values;
- }
-
- @Override
- public boolean hasArray() {
- return true;
- }
-
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Direct8.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Direct8.java?rev=1357166&r1=1357165&r2=1357166&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Direct8.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Direct8.java Wed Jul 4 08:28:04 2012
@@ -1,3 +1,5 @@
+// This file has been automatically generated, DO NOT EDIT
+
package org.apache.lucene.util.packed;
/*
@@ -24,63 +26,37 @@ import java.io.IOException;
import java.util.Arrays;
/**
- * Direct wrapping of 8 bit values to a backing array of bytes.
+ * Direct wrapping of 8-bits values to a backing array.
* @lucene.internal
*/
+final class Direct8 extends PackedInts.MutableImpl {
+ final byte[] values;
-class Direct8 extends PackedInts.MutableImpl {
- private final byte[] values;
- private static final int BITS_PER_VALUE = 8;
-
- public Direct8(int valueCount) {
- super(valueCount, BITS_PER_VALUE);
+ Direct8(int valueCount) {
+ super(valueCount, 8);
values = new byte[valueCount];
}
- public Direct8(DataInput in, int valueCount)
- throws IOException {
- super(valueCount, BITS_PER_VALUE);
- byte[] values = new byte[valueCount];
- for(int i=0;i<valueCount;i++) {
+ Direct8(DataInput in, int valueCount) throws IOException {
+ this(valueCount);
+ for (int i = 0; i < valueCount; ++i) {
values[i] = in.readByte();
}
final int mod = valueCount % 8;
if (mod != 0) {
- final int pad = 8-mod;
- // round out long
- for(int i=0;i<pad;i++) {
+ for (int i = mod; i < 8; ++i) {
in.readByte();
}
}
-
- this.values = values;
- }
-
- /**
- * Creates an array backed by the given values.
- * </p><p>
- * Note: The values are used directly, so changes to the given values will
- * affect the structure.
- * @param values used as the internal backing array.
- */
- public Direct8(byte[] values) {
- super(values.length, BITS_PER_VALUE);
- this.values = values;
}
+ @Override
public long get(final int index) {
- assert index >= 0 && index < size();
- return 0xFFL & values[index];
+ return values[index] & 0xFFL;
}
public void set(final int index, final long value) {
- values[index] = (byte)(value & 0xFF);
- }
-
- @Override
- public void fill(int fromIndex, int toIndex, long val) {
- assert (val & 0xffL) == val;
- Arrays.fill(values, fromIndex, toIndex, (byte) val);
+ values[index] = (byte) (value);
}
public long ramBytesUsed() {
@@ -88,7 +64,7 @@ class Direct8 extends PackedInts.Mutable
}
public void clear() {
- Arrays.fill(values, (byte)0);
+ Arrays.fill(values, (byte) 0L);
}
@Override
@@ -100,4 +76,35 @@ class Direct8 extends PackedInts.Mutable
public boolean hasArray() {
return true;
}
+
+ @Override
+ public int get(int index, long[] arr, int off, int len) {
+ assert len > 0 : "len must be > 0 (got " + len + ")";
+ assert index >= 0 && index < valueCount;
+ assert off + len <= arr.length;
+
+ final int gets = Math.min(valueCount - index, len);
+ for (int i = index, o = off, end = index + gets; i < end; ++i, ++o) {
+ arr[o] = values[i] & 0xFFL;
+ }
+ return gets;
+ }
+
+ public int set(int index, long[] arr, int off, int len) {
+ assert len > 0 : "len must be > 0 (got " + len + ")";
+ assert index >= 0 && index < valueCount;
+ assert off + len <= arr.length;
+
+ final int sets = Math.min(valueCount - index, len);
+ for (int i = index, o = off, end = index + sets; i < end; ++i, ++o) {
+ values[i] = (byte) arr[o];
+ }
+ return sets;
+ }
+
+ @Override
+ public void fill(int fromIndex, int toIndex, long val) {
+ assert val == (val & 0xFFL);
+ Arrays.fill(values, fromIndex, toIndex, (byte) val);
+ }
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Packed16ThreeBlocks.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Packed16ThreeBlocks.java?rev=1357166&r1=1357165&r2=1357166&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Packed16ThreeBlocks.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Packed16ThreeBlocks.java Wed Jul 4 08:28:04 2012
@@ -1,10 +1,6 @@
-package org.apache.lucene.util.packed;
-
-import java.io.IOException;
-import java.util.Arrays;
+// This file has been automatically generated, DO NOT EDIT
-import org.apache.lucene.store.DataInput;
-import org.apache.lucene.util.RamUsageEstimator;
+package org.apache.lucene.util.packed;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -23,32 +19,38 @@ import org.apache.lucene.util.RamUsageEs
* limitations under the License.
*/
-/** 48 bitsPerValue backed by short[] */
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.util.RamUsageEstimator;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+/**
+ * Packs integers into 3 shorts (48 bits per value).
+ * @lucene.internal
+ */
final class Packed16ThreeBlocks extends PackedInts.MutableImpl {
+ final short[] blocks;
public static final int MAX_SIZE = Integer.MAX_VALUE / 3;
- private final short[] blocks;
-
Packed16ThreeBlocks(int valueCount) {
super(valueCount, 48);
if (valueCount > MAX_SIZE) {
throw new ArrayIndexOutOfBoundsException("MAX_SIZE exceeded");
}
- this.blocks = new short[3 * valueCount];
+ blocks = new short[valueCount * 3];
}
Packed16ThreeBlocks(DataInput in, int valueCount) throws IOException {
this(valueCount);
- for (int i = 0; i < blocks.length; i++) {
+ for (int i = 0; i < 3 * valueCount; ++i) {
blocks[i] = in.readShort();
}
final int mod = blocks.length % 4;
if (mod != 0) {
- final int pad = 4 - mod;
- // round out long
- for (int i = 0; i < pad; i++) {
- in.readShort();
+ for (int i = mod; i < 4; ++i) {
+ in.readShort();
}
}
}
@@ -56,26 +58,55 @@ final class Packed16ThreeBlocks extends
@Override
public long get(int index) {
final int o = index * 3;
- return (blocks[o] & 0xffffL) << 32 | (blocks[o+1] & 0xffffL) << 16 | (blocks[o+2] & 0xffffL);
+ return (blocks[o] & 0xFFFFL) << 32 | (blocks[o+1] & 0xFFFFL) << 16 | (blocks[o+2] & 0xFFFFL);
+ }
+
+ @Override
+ public int get(int index, long[] arr, int off, int len) {
+ assert len > 0 : "len must be > 0 (got " + len + ")";
+ assert index >= 0 && index < valueCount;
+ assert off + len <= arr.length;
+
+ final int gets = Math.min(valueCount - index, len);
+ for (int i = index * 3, end = (index + gets) * 3; i < end; i+=3) {
+ arr[off++] = (blocks[i] & 0xFFFFL) << 32 | (blocks[i+1] & 0xFFFFL) << 16 | (blocks[i+2] & 0xFFFFL);
+ }
+ return gets;
}
@Override
public void set(int index, long value) {
final int o = index * 3;
- blocks[o] = (short) (value >> 32);
- blocks[o+1] = (short) (value >> 16);
+ blocks[o] = (short) (value >>> 32);
+ blocks[o+1] = (short) (value >>> 16);
blocks[o+2] = (short) value;
}
@Override
+ public int set(int index, long[] arr, int off, int len) {
+ assert len > 0 : "len must be > 0 (got " + len + ")";
+ assert index >= 0 && index < valueCount;
+ assert off + len <= arr.length;
+
+ final int sets = Math.min(valueCount - index, len);
+ for (int i = off, o = index * 3, end = off + sets; i < end; ++i) {
+ final long value = arr[i];
+ blocks[o++] = (short) (value >>> 32);
+ blocks[o++] = (short) (value >>> 16);
+ blocks[o++] = (short) value;
+ }
+ return sets;
+ }
+
+ @Override
public void fill(int fromIndex, int toIndex, long val) {
- short block1 = (short) (val >> 32);
- short block2 = (short) (val >> 16);
- short block3 = (short) val;
- for (int i = fromIndex * 3, end = toIndex * 3; i < end; ) {
- blocks[i++] = block1;
- blocks[i++] = block2;
- blocks[i++] = block3;
+ final short block1 = (short) (val >>> 32);
+ final short block2 = (short) (val >>> 16);
+ final short block3 = (short) val;
+ for (int i = fromIndex * 3, end = toIndex * 3; i < end; i += 3) {
+ blocks[i] = block1;
+ blocks[i+1] = block2;
+ blocks[i+2] = block3;
}
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Packed64.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Packed64.java?rev=1357166&r1=1357165&r2=1357166&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Packed64.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Packed64.java Wed Jul 4 08:28:04 2012
@@ -139,6 +139,49 @@ class Packed64 extends PackedInts.Mutabl
}
@Override
+ public int get(int index, long[] arr, int off, int len) {
+ assert len > 0 : "len must be > 0 (got " + len + ")";
+ assert index >= 0 && index < valueCount;
+ len = Math.min(len, valueCount - index);
+ assert off + len <= arr.length;
+
+ final int originalIndex = index;
+ final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED, bitsPerValue);
+
+ // go to the next block where the value does not span across two blocks
+ final int offsetInBlocks = index % op.values();
+ if (offsetInBlocks != 0) {
+ for (int i = offsetInBlocks; i < op.values() && len > 0; ++i) {
+ arr[off++] = get(index++);
+ --len;
+ }
+ if (len == 0) {
+ return index - originalIndex;
+ }
+ }
+
+ // bulk get
+ assert index % op.values() == 0;
+ int blockIndex = (int) ((long) index * bitsPerValue) >>> BLOCK_BITS;
+ assert (((long)index * bitsPerValue) & MOD_MASK) == 0;
+ final int iterations = len / op.values();
+ op.get(blocks, blockIndex, arr, off, iterations);
+ final int gotValues = iterations * op.values();
+ index += gotValues;
+ len -= gotValues;
+ assert len >= 0;
+
+ if (index > originalIndex) {
+ // stay at the block boundary
+ return index - originalIndex;
+ } else {
+ // no progress so far => already at a block boundary but no full block to get
+ assert index == originalIndex;
+ return super.get(index, arr, off, len);
+ }
+ }
+
+ @Override
public void set(final int index, final long value) {
// The abstract index in a contiguous bit stream
final long majorBitPos = (long)index * bitsPerValue;
@@ -159,6 +202,48 @@ class Packed64 extends PackedInts.Mutabl
| (value << (BLOCK_SIZE - endBits));
}
+ @Override
+ public int set(int index, long[] arr, int off, int len) {
+ assert len > 0 : "len must be > 0 (got " + len + ")";
+ assert index >= 0 && index < valueCount;
+ len = Math.min(len, valueCount - index);
+ assert off + len <= arr.length;
+
+ final int originalIndex = index;
+ final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED, bitsPerValue);
+
+ // go to the next block where the value does not span across two blocks
+ final int offsetInBlocks = index % op.values();
+ if (offsetInBlocks != 0) {
+ for (int i = offsetInBlocks; i < op.values() && len > 0; ++i) {
+ set(index++, arr[off++]);
+ --len;
+ }
+ if (len == 0) {
+ return index - originalIndex;
+ }
+ }
+
+ // bulk get
+ assert index % op.values() == 0;
+ int blockIndex = (int) ((long) index * bitsPerValue) >>> BLOCK_BITS;
+ assert (((long)index * bitsPerValue) & MOD_MASK) == 0;
+ final int iterations = len / op.values();
+ op.set(blocks, blockIndex, arr, off, iterations);
+ final int setValues = iterations * op.values();
+ index += setValues;
+ len -= setValues;
+ assert len >= 0;
+
+ if (index > originalIndex) {
+ // stay at the block boundary
+ return index - originalIndex;
+ } else {
+ // no progress so far => already at a block boundary but no full block to get
+ assert index == originalIndex;
+ return super.set(index, arr, off, len);
+ }
+ }
@Override
public String toString() {
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Packed64SingleBlock.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Packed64SingleBlock.java?rev=1357166&r1=1357165&r2=1357166&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Packed64SingleBlock.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Packed64SingleBlock.java Wed Jul 4 08:28:04 2012
@@ -39,12 +39,6 @@ abstract class Packed64SingleBlock exten
return Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsPerValue) >= 0;
}
- public static float overheadPerValue(int bitsPerValue) {
- int valuesPerBlock = 64 / bitsPerValue;
- int overhead = 64 % bitsPerValue;
- return (float) overhead / valuesPerBlock;
- }
-
private static int requiredCapacity(int valueCount, int valuesPerBlock) {
return valueCount / valuesPerBlock
+ (valueCount % valuesPerBlock == 0 ? 0 : 1);
@@ -92,16 +86,14 @@ abstract class Packed64SingleBlock exten
// bulk get
assert index % valuesPerBlock == 0;
- final long readMask = (1L << bitsPerValue) - 1;
- final int startBlock = index / valuesPerBlock;
- final int endBlock = (index + len) / valuesPerBlock;
- final int diff = (endBlock - startBlock) * valuesPerBlock;
+ final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
+ assert op.blocks() == 1;
+ assert op.values() == valuesPerBlock;
+ final int blockIndex = index / valuesPerBlock;
+ final int nblocks = (index + len) / valuesPerBlock - blockIndex;
+ op.get(blocks, blockIndex, arr, off, nblocks);
+ final int diff = nblocks * valuesPerBlock;
index += diff; len -= diff;
- for (int block = startBlock; block < endBlock; ++block) {
- for (int i = 0; i < valuesPerBlock; ++i) {
- arr[off++] = (blocks[block] >>> (i * bitsPerValue)) & readMask;
- }
- }
if (index > originalIndex) {
// stay at the block boundary
@@ -138,17 +130,14 @@ abstract class Packed64SingleBlock exten
// bulk set
assert index % valuesPerBlock == 0;
- final int startBlock = index / valuesPerBlock;
- final int endBlock = (index + len) / valuesPerBlock;
- final int diff = (endBlock - startBlock) * valuesPerBlock;
+ final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
+ assert op.blocks() == 1;
+ assert op.values() == valuesPerBlock;
+ final int blockIndex = index / valuesPerBlock;
+ final int nblocks = (index + len) / valuesPerBlock - blockIndex;
+ op.set(blocks, blockIndex, arr, off, nblocks);
+ final int diff = nblocks * valuesPerBlock;
index += diff; len -= diff;
- for (int block = startBlock; block < endBlock; ++block) {
- long next = 0L;
- for (int i = 0; i < valuesPerBlock; ++i) {
- next |= (arr[off++] << (i * bitsPerValue));
- }
- blocks[block] = next;
- }
if (index > originalIndex) {
// stay at the block boundary
@@ -202,8 +191,8 @@ abstract class Packed64SingleBlock exten
}
@Override
- protected int getFormat() {
- return PackedInts.PACKED_SINGLE_BLOCK;
+ protected PackedInts.Format getFormat() {
+ return PackedInts.Format.PACKED_SINGLE_BLOCK;
}
@Override
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Packed8ThreeBlocks.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Packed8ThreeBlocks.java?rev=1357166&r1=1357165&r2=1357166&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Packed8ThreeBlocks.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/Packed8ThreeBlocks.java Wed Jul 4 08:28:04 2012
@@ -1,10 +1,6 @@
-package org.apache.lucene.util.packed;
-
-import java.io.IOException;
-import java.util.Arrays;
+// This file has been automatically generated, DO NOT EDIT
-import org.apache.lucene.store.DataInput;
-import org.apache.lucene.util.RamUsageEstimator;
+package org.apache.lucene.util.packed;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -23,32 +19,38 @@ import org.apache.lucene.util.RamUsageEs
* limitations under the License.
*/
-/** 24 bitsPerValue backed by byte[] */
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.util.RamUsageEstimator;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+/**
+ * Packs integers into 3 bytes (24 bits per value).
+ * @lucene.internal
+ */
final class Packed8ThreeBlocks extends PackedInts.MutableImpl {
+ final byte[] blocks;
public static final int MAX_SIZE = Integer.MAX_VALUE / 3;
- private final byte[] blocks;
-
Packed8ThreeBlocks(int valueCount) {
super(valueCount, 24);
if (valueCount > MAX_SIZE) {
throw new ArrayIndexOutOfBoundsException("MAX_SIZE exceeded");
}
- this.blocks = new byte[3 * valueCount];
+ blocks = new byte[valueCount * 3];
}
Packed8ThreeBlocks(DataInput in, int valueCount) throws IOException {
this(valueCount);
- for (int i = 0; i < blocks.length; i++) {
+ for (int i = 0; i < 3 * valueCount; ++i) {
blocks[i] = in.readByte();
}
final int mod = blocks.length % 8;
if (mod != 0) {
- final int pad = 8 - mod;
- // round out long
- for (int i = 0; i < pad; i++) {
- in.readByte();
+ for (int i = mod; i < 8; ++i) {
+ in.readByte();
}
}
}
@@ -56,26 +58,55 @@ final class Packed8ThreeBlocks extends P
@Override
public long get(int index) {
final int o = index * 3;
- return (blocks[o] & 0xffL) << 16 | (blocks[o+1] & 0xffL) << 8 | (blocks[o+2] & 0xffL);
+ return (blocks[o] & 0xFFL) << 16 | (blocks[o+1] & 0xFFL) << 8 | (blocks[o+2] & 0xFFL);
+ }
+
+ @Override
+ public int get(int index, long[] arr, int off, int len) {
+ assert len > 0 : "len must be > 0 (got " + len + ")";
+ assert index >= 0 && index < valueCount;
+ assert off + len <= arr.length;
+
+ final int gets = Math.min(valueCount - index, len);
+ for (int i = index * 3, end = (index + gets) * 3; i < end; i+=3) {
+ arr[off++] = (blocks[i] & 0xFFL) << 16 | (blocks[i+1] & 0xFFL) << 8 | (blocks[i+2] & 0xFFL);
+ }
+ return gets;
}
@Override
public void set(int index, long value) {
final int o = index * 3;
+ blocks[o] = (byte) (value >>> 16);
+ blocks[o+1] = (byte) (value >>> 8);
blocks[o+2] = (byte) value;
- blocks[o+1] = (byte) (value >> 8);
- blocks[o] = (byte) (value >> 16);
+ }
+
+ @Override
+ public int set(int index, long[] arr, int off, int len) {
+ assert len > 0 : "len must be > 0 (got " + len + ")";
+ assert index >= 0 && index < valueCount;
+ assert off + len <= arr.length;
+
+ final int sets = Math.min(valueCount - index, len);
+ for (int i = off, o = index * 3, end = off + sets; i < end; ++i) {
+ final long value = arr[i];
+ blocks[o++] = (byte) (value >>> 16);
+ blocks[o++] = (byte) (value >>> 8);
+ blocks[o++] = (byte) value;
+ }
+ return sets;
}
@Override
public void fill(int fromIndex, int toIndex, long val) {
- byte block1 = (byte) (val >> 16);
- byte block2 = (byte) (val >> 8);
- byte block3 = (byte) val;
- for (int i = fromIndex * 3, end = toIndex * 3; i < end; ) {
- blocks[i++] = block1;
- blocks[i++] = block2;
- blocks[i++] = block3;
+ final byte block1 = (byte) (val >>> 16);
+ final byte block2 = (byte) (val >>> 8);
+ final byte block3 = (byte) val;
+ for (int i = fromIndex * 3, end = toIndex * 3; i < end; i += 3) {
+ blocks[i] = block1;
+ blocks[i+1] = block2;
+ blocks[i+2] = block3;
}
}
@@ -93,5 +124,4 @@ final class Packed8ThreeBlocks extends P
return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
+ ", size=" + size() + ", elements.length=" + blocks.length + ")";
}
-
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java?rev=1357166&r1=1357165&r2=1357166&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java Wed Jul 4 08:28:04 2012
@@ -23,6 +23,7 @@ import org.apache.lucene.codecs.CodecUti
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.LongsRef;
import java.io.IOException;
@@ -34,7 +35,6 @@ import java.io.IOException;
*
* @lucene.internal
*/
-
public class PackedInts {
/**
@@ -62,12 +62,184 @@ public class PackedInts {
*/
public static final int DEFAULT_BUFFER_SIZE = 1024; // 1K
- final static String CODEC_NAME = "PackedInts";
- final static int VERSION_START = 0;
- final static int VERSION_CURRENT = VERSION_START;
+ public final static String CODEC_NAME = "PackedInts";
+ public final static int VERSION_START = 0;
+ public final static int VERSION_CURRENT = VERSION_START;
+
+ /**
+ * A format to write packed ints.
+ *
+ * @lucene.internal
+ */
+ public enum Format {
+ /**
+ * Compact format, all bits are written contiguously.
+ */
+ PACKED(0) {
+
+ @Override
+ public int nblocks(int bitsPerValue, int values) {
+ return (int) Math.ceil((double) values * bitsPerValue / 64);
+ }
+
+ },
+
+ /**
+ * A format that may insert padding bits to improve encoding and decoding
+ * speed. Since this format doesn't support all possible bits per value, you
+ * should never use it directly, but rather use
+ * {@link PackedInts#fastestFormatAndBits(int, int, float)} to find the
+ * format that best suits your needs.
+ */
+ PACKED_SINGLE_BLOCK(1) {
+
+ @Override
+ public int nblocks(int bitsPerValue, int values) {
+ final int valuesPerBlock = 64 / bitsPerValue;
+ return (int) Math.ceil((double) values / valuesPerBlock);
+ }
+
+ @Override
+ public boolean isSupported(int bitsPerValue) {
+ return Packed64SingleBlock.isSupported(bitsPerValue);
+ }
+
+ @Override
+ public float overheadPerValue(int bitsPerValue) {
+ assert isSupported(bitsPerValue);
+ final int valuesPerBlock = 64 / bitsPerValue;
+ final int overhead = 64 % bitsPerValue;
+ return (float) overhead / valuesPerBlock;
+ }
+
+ };
+
+ /**
+ * Get a format according to its ID.
+ */
+ public static Format byId(int id) {
+ for (Format format : Format.values()) {
+ if (format.getId() == id) {
+ return format;
+ }
+ }
+ throw new IllegalArgumentException("Unknown format id: " + id);
+ }
+
+ private Format(int id) {
+ this.id = id;
+ }
+
+ public int id;
+
+ /**
+ * Returns the ID of the format.
+ */
+ public int getId() {
+ return id;
+ }
+
+ /**
+ * Computes how many blocks are needed to store <code>values</code> values
+ * of size <code>bitsPerValue</code>.
+ */
+ public abstract int nblocks(int bitsPerValue, int values);
+
+ /**
+ * Tests whether the provided number of bits per value is supported by the
+ * format.
+ */
+ public boolean isSupported(int bitsPerValue) {
+ return bitsPerValue >= 1 && bitsPerValue <= 64;
+ }
+
+ /**
+ * Returns the overhead per value, in bits.
+ */
+ public float overheadPerValue(int bitsPerValue) {
+ assert isSupported(bitsPerValue);
+ return 0f;
+ }
+
+ /**
+ * Returns the overhead ratio (<code>overhead per value / bits per value</code>).
+ */
+ public final float overheadRatio(int bitsPerValue) {
+ assert isSupported(bitsPerValue);
+ return overheadPerValue(bitsPerValue) / bitsPerValue;
+ }
+ }
+
+ /**
+ * Simple class that holds a format and a number of bits per value.
+ */
+ public static class FormatAndBits {
+ public final Format format;
+ public final int bitsPerValue;
+ public FormatAndBits(Format format, int bitsPerValue) {
+ this.format = format;
+ this.bitsPerValue = bitsPerValue;
+ }
+ }
+
+ /**
+ * Try to find the {@link Format} and number of bits per value that would
+ * restore from disk the fastest reader whose overhead is less than
+ * <code>acceptableOverheadRatio</code>.
+ * </p><p>
+ * The <code>acceptableOverheadRatio</code> parameter makes sense for
+ * random-access {@link Reader}s. In case you only plan to perform
+ * sequential access on this stream later on, you should probably use
+ * {@link PackedInts#COMPACT}.
+ * </p><p>
+ * If you don't know how many values you are going to write, use
+ * <code>valueCount = -1</code>.
+ */
+ public static FormatAndBits fastestFormatAndBits(int valueCount, int bitsPerValue, float acceptableOverheadRatio) {
+ if (valueCount == -1) {
+ valueCount = Integer.MAX_VALUE;
+ }
+
+ acceptableOverheadRatio = Math.max(COMPACT, acceptableOverheadRatio);
+ acceptableOverheadRatio = Math.min(FASTEST, acceptableOverheadRatio);
+ float acceptableOverheadPerValue = acceptableOverheadRatio * bitsPerValue; // in bits
- static final int PACKED = 0;
- static final int PACKED_SINGLE_BLOCK = 1;
+ int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue;
+
+ int actualBitsPerValue = -1;
+ Format format = Format.PACKED;
+
+ if (bitsPerValue <= 8 && maxBitsPerValue >= 8) {
+ actualBitsPerValue = 8;
+ } else if (bitsPerValue <= 16 && maxBitsPerValue >= 16) {
+ actualBitsPerValue = 16;
+ } else if (bitsPerValue <= 32 && maxBitsPerValue >= 32) {
+ actualBitsPerValue = 32;
+ } else if (bitsPerValue <= 64 && maxBitsPerValue >= 64) {
+ actualBitsPerValue = 64;
+ } else if (valueCount <= Packed8ThreeBlocks.MAX_SIZE && bitsPerValue <= 24 && maxBitsPerValue >= 24) {
+ actualBitsPerValue = 24;
+ } else if (valueCount <= Packed16ThreeBlocks.MAX_SIZE && bitsPerValue <= 48 && maxBitsPerValue >= 48) {
+ actualBitsPerValue = 48;
+ } else {
+ for (int bpv = bitsPerValue; bpv <= maxBitsPerValue; ++bpv) {
+ if (Format.PACKED_SINGLE_BLOCK.isSupported(bpv)) {
+ float overhead = Format.PACKED_SINGLE_BLOCK.overheadPerValue(bpv);
+ float acceptableOverhead = acceptableOverheadPerValue + bitsPerValue - bpv;
+ if (overhead <= acceptableOverhead) {
+ actualBitsPerValue = bpv;
+ format = Format.PACKED_SINGLE_BLOCK;
+ break;
+ }
+ }
+ }
+ if (actualBitsPerValue < 0) {
+ actualBitsPerValue = bitsPerValue;
+ }
+ }
+
+ return new FormatAndBits(format, actualBitsPerValue);
+ }
/**
* A read-only random access array of positive integers.
@@ -132,31 +304,40 @@ public class PackedInts {
public static interface ReaderIterator extends Closeable {
/** Returns next value */
long next() throws IOException;
+ /** Returns at least 1 and at most <code>count</code> next values.
+ * The returned ref MUST NOT be modified. */
+ LongsRef next(int count) throws IOException;
/** Returns number of bits per value */
int getBitsPerValue();
/** Returns number of values */
int size();
/** Returns the current position */
int ord();
- /** Skips to the given ordinal and returns its value.
- * @return the value at the given position
- * @throws IOException if reading the value throws an IOException*/
- long advance(int ord) throws IOException;
}
static abstract class ReaderIteratorImpl implements ReaderIterator {
- protected final IndexInput in;
+ protected final DataInput in;
protected final int bitsPerValue;
protected final int valueCount;
- protected ReaderIteratorImpl(int valueCount, int bitsPerValue, IndexInput in) {
+ protected ReaderIteratorImpl(int valueCount, int bitsPerValue, DataInput in) {
this.in = in;
this.bitsPerValue = bitsPerValue;
this.valueCount = valueCount;
}
@Override
+ public long next() throws IOException {
+ LongsRef nextValues = next(1);
+ assert nextValues.length > 0;
+ final long result = nextValues.longs[nextValues.offset];
+ ++nextValues.offset;
+ --nextValues.length;
+ return result;
+ }
+
+ @Override
public int getBitsPerValue() {
return bitsPerValue;
}
@@ -168,7 +349,9 @@ public class PackedInts {
@Override
public void close() throws IOException {
- in.close();
+ if (in instanceof Closeable) {
+ ((Closeable) in).close();
+ }
}
}
@@ -217,7 +400,7 @@ public class PackedInts {
* A simple base for Readers that keeps track of valueCount and bitsPerValue.
* @lucene.internal
*/
- public static abstract class ReaderImpl implements Reader {
+ static abstract class ReaderImpl implements Reader {
protected final int bitsPerValue;
protected final int valueCount;
@@ -257,7 +440,7 @@ public class PackedInts {
}
- public static abstract class MutableImpl extends ReaderImpl implements Mutable {
+ static abstract class MutableImpl extends ReaderImpl implements Mutable {
protected MutableImpl(int valueCount, int bitsPerValue) {
super(valueCount, bitsPerValue);
@@ -283,13 +466,15 @@ public class PackedInts {
}
}
- protected int getFormat() {
- return PACKED;
+ protected Format getFormat() {
+ return Format.PACKED;
}
@Override
public void save(DataOutput out) throws IOException {
- Writer writer = getWriterByFormat(out, valueCount, bitsPerValue, getFormat());
+ Writer writer = getWriterNoHeader(out, getFormat(),
+ valueCount, bitsPerValue, DEFAULT_BUFFER_SIZE);
+ writer.writeHeader();
for (int i = 0; i < valueCount; ++i) {
writer.add(get(i));
}
@@ -302,121 +487,209 @@ public class PackedInts {
*/
public static abstract class Writer {
protected final DataOutput out;
- protected final int bitsPerValue;
protected final int valueCount;
+ protected final int bitsPerValue;
protected Writer(DataOutput out, int valueCount, int bitsPerValue)
throws IOException {
assert bitsPerValue <= 64;
-
+ assert valueCount >= 0 || valueCount == -1;
this.out = out;
this.valueCount = valueCount;
this.bitsPerValue = bitsPerValue;
+ }
+
+ void writeHeader() throws IOException {
+ assert valueCount != -1;
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
out.writeVInt(bitsPerValue);
out.writeVInt(valueCount);
- out.writeVInt(getFormat());
+ out.writeVInt(getFormat().getId());
}
- protected abstract int getFormat();
+ /** The format used to serialize values. */
+ protected abstract PackedInts.Format getFormat();
+
+ /** Add a value to the stream. */
public abstract void add(long v) throws IOException;
+
+ /** The number of bits per value. */
+ public final int bitsPerValue() {
+ return bitsPerValue;
+ }
+
+ /** Perform end-of-stream operations. */
public abstract void finish() throws IOException;
+
+ /**
+ * Returns the current ord in the stream (number of values that have been
+ * written so far minus one).
+ */
+ public abstract int ord();
}
/**
- * Retrieve PackedInt data from the DataInput and return a packed int
- * structure based on it.
+ * Expert: Restore a {@link Reader} from a stream without reading metadata at
+ * the beginning of the stream. This method is useful to restore data from
+ * streams which have been created using
+ * {@link PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}.
*
- * @param in positioned at the beginning of a stored packed int structure.
- * @return a read only random access capable array of positive integers.
- * @throws IOException if the structure could not be retrieved.
+ * @param in the stream to read data from, positioned at the beginning of the packed values
+ * @param format the format used to serialize
+ * @param version the version used to serialize the data
+ * @param valueCount how many values the stream holds
+ * @param bitsPerValue the number of bits per value
+ * @return a Reader
+ * @throws IOException
+ * @see PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)
* @lucene.internal
*/
- public static Reader getReader(DataInput in) throws IOException {
- CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
- final int bitsPerValue = in.readVInt();
- assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
- final int valueCount = in.readVInt();
- final int format = in.readVInt();
-
+ public static Reader getReaderNoHeader(DataInput in, Format format, int version,
+ int valueCount, int bitsPerValue) throws IOException {
switch (format) {
+ case PACKED_SINGLE_BLOCK:
+ return Packed64SingleBlock.create(in, valueCount, bitsPerValue);
case PACKED:
switch (bitsPerValue) {
case 8:
return new Direct8(in, valueCount);
case 16:
return new Direct16(in, valueCount);
- case 24:
- return new Packed8ThreeBlocks(in, valueCount);
case 32:
return new Direct32(in, valueCount);
- case 48:
- return new Packed16ThreeBlocks(in, valueCount);
case 64:
return new Direct64(in, valueCount);
- default:
- return new Packed64(in, valueCount, bitsPerValue);
+ case 24:
+ if (valueCount <= Packed8ThreeBlocks.MAX_SIZE) {
+ return new Packed8ThreeBlocks(in, valueCount);
+ }
+ break;
+ case 48:
+ if (valueCount <= Packed16ThreeBlocks.MAX_SIZE) {
+ return new Packed16ThreeBlocks(in, valueCount);
+ }
+ break;
}
- case PACKED_SINGLE_BLOCK:
- return Packed64SingleBlock.create(in, valueCount, bitsPerValue);
+ return new Packed64(in, valueCount, bitsPerValue);
default:
throw new AssertionError("Unknwown Writer format: " + format);
}
}
/**
+ * Restore a {@link Reader} from a stream.
+ *
+ * @param in the stream to read data from
+ * @return a Reader
+ * @throws IOException
+ * @lucene.internal
+ */
+ public static Reader getReader(DataInput in) throws IOException {
+ final int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
+ final int bitsPerValue = in.readVInt();
+ assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
+ final int valueCount = in.readVInt();
+ final Format format = Format.byId(in.readVInt());
+
+ return getReaderNoHeader(in, format, version, valueCount, bitsPerValue);
+ }
+
+ /**
+ * Expert: Restore a {@link ReaderIterator} from a stream without reading
+ * metadata at the beginning of the stream. This method is useful to restore
+ * data from streams which have been created using
+ * {@link PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}.
+ *
+ * @param in the stream to read data from, positioned at the beginning of the packed values
+ * @param format the format used to serialize
+ * @param version the version used to serialize the data
+ * @param valueCount how many values the stream holds
+ * @param bitsPerValue the number of bits per value
+ * @param mem how much memory the iterator is allowed to use to read-ahead (likely to speed up iteration)
+ * @return a ReaderIterator
+ * @throws IOException
+ * @see PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)
+ * @lucene.internal
+ */
+ public static ReaderIterator getReaderIteratorNoHeader(DataInput in, Format format, int version,
+ int valueCount, int bitsPerValue, int mem) throws IOException {
+ return new PackedReaderIterator(format, valueCount, bitsPerValue, in, mem);
+ }
+
+ /**
* Retrieve PackedInts as a {@link ReaderIterator}
* @param in positioned at the beginning of a stored packed int structure.
+ * @param mem how much memory the iterator is allowed to use to read-ahead (likely to speed up iteration)
* @return an iterator to access the values
* @throws IOException if the structure could not be retrieved.
* @lucene.internal
*/
- public static ReaderIterator getReaderIterator(IndexInput in) throws IOException {
- CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
+ public static ReaderIterator getReaderIterator(DataInput in, int mem) throws IOException {
+ final int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
final int bitsPerValue = in.readVInt();
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
final int valueCount = in.readVInt();
- final int format = in.readVInt();
+ final Format format = Format.byId(in.readVInt());
+ return getReaderIteratorNoHeader(in, format, version, valueCount, bitsPerValue, mem);
+ }
+
+ /**
+ * Expert: Construct a direct {@link Reader} from a stream without reading
+ * metadata at the beginning of the stream. This method is useful to restore
+ * data from streams which have been created using
+ * {@link PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}.
+ * </p><p>
+ * The returned reader will have very little memory overhead, but every call
+ * to {@link Reader#get(int)} is likely to perform a disk seek.
+ *
+ * @param in the stream to read data from
+ * @param format the format used to serialize
+ * @param version the version used to serialize the data
+ * @param valueCount how many values the stream holds
+ * @param bitsPerValue the number of bits per value
+ * @return a direct Reader
+ * @throws IOException
+ * @lucene.internal
+ */
+ public static Reader getDirectReaderNoHeader(IndexInput in, Format format,
+ int version, int valueCount, int bitsPerValue) throws IOException {
switch (format) {
case PACKED:
- return new PackedReaderIterator(valueCount, bitsPerValue, in);
+ return new DirectPackedReader(bitsPerValue, valueCount, in);
case PACKED_SINGLE_BLOCK:
- return new Packed64SingleBlockReaderIterator(valueCount, bitsPerValue, in);
+ return new DirectPacked64SingleBlockReader(bitsPerValue, valueCount, in);
default:
- throw new AssertionError("Unknwown Writer format: " + format);
+ throw new AssertionError("Unknwown format: " + format);
}
}
-
+
/**
- * Retrieve PackedInts.Reader that does not load values
- * into RAM but rather accesses all values via the
- * provided IndexInput.
- * @param in positioned at the beginning of a stored packed int structure.
- * @return an Reader to access the values
- * @throws IOException if the structure could not be retrieved.
+ * Construct a direct {@link Reader} from an {@link IndexInput}. This method
+ * is useful to restore data from streams which have been created using
+ * {@link PackedInts#getWriter(DataOutput, int, int, float)}.
+ * </p><p>
+ * The returned reader will have very little memory overhead, but every call
+ * to {@link Reader#get(int)} is likely to perform a disk seek.
+ *
+ * @param in the stream to read data from
+ * @return a direct Reader
+ * @throws IOException
* @lucene.internal
*/
public static Reader getDirectReader(IndexInput in) throws IOException {
- CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
+ final int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
final int bitsPerValue = in.readVInt();
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
final int valueCount = in.readVInt();
- final int format = in.readVInt();
- switch (format) {
- case PACKED:
- return new DirectPackedReader(bitsPerValue, valueCount, in);
- case PACKED_SINGLE_BLOCK:
- return new DirectPacked64SingleBlockReader(bitsPerValue, valueCount, in);
- default:
- throw new AssertionError("Unknwown Writer format: " + format);
- }
+ final Format format = Format.byId(in.readVInt());
+ return getDirectReaderNoHeader(in, format, version, valueCount, bitsPerValue);
}
/**
* Create a packed integer array with the given amount of values initialized
* to 0. the valueCount and the bitsPerValue cannot be changed after creation.
* All Mutables known by this factory are kept fully in RAM.
- *
+ * </p><p>
* Positive values of <code>acceptableOverheadRatio</code> will trade space
* for speed by selecting a faster but potentially less memory-efficient
* implementation. An <code>acceptableOverheadRatio</code> of
@@ -433,103 +706,130 @@ public class PackedInts {
*/
public static Mutable getMutable(int valueCount,
int bitsPerValue, float acceptableOverheadRatio) {
- acceptableOverheadRatio = Math.max(COMPACT, acceptableOverheadRatio);
- acceptableOverheadRatio = Math.min(FASTEST, acceptableOverheadRatio);
- float acceptableOverheadPerValue = acceptableOverheadRatio * bitsPerValue; // in bits
-
- int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue;
+ assert valueCount >= 0;
- if (bitsPerValue <= 8 && maxBitsPerValue >= 8) {
- return new Direct8(valueCount);
- } else if (bitsPerValue <= 16 && maxBitsPerValue >= 16) {
- return new Direct16(valueCount);
- } else if (bitsPerValue <= 32 && maxBitsPerValue >= 32) {
- return new Direct32(valueCount);
- } else if (bitsPerValue <= 64 && maxBitsPerValue >= 64) {
- return new Direct64(valueCount);
- } else if (valueCount <= Packed8ThreeBlocks.MAX_SIZE && bitsPerValue <= 24 && maxBitsPerValue >= 24) {
- return new Packed8ThreeBlocks(valueCount);
- } else if (valueCount <= Packed16ThreeBlocks.MAX_SIZE && bitsPerValue <= 48 && maxBitsPerValue >= 48) {
- return new Packed16ThreeBlocks(valueCount);
- } else {
- for (int bpv = bitsPerValue; bpv <= maxBitsPerValue; ++bpv) {
- if (Packed64SingleBlock.isSupported(bpv)) {
- float overhead = Packed64SingleBlock.overheadPerValue(bpv);
- float acceptableOverhead = acceptableOverheadPerValue + bitsPerValue - bpv;
- if (overhead <= acceptableOverhead) {
- return Packed64SingleBlock.create(valueCount, bpv);
- }
+ final FormatAndBits formatAndBits = fastestFormatAndBits(valueCount, bitsPerValue, acceptableOverheadRatio);
+ switch (formatAndBits.format) {
+ case PACKED_SINGLE_BLOCK:
+ return Packed64SingleBlock.create(valueCount, formatAndBits.bitsPerValue);
+ case PACKED:
+ switch (formatAndBits.bitsPerValue) {
+ case 8:
+ return new Direct8(valueCount);
+ case 16:
+ return new Direct16(valueCount);
+ case 32:
+ return new Direct32(valueCount);
+ case 64:
+ return new Direct64(valueCount);
+ case 24:
+ if (valueCount <= Packed8ThreeBlocks.MAX_SIZE) {
+ return new Packed8ThreeBlocks(valueCount);
+ }
+ break;
+ case 48:
+ if (valueCount <= Packed16ThreeBlocks.MAX_SIZE) {
+ return new Packed16ThreeBlocks(valueCount);
+ }
+ break;
}
- }
- return new Packed64(valueCount, bitsPerValue);
+ return new Packed64(valueCount, formatAndBits.bitsPerValue);
+ default:
+ throw new AssertionError();
}
}
/**
- * Create a packed integer array writer for the given number of values at the
- * given bits/value. Writers append to the given IndexOutput and has very
- * low memory overhead.
+ * Expert: Create a packed integer array writer for the given output, format,
+ * value count, and number of bits per value.
+ * </p><p>
+ * The resulting stream will be long-aligned. This means that depending on
+ * the format which is used, up to 63 bits will be wasted. An easy way to
+ * make sure that no space is lost is to always use a <code>valueCount</code>
+ * that is a multiple of 64.
+ * </p><p>
+ * This method does not write any metadata to the stream, meaning that it is
+ * your responsibility to store it somewhere else in order to be able to
+ * recover data from the stream later on:
+ * <ul>
+ * <li><code>format</code> (using {@link Format#getId()}),</li>
+ * <li><code>valueCount</code>,</li>
+ * <li><code>bitsPerValue</code>,</li>
+ * <li>{@link #VERSION_CURRENT}.</li>
+ * </ul>
+ * </p><p>
+ * It is possible to start writing values without knowing how many of them you
+ * are actually going to write. To do this, just pass <code>-1</code> as
+ * <code>valueCount</code>. On the other hand, for any positive value of
+ * <code>valueCount</code>, the returned writer will make sure that you don't
+ * write more values than expected and pad the end of the stream with zeros in
+ * case you have written fewer than <code>valueCount</code> when calling
+ * {@link Writer#finish()}.
+ * </p><p>
+ * The <code>mem</code> parameter lets you control how much memory can be used
+ * to buffer changes in memory before flushing to disk. High values of
+ * <code>mem</code> are likely to improve throughput. On the other hand, if
+ * speed is not that important to you, a value of <code>0</code> will use as
+ * little memory as possible and should already offer reasonable throughput.
*
- * Positive values of <code>acceptableOverheadRatio</code> will trade space
+ * @param out the data output
+ * @param format the format to use to serialize the values
+ * @param valueCount the number of values
+ * @param bitsPerValue the number of bits per value
+ * @param mem how much memory (in bytes) can be used to speed up serialization
+ * @return a Writer
+ * @throws IOException
+ * @see PackedInts#getReaderIteratorNoHeader(DataInput, Format, int, int, int, int)
+ * @see PackedInts#getReaderNoHeader(DataInput, Format, int, int, int)
+ * @lucene.internal
+ */
+ public static Writer getWriterNoHeader(
+ DataOutput out, Format format, int valueCount, int bitsPerValue, int mem) throws IOException {
+ return new PackedWriter(format, out, valueCount, bitsPerValue, mem);
+ }
+
+ /**
+ * Create a packed integer array writer for the given output, format, value
+ * count, and number of bits per value.
+ * </p><p>
+ * The resulting stream will be long-aligned. This means that depending on
+ * the format which is used under the hood, up to 63 bits will be wasted.
+ * An easy way to make sure that no space is lost is to always use a
+ * <code>valueCount</code> that is a multiple of 64.
+ * </p><p>
+ * This method writes metadata to the stream, so that the resulting stream is
+ * sufficient to restore a {@link Reader} from it. You don't need to track
+ * <code>valueCount</code> or <code>bitsPerValue</code> by yourself. In case
+ * this is a problem, you should probably look at
+ * {@link #getWriterNoHeader(DataOutput, Format, int, int, int)}.
+ * </p><p>
+ * The <code>acceptableOverheadRatio</code> parameter controls how
+ * readers that will be restored from this stream trade space
* for speed by selecting a faster but potentially less memory-efficient
* implementation. An <code>acceptableOverheadRatio</code> of
* {@link PackedInts#COMPACT} will make sure that the most memory-efficient
* implementation is selected whereas {@link PackedInts#FASTEST} will make sure
- * that the fastest implementation is selected.
+ * that the fastest implementation is selected. In case you are only interested
+ * in reading this stream sequentially later on, you should probably use
+ * {@link PackedInts#COMPACT}.
*
- * @param out the destination for the produced bits.
- * @param valueCount the number of elements.
- * @param bitsPerValue the number of bits available for any given value.
+ * @param out the data output
+ * @param valueCount the number of values
+ * @param bitsPerValue the number of bits per value
* @param acceptableOverheadRatio an acceptable overhead ratio per value
- * @return a Writer ready for receiving values.
- * @throws IOException if bits could not be written to out.
+ * @return a Writer
+ * @throws IOException
* @lucene.internal
*/
public static Writer getWriter(DataOutput out,
int valueCount, int bitsPerValue, float acceptableOverheadRatio)
throws IOException {
- acceptableOverheadRatio = Math.max(COMPACT, acceptableOverheadRatio);
- acceptableOverheadRatio = Math.min(FASTEST, acceptableOverheadRatio);
- float acceptableOverheadPerValue = acceptableOverheadRatio * bitsPerValue; // in bits
-
- int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue;
-
- if (bitsPerValue <= 8 && maxBitsPerValue >= 8) {
- return getWriterByFormat(out, valueCount, 8, PACKED);
- } else if (bitsPerValue <= 16 && maxBitsPerValue >= 16) {
- return getWriterByFormat(out, valueCount, 16, PACKED);
- } else if (bitsPerValue <= 32 && maxBitsPerValue >= 32) {
- return getWriterByFormat(out, valueCount, 32, PACKED);
- } else if (bitsPerValue <= 64 && maxBitsPerValue >= 64) {
- return getWriterByFormat(out, valueCount, 64, PACKED);
- } else if (valueCount <= Packed8ThreeBlocks.MAX_SIZE && bitsPerValue <= 24 && maxBitsPerValue >= 24) {
- return getWriterByFormat(out, valueCount, 24, PACKED);
- } else if (valueCount <= Packed16ThreeBlocks.MAX_SIZE && bitsPerValue <= 48 && maxBitsPerValue >= 48) {
- return getWriterByFormat(out, valueCount, 48, PACKED);
- } else {
- for (int bpv = bitsPerValue; bpv <= maxBitsPerValue; ++bpv) {
- if (Packed64SingleBlock.isSupported(bpv)) {
- float overhead = Packed64SingleBlock.overheadPerValue(bpv);
- float acceptableOverhead = acceptableOverheadPerValue + bitsPerValue - bpv;
- if (overhead <= acceptableOverhead) {
- return getWriterByFormat(out, valueCount, bpv, PACKED_SINGLE_BLOCK);
- }
- }
- }
- return getWriterByFormat(out, valueCount, bitsPerValue, PACKED);
- }
- }
+ assert valueCount >= 0;
- private static Writer getWriterByFormat(DataOutput out,
- int valueCount, int bitsPerValue, int format) throws IOException {
- switch (format) {
- case PACKED:
- return new PackedWriter(out, valueCount, bitsPerValue);
- case PACKED_SINGLE_BLOCK:
- return new Packed64SingleBlockWriter(out, valueCount, bitsPerValue);
- default:
- throw new IllegalArgumentException("Unknown format " + format);
- }
+ final FormatAndBits formatAndBits = fastestFormatAndBits(valueCount, bitsPerValue, acceptableOverheadRatio);
+ final Writer writer = getWriterNoHeader(out, formatAndBits.format, valueCount, formatAndBits.bitsPerValue, DEFAULT_BUFFER_SIZE);
+ writer.writeHeader();
+ return writer;
}
/** Returns how many bits are required to hold values up
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java?rev=1357166&r1=1357165&r2=1357166&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java Wed Jul 4 08:28:04 2012
@@ -17,76 +17,71 @@ package org.apache.lucene.util.packed;
* limitations under the License.
*/
-import org.apache.lucene.store.IndexInput;
-
+import java.io.EOFException;
import java.io.IOException;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.util.LongsRef;
+
final class PackedReaderIterator extends PackedInts.ReaderIteratorImpl {
- private long pending;
- private int pendingBitsLeft;
- private int position = -1;
- // masks[n-1] masks for bottom n bits
- private final long[] masks;
+ final PackedInts.Format format;
+ final BulkOperation bulkOperation;
+ final long[] nextBlocks;
+ final LongsRef nextValues;
+ final int iterations;
+ int position;
- public PackedReaderIterator(int valueCount, int bitsPerValue, IndexInput in) {
+ PackedReaderIterator(PackedInts.Format format, int valueCount, int bitsPerValue, DataInput in, int mem) {
super(valueCount, bitsPerValue, in);
+ this.format = format;
+ bulkOperation = BulkOperation.of(format, bitsPerValue);
+ iterations = bulkOperation.computeIterations(valueCount, mem);
+ assert iterations > 0;
+ nextBlocks = new long[iterations * bulkOperation.blocks()];
+ nextValues = new LongsRef(new long[iterations * bulkOperation.values()], 0, 0);
+ assert iterations * bulkOperation.values() == nextValues.longs.length;
+ assert iterations * bulkOperation.blocks() == nextBlocks.length;
+ nextValues.offset = nextValues.longs.length;
+ position = -1;
+ }
- masks = new long[bitsPerValue];
+ @Override
+ public LongsRef next(int count) throws IOException {
+ assert nextValues.length >= 0;
+ assert count > 0;
+ assert nextValues.offset + nextValues.length <= nextValues.longs.length;
+
+ nextValues.offset += nextValues.length;
- long v = 1;
- for (int i = 0; i < bitsPerValue; i++) {
- v *= 2;
- masks[i] = v - 1;
+ final int remaining = valueCount - position - 1;
+ if (remaining <= 0) {
+ throw new EOFException();
}
- }
+ count = Math.min(remaining, count);
- public long next() throws IOException {
- if (pendingBitsLeft == 0) {
- pending = in.readLong();
- pendingBitsLeft = 64;
- }
-
- final long result;
- if (pendingBitsLeft >= bitsPerValue) { // not split
- result = (pending >> (pendingBitsLeft - bitsPerValue)) & masks[bitsPerValue-1];
- pendingBitsLeft -= bitsPerValue;
- } else { // split
- final int bits1 = bitsPerValue - pendingBitsLeft;
- final long result1 = (pending & masks[pendingBitsLeft-1]) << bits1;
- pending = in.readLong();
- final long result2 = (pending >> (64 - bits1)) & masks[bits1-1];
- pendingBitsLeft = 64 + pendingBitsLeft - bitsPerValue;
- result = result1 | result2;
+ if (nextValues.offset == nextValues.longs.length) {
+ final int remainingBlocks = format.nblocks(bitsPerValue, remaining);
+ final int blocksToRead = Math.min(remainingBlocks, nextBlocks.length);
+ for (int i = 0; i < blocksToRead; ++i) {
+ nextBlocks[i] = in.readLong();
+ }
+ for (int i = blocksToRead; i < nextBlocks.length; ++i) {
+ nextBlocks[i] = 0L;
+ }
+
+ bulkOperation.get(nextBlocks, 0, nextValues.longs, 0, iterations);
+ nextValues.offset = 0;
}
-
- ++position;
- return result;
+
+ nextValues.length = Math.min(nextValues.longs.length - nextValues.offset, count);
+ position += nextValues.length;
+ return nextValues;
}
+ @Override
public int ord() {
return position;
}
- public long advance(final int ord) throws IOException{
- assert ord < valueCount : "ord must be less than valueCount";
- assert ord > position : "ord must be greater than the current position";
- final long bits = (long) bitsPerValue;
- final int posToSkip = ord - 1 - position;
- final long bitsToSkip = (bits * (long)posToSkip);
- if (bitsToSkip < pendingBitsLeft) { // enough bits left - no seek required
- pendingBitsLeft -= bitsToSkip;
- } else {
- final long skip = bitsToSkip-pendingBitsLeft;
- final long closestByte = (skip >> 6) << 3;
- if (closestByte != 0) { // need to seek
- final long filePointer = in.getFilePointer();
- in.seek(filePointer + closestByte);
- }
- pending = in.readLong();
- pendingBitsLeft = 64 - (int)(skip % 64);
- }
- position = ord-1;
- return next();
- }
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedWriter.java?rev=1357166&r1=1357165&r2=1357166&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedWriter.java Wed Jul 4 08:28:04 2012
@@ -19,101 +19,79 @@ package org.apache.lucene.util.packed;
import org.apache.lucene.store.DataOutput;
+import java.io.EOFException;
import java.io.IOException;
// Packs high order byte first, to match
// IndexOutput.writeInt/Long/Short byte order
-/**
- * Generic writer for space-optimal packed values. The resulting bits can be
- * used directly by Packed32, Packed64 and PackedDirect* and will always be
- * long-aligned.
- */
-
-class PackedWriter extends PackedInts.Writer {
- private long pending;
- private int pendingBitPos;
-
- // masks[n-1] masks for bottom n bits
- private final long[] masks;
- private int written = 0;
+final class PackedWriter extends PackedInts.Writer {
- public PackedWriter(DataOutput out, int valueCount, int bitsPerValue)
- throws IOException {
- super(out, valueCount, bitsPerValue);
-
- pendingBitPos = 64;
- masks = new long[bitsPerValue - 1];
+ boolean finished;
+ final PackedInts.Format format;
+ final BulkOperation bulkOperation;
+ final long[] nextBlocks;
+ final long[] nextValues;
+ final int iterations;
+ int off;
+ int written;
- long v = 1;
- for (int i = 0; i < bitsPerValue - 1; i++) {
- v *= 2;
- masks[i] = v - 1;
- }
+ PackedWriter(PackedInts.Format format, DataOutput out, int valueCount, int bitsPerValue, int mem)
+ throws IOException {
+ super(out, valueCount, bitsPerValue);
+ this.format = format;
+ bulkOperation = BulkOperation.of(format, bitsPerValue);
+ iterations = bulkOperation.computeIterations(valueCount, mem);
+ nextBlocks = new long[iterations * bulkOperation.blocks()];
+ nextValues = new long[iterations * bulkOperation.values()];
+ off = 0;
+ written = 0;
+ finished = false;
}
@Override
- protected int getFormat() {
- return PackedInts.PACKED;
+ protected PackedInts.Format getFormat() {
+ return format;
}
- /**
- * Do not call this after finish
- */
@Override
public void add(long v) throws IOException {
- assert v <= PackedInts.maxValue(bitsPerValue) : "v=" + v
- + " maxValue=" + PackedInts.maxValue(bitsPerValue);
- assert v >= 0;
- //System.out.println(" packedw add v=" + v + " pendingBitPos=" + pendingBitPos);
-
- // TODO
- if (pendingBitPos >= bitsPerValue) {
- // not split
-
- // write-once, so we can |= w/o first masking to 0s
- pending |= v << (pendingBitPos - bitsPerValue);
- if (pendingBitPos == bitsPerValue) {
- // flush
- out.writeLong(pending);
- pending = 0;
- pendingBitPos = 64;
- } else {
- pendingBitPos -= bitsPerValue;
- }
-
- } else {
- // split
-
- // write top pendingBitPos bits of value into bottom bits of pending
- pending |= (v >> (bitsPerValue - pendingBitPos)) & masks[pendingBitPos - 1];
- //System.out.println(" part1 (v >> " + (bitsPerValue - pendingBitPos) + ") & " + masks[pendingBitPos-1]);
-
- // flush
- out.writeLong(pending);
-
- // write bottom (bitsPerValue - pendingBitPos) bits of value into top bits of pending
- pendingBitPos = 64 - bitsPerValue + pendingBitPos;
- //System.out.println(" part2 v << " + pendingBitPos);
- pending = (v << pendingBitPos);
+ assert v >= 0 && v <= PackedInts.maxValue(bitsPerValue);
+ assert !finished;
+ if (valueCount != -1 && written >= valueCount) {
+ throw new EOFException("Writing past end of stream");
+ }
+ nextValues[off++] = v;
+ if (off == nextValues.length) {
+ flush(nextValues.length);
+ off = 0;
}
- written++;
+ ++written;
}
@Override
public void finish() throws IOException {
- while (written < valueCount) {
- add(0L); // Auto flush
+ assert !finished;
+ if (valueCount != -1) {
+ while (written < valueCount) {
+ add(0L);
+ }
}
+ flush(off);
+ finished = true;
+ }
- if (pendingBitPos != 64) {
- out.writeLong(pending);
+ private void flush(int nvalues) throws IOException {
+ bulkOperation.set(nextBlocks, 0, nextValues, 0, iterations);
+ final int blocks = format.nblocks(bitsPerValue, nvalues);
+ for (int i = 0; i < blocks; ++i) {
+ out.writeLong(nextBlocks[i]);
}
+ off = 0;
}
@Override
- public String toString() {
- return "PackedWriter(written " + written + "/" + valueCount + " with "
- + bitsPerValue + " bits/value)";
+ public int ord() {
+ return written - 1;
}
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/gen_Packed64SingleBlock.py
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/gen_Packed64SingleBlock.py?rev=1357166&r1=1357165&r2=1357166&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/gen_Packed64SingleBlock.py (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/gen_Packed64SingleBlock.py Wed Jul 4 08:28:04 2012
@@ -58,12 +58,6 @@ abstract class Packed64SingleBlock exten
return Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsPerValue) >= 0;
}
- public static float overheadPerValue(int bitsPerValue) {
- int valuesPerBlock = 64 / bitsPerValue;
- int overhead = 64 %% bitsPerValue;
- return (float) overhead / valuesPerBlock;
- }
-
private static int requiredCapacity(int valueCount, int valuesPerBlock) {
return valueCount / valuesPerBlock
+ (valueCount %% valuesPerBlock == 0 ? 0 : 1);
@@ -111,16 +105,14 @@ abstract class Packed64SingleBlock exten
// bulk get
assert index %% valuesPerBlock == 0;
- final long readMask = (1L << bitsPerValue) - 1;
- final int startBlock = index / valuesPerBlock;
- final int endBlock = (index + len) / valuesPerBlock;
- final int diff = (endBlock - startBlock) * valuesPerBlock;
+ final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
+ assert op.blocks() == 1;
+ assert op.values() == valuesPerBlock;
+ final int blockIndex = index / valuesPerBlock;
+ final int nblocks = (index + len) / valuesPerBlock - blockIndex;
+ op.get(blocks, blockIndex, arr, off, nblocks);
+ final int diff = nblocks * valuesPerBlock;
index += diff; len -= diff;
- for (int block = startBlock; block < endBlock; ++block) {
- for (int i = 0; i < valuesPerBlock; ++i) {
- arr[off++] = (blocks[block] >>> (i * bitsPerValue)) & readMask;
- }
- }
if (index > originalIndex) {
// stay at the block boundary
@@ -157,17 +149,14 @@ abstract class Packed64SingleBlock exten
// bulk set
assert index %% valuesPerBlock == 0;
- final int startBlock = index / valuesPerBlock;
- final int endBlock = (index + len) / valuesPerBlock;
- final int diff = (endBlock - startBlock) * valuesPerBlock;
+ final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
+ assert op.blocks() == 1;
+ assert op.values() == valuesPerBlock;
+ final int blockIndex = index / valuesPerBlock;
+ final int nblocks = (index + len) / valuesPerBlock - blockIndex;
+ op.set(blocks, blockIndex, arr, off, nblocks);
+ final int diff = nblocks * valuesPerBlock;
index += diff; len -= diff;
- for (int block = startBlock; block < endBlock; ++block) {
- long next = 0L;
- for (int i = 0; i < valuesPerBlock; ++i) {
- next |= (arr[off++] << (i * bitsPerValue));
- }
- blocks[block] = next;
- }
if (index > originalIndex) {
// stay at the block boundary
@@ -221,8 +210,8 @@ abstract class Packed64SingleBlock exten
}
@Override
- protected int getFormat() {
- return PackedInts.PACKED_SINGLE_BLOCK;
+ protected PackedInts.Format getFormat() {
+ return PackedInts.Format.PACKED_SINGLE_BLOCK;
}
@Override
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/package.html?rev=1357166&r1=1357165&r2=1357166&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/package.html (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/package.html Wed Jul 4 08:28:04 2012
@@ -19,9 +19,16 @@
<head></head>
<body bgcolor="white">
+<p>Packed integer arrays and streams.</p>
+
<p>
- The packed package provides random access capable arrays of positive longs.
- The implementations provides different trade offs between memory usage and
+ The packed package provides
+ <ul>
+ <li>sequential and random access capable arrays of positive longs,</li>
+ <li>routines for efficient serialization and deserialization of streams of packed integers.</li>
+ </ul>
+
+ The implementations provide different trade-offs between memory usage and
access speed. The standard usage scenario is replacing large int or long
arrays in order to reduce the memory footprint.
</p><p>