You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2014/07/01 16:59:52 UTC
svn commit: r1607105 - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/core/ lucene/core/src/java/org/apache/lucene/index/
lucene/core/src/java/org/apache/lucene/search/
lucene/core/src/java/org/apache/lucene/util/
lucene/core/src/java/org/apache/...
Author: jpountz
Date: Tue Jul 1 14:59:52 2014
New Revision: 1607105
URL: http://svn.apache.org/r1607105
Log:
LUCENE-5792: Simplified *AppendingBuffer APIs.
Added:
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/DeltaPackedLongValues.java (with props)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicLongValues.java (with props)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedLongValues.java (with props)
Removed:
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/AbstractAppendingLongBuffer.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/AppendingDeltaPackedLongBuffer.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/AppendingPackedLongBuffer.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicAppendingLongBuffer.java
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/core/ (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/MergeState.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/PForDeltaDocIdSet.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/WAH8DocIdSet.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PagedGrowableWriter.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/package.html
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java
lucene/dev/branches/branch_4x/lucene/misc/ (props changed)
lucene/dev/branches/branch_4x/lucene/misc/src/java/org/apache/lucene/index/sorter/Sorter.java
lucene/dev/branches/branch_4x/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingMergePolicy.java
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java?rev=1607105&r1=1607104&r2=1607105&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java Tue Jul 1 14:59:52 2014
@@ -30,8 +30,8 @@ import org.apache.lucene.util.Counter;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;
import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.PackedLongValues;
/** Buffers up pending byte[] per doc, then flushes when
* segment flushes. */
@@ -47,7 +47,7 @@ class BinaryDocValuesWriter extends DocV
private final DataOutput bytesOut;
private final Counter iwBytesUsed;
- private final AppendingDeltaPackedLongBuffer lengths;
+ private final PackedLongValues.Builder lengths;
private FixedBitSet docsWithField;
private final FieldInfo fieldInfo;
private int addedValues;
@@ -57,7 +57,7 @@ class BinaryDocValuesWriter extends DocV
this.fieldInfo = fieldInfo;
this.bytes = new PagedBytes(BLOCK_BITS);
this.bytesOut = bytes.getDataOutput();
- this.lengths = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
+ this.lengths = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
this.iwBytesUsed = iwBytesUsed;
this.docsWithField = new FixedBitSet(64);
this.bytesUsed = docsWithFieldBytesUsed();
@@ -112,11 +112,12 @@ class BinaryDocValuesWriter extends DocV
public void flush(SegmentWriteState state, DocValuesConsumer dvConsumer) throws IOException {
final int maxDoc = state.segmentInfo.getDocCount();
bytes.freeze(false);
+ final PackedLongValues lengths = this.lengths.build();
dvConsumer.addBinaryField(fieldInfo,
new Iterable<BytesRef>() {
@Override
public Iterator<BytesRef> iterator() {
- return new BytesIterator(maxDoc);
+ return new BytesIterator(maxDoc, lengths);
}
});
}
@@ -124,14 +125,15 @@ class BinaryDocValuesWriter extends DocV
// iterates over the values we have in ram
private class BytesIterator implements Iterator<BytesRef> {
final BytesRef value = new BytesRef();
- final AppendingDeltaPackedLongBuffer.Iterator lengthsIterator = lengths.iterator();
+ final PackedLongValues.Iterator lengthsIterator;
final DataInput bytesIterator = bytes.getDataInput();
final int size = (int) lengths.size();
final int maxDoc;
int upto;
- BytesIterator(int maxDoc) {
+ BytesIterator(int maxDoc, PackedLongValues lengths) {
this.maxDoc = maxDoc;
+ this.lengthsIterator = lengths.iterator();
}
@Override
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/MergeState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/MergeState.java?rev=1607105&r1=1607104&r2=1607105&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/MergeState.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/MergeState.java Tue Jul 1 14:59:52 2014
@@ -22,7 +22,8 @@ import java.util.List;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.InfoStream;
-import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
+import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.PackedLongValues;
/** Holds common state used during segment merging.
*
@@ -69,15 +70,15 @@ public class MergeState {
static DocMap build(final int maxDoc, final Bits liveDocs) {
assert liveDocs != null;
- final MonotonicAppendingLongBuffer docMap = new MonotonicAppendingLongBuffer();
+ final PackedLongValues.Builder docMapBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
int del = 0;
for (int i = 0; i < maxDoc; ++i) {
- docMap.add(i - del);
+ docMapBuilder.add(i - del);
if (!liveDocs.get(i)) {
++del;
}
}
- docMap.freeze();
+ final PackedLongValues docMap = docMapBuilder.build();
final int numDeletedDocs = del;
assert docMap.size() == maxDoc;
return new DocMap() {
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java?rev=1607105&r1=1607104&r2=1607105&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java Tue Jul 1 14:59:52 2014
@@ -29,9 +29,8 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.packed.AppendingPackedLongBuffer;
-import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.PackedLongValues;
/**
* A wrapper for CompositeIndexReader providing access to DocValues.
@@ -488,9 +487,9 @@ public class MultiDocValues {
// cache key of whoever asked for this awful thing
final Object owner;
// globalOrd -> (globalOrd - segmentOrd) where segmentOrd is the ordinal in the first segment that contains this term
- final MonotonicAppendingLongBuffer globalOrdDeltas;
+ final PackedLongValues globalOrdDeltas;
// globalOrd -> first segment container
- final AppendingPackedLongBuffer firstSegments;
+ final PackedLongValues firstSegments;
// for every segment, segmentOrd -> globalOrd
final LongValues segmentToGlobalOrds[];
// the map from/to segment ids
@@ -506,11 +505,11 @@ public class MultiDocValues {
// even though we accept an overhead ratio, we keep these ones with COMPACT
// since they are only used to resolve values given a global ord, which is
// slow anyway
- globalOrdDeltas = new MonotonicAppendingLongBuffer(PackedInts.COMPACT);
- firstSegments = new AppendingPackedLongBuffer(PackedInts.COMPACT);
- final MonotonicAppendingLongBuffer[] ordDeltas = new MonotonicAppendingLongBuffer[subs.length];
+ PackedLongValues.Builder globalOrdDeltas = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
+ PackedLongValues.Builder firstSegments = PackedLongValues.packedBuilder(PackedInts.COMPACT);
+ final PackedLongValues.Builder[] ordDeltas = new PackedLongValues.Builder[subs.length];
for (int i = 0; i < ordDeltas.length; i++) {
- ordDeltas[i] = new MonotonicAppendingLongBuffer(acceptableOverheadRatio);
+ ordDeltas[i] = PackedLongValues.monotonicBuilder(acceptableOverheadRatio);
}
long[] ordDeltaBits = new long[subs.length];
long segmentOrds[] = new long[subs.length];
@@ -551,18 +550,15 @@ public class MultiDocValues {
globalOrdDeltas.add(globalOrdDelta);
globalOrd++;
}
- firstSegments.freeze();
- globalOrdDeltas.freeze();
- for (int i = 0; i < ordDeltas.length; ++i) {
- ordDeltas[i].freeze();
- }
+ this.firstSegments = firstSegments.build();
+ this.globalOrdDeltas = globalOrdDeltas.build();
// ordDeltas is typically the bottleneck, so let's see what we can do to make it faster
segmentToGlobalOrds = new LongValues[subs.length];
- long ramBytesUsed = BASE_RAM_BYTES_USED + globalOrdDeltas.ramBytesUsed()
- + firstSegments.ramBytesUsed() + RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds)
+ long ramBytesUsed = BASE_RAM_BYTES_USED + this.globalOrdDeltas.ramBytesUsed()
+ + this.firstSegments.ramBytesUsed() + RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds)
+ segmentMap.ramBytesUsed();
for (int i = 0; i < ordDeltas.length; ++i) {
- final MonotonicAppendingLongBuffer deltas = ordDeltas[i];
+ final PackedLongValues deltas = ordDeltas[i].build();
if (ordDeltaBits[i] == 0L) {
// segment ords perfectly match global ordinals
// likely in case of low cardinalities and large segments
@@ -576,7 +572,7 @@ public class MultiDocValues {
// monotonic compression mostly adds overhead, let's keep the mapping in plain packed ints
final int size = (int) deltas.size();
final PackedInts.Mutable newDeltas = PackedInts.getMutable(size, bitsRequired, acceptableOverheadRatio);
- final MonotonicAppendingLongBuffer.Iterator it = deltas.iterator();
+ final PackedLongValues.Iterator it = deltas.iterator();
for (int ord = 0; ord < size; ++ord) {
newDeltas.set(ord, it.next());
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java?rev=1607105&r1=1607104&r2=1607105&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java Tue Jul 1 14:59:52 2014
@@ -25,8 +25,8 @@ import org.apache.lucene.codecs.DocValue
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;
import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.PackedLongValues;
/** Buffers up pending long per doc, then flushes when
* segment flushes. */
@@ -34,14 +34,14 @@ class NumericDocValuesWriter extends Doc
private final static long MISSING = 0L;
- private AppendingDeltaPackedLongBuffer pending;
+ private PackedLongValues.Builder pending;
private final Counter iwBytesUsed;
private long bytesUsed;
private FixedBitSet docsWithField;
private final FieldInfo fieldInfo;
public NumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed, boolean trackDocsWithField) {
- pending = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
+ pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
docsWithField = trackDocsWithField ? new FixedBitSet(64) : null;
bytesUsed = pending.ramBytesUsed() + docsWithFieldBytesUsed();
this.fieldInfo = fieldInfo;
@@ -87,25 +87,30 @@ class NumericDocValuesWriter extends Doc
public void flush(SegmentWriteState state, DocValuesConsumer dvConsumer) throws IOException {
final int maxDoc = state.segmentInfo.getDocCount();
+ final PackedLongValues values = pending.build();
dvConsumer.addNumericField(fieldInfo,
new Iterable<Number>() {
@Override
public Iterator<Number> iterator() {
- return new NumericIterator(maxDoc);
+ return new NumericIterator(maxDoc, values, docsWithField);
}
});
}
// iterates over the values we have in ram
- private class NumericIterator implements Iterator<Number> {
- final AppendingDeltaPackedLongBuffer.Iterator iter = pending.iterator();
- final int size = (int)pending.size();
+ private static class NumericIterator implements Iterator<Number> {
+ final PackedLongValues.Iterator iter;
+ final FixedBitSet docsWithField;
+ final int size;
final int maxDoc;
int upto;
- NumericIterator(int maxDoc) {
+ NumericIterator(int maxDoc, PackedLongValues values, FixedBitSet docsWithFields) {
this.maxDoc = maxDoc;
+ this.iter = values.iterator();
+ this.size = (int) values.size();
+ this.docsWithField = docsWithFields;
}
@Override
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java?rev=1607105&r1=1607104&r2=1607105&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java Tue Jul 1 14:59:52 2014
@@ -30,14 +30,14 @@ import org.apache.lucene.util.BytesRefHa
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;
import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.PackedLongValues;
/** Buffers up pending byte[] per doc, deref and sorting via
* int ord, then flushes when segment flushes. */
class SortedDocValuesWriter extends DocValuesWriter {
final BytesRefHash hash;
- private AppendingDeltaPackedLongBuffer pending;
+ private PackedLongValues.Builder pending;
private final Counter iwBytesUsed;
private long bytesUsed; // this currently only tracks differences in 'pending'
private final FieldInfo fieldInfo;
@@ -52,7 +52,7 @@ class SortedDocValuesWriter extends DocV
new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
BytesRefHash.DEFAULT_CAPACITY,
new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
- pending = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
+ pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
bytesUsed = pending.ramBytesUsed();
iwBytesUsed.addAndGet(bytesUsed);
}
@@ -112,6 +112,7 @@ class SortedDocValuesWriter extends DocV
assert pending.size() == maxDoc;
final int valueCount = hash.size();
+ final PackedLongValues ords = pending.build();
final int[] sortedValues = hash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
final int[] ordMap = new int[valueCount];
@@ -126,7 +127,7 @@ class SortedDocValuesWriter extends DocV
new Iterable<BytesRef>() {
@Override
public Iterator<BytesRef> iterator() {
- return new ValuesIterator(sortedValues, valueCount);
+ return new ValuesIterator(sortedValues, valueCount, hash);
}
},
@@ -134,21 +135,23 @@ class SortedDocValuesWriter extends DocV
new Iterable<Number>() {
@Override
public Iterator<Number> iterator() {
- return new OrdsIterator(ordMap, maxDoc);
+ return new OrdsIterator(ordMap, maxDoc, ords);
}
});
}
// iterates over the unique values we have in ram
- private class ValuesIterator implements Iterator<BytesRef> {
+ private static class ValuesIterator implements Iterator<BytesRef> {
final int sortedValues[];
+ final BytesRefHash hash;
final BytesRef scratch = new BytesRef();
final int valueCount;
int ordUpto;
- ValuesIterator(int sortedValues[], int valueCount) {
+ ValuesIterator(int sortedValues[], int valueCount, BytesRefHash hash) {
this.sortedValues = sortedValues;
this.valueCount = valueCount;
+ this.hash = hash;
}
@Override
@@ -173,16 +176,17 @@ class SortedDocValuesWriter extends DocV
}
// iterates over the ords for each doc we have in ram
- private class OrdsIterator implements Iterator<Number> {
- final AppendingDeltaPackedLongBuffer.Iterator iter = pending.iterator();
+ private static class OrdsIterator implements Iterator<Number> {
+ final PackedLongValues.Iterator iter;
final int ordMap[];
final int maxDoc;
int docUpto;
- OrdsIterator(int ordMap[], int maxDoc) {
+ OrdsIterator(int ordMap[], int maxDoc, PackedLongValues ords) {
this.ordMap = ordMap;
this.maxDoc = maxDoc;
- assert pending.size() == maxDoc;
+ assert ords.size() == maxDoc;
+ this.iter = ords.iterator();
}
@Override
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java?rev=1607105&r1=1607104&r2=1607105&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java Tue Jul 1 14:59:52 2014
@@ -26,13 +26,13 @@ import org.apache.lucene.codecs.DocValue
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;
import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.PackedLongValues;
/** Buffers up pending long[] per doc, sorts, then flushes when segment flushes. */
class SortedNumericDocValuesWriter extends DocValuesWriter {
- private AppendingDeltaPackedLongBuffer pending; // stream of all values
- private AppendingDeltaPackedLongBuffer pendingCounts; // count of values per doc
+ private PackedLongValues.Builder pending; // stream of all values
+ private PackedLongValues.Builder pendingCounts; // count of values per doc
private final Counter iwBytesUsed;
private long bytesUsed; // this only tracks differences in 'pending' and 'pendingCounts'
private final FieldInfo fieldInfo;
@@ -43,8 +43,8 @@ class SortedNumericDocValuesWriter exten
public SortedNumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
this.fieldInfo = fieldInfo;
this.iwBytesUsed = iwBytesUsed;
- pending = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
- pendingCounts = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
+ pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
+ pendingCounts = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed();
iwBytesUsed.addAndGet(bytesUsed);
}
@@ -105,13 +105,15 @@ class SortedNumericDocValuesWriter exten
public void flush(SegmentWriteState state, DocValuesConsumer dvConsumer) throws IOException {
final int maxDoc = state.segmentInfo.getDocCount();
assert pendingCounts.size() == maxDoc;
+ final PackedLongValues values = pending.build();
+ final PackedLongValues valueCounts = pendingCounts.build();
dvConsumer.addSortedNumericField(fieldInfo,
// doc -> valueCount
new Iterable<Number>() {
@Override
public Iterator<Number> iterator() {
- return new CountIterator();
+ return new CountIterator(valueCounts);
}
},
@@ -119,15 +121,19 @@ class SortedNumericDocValuesWriter exten
new Iterable<Number>() {
@Override
public Iterator<Number> iterator() {
- return new ValuesIterator();
+ return new ValuesIterator(values);
}
});
}
// iterates over the values for each doc we have in ram
- private class ValuesIterator implements Iterator<Number> {
- final AppendingDeltaPackedLongBuffer.Iterator iter = pending.iterator();
-
+ private static class ValuesIterator implements Iterator<Number> {
+ final PackedLongValues.Iterator iter;
+
+ ValuesIterator(PackedLongValues values) {
+ iter = values.iterator();
+ }
+
@Override
public boolean hasNext() {
return iter.hasNext();
@@ -147,9 +153,13 @@ class SortedNumericDocValuesWriter exten
}
}
- private class CountIterator implements Iterator<Number> {
- final AppendingDeltaPackedLongBuffer.Iterator iter = pendingCounts.iterator();
-
+ private static class CountIterator implements Iterator<Number> {
+ final PackedLongValues.Iterator iter;
+
+ CountIterator(PackedLongValues valueCounts) {
+ this.iter = valueCounts.iterator();
+ }
+
@Override
public boolean hasNext() {
return iter.hasNext();
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java?rev=1607105&r1=1607104&r2=1607105&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java Tue Jul 1 14:59:52 2014
@@ -32,16 +32,15 @@ import org.apache.lucene.util.BytesRefHa
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;
-import org.apache.lucene.util.packed.AppendingPackedLongBuffer;
import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.PackedLongValues;
/** Buffers up pending byte[]s per doc, deref and sorting via
* int ord, then flushes when segment flushes. */
class SortedSetDocValuesWriter extends DocValuesWriter {
final BytesRefHash hash;
- private AppendingPackedLongBuffer pending; // stream of all termIDs
- private AppendingDeltaPackedLongBuffer pendingCounts; // termIDs per doc
+ private PackedLongValues.Builder pending; // stream of all termIDs
+ private PackedLongValues.Builder pendingCounts; // termIDs per doc
private final Counter iwBytesUsed;
private long bytesUsed; // this only tracks differences in 'pending' and 'pendingCounts'
private final FieldInfo fieldInfo;
@@ -58,8 +57,8 @@ class SortedSetDocValuesWriter extends D
new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
BytesRefHash.DEFAULT_CAPACITY,
new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
- pending = new AppendingPackedLongBuffer(PackedInts.COMPACT);
- pendingCounts = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
+ pending = PackedLongValues.packedBuilder(PackedInts.COMPACT);
+ pendingCounts = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed();
iwBytesUsed.addAndGet(bytesUsed);
}
@@ -152,6 +151,8 @@ class SortedSetDocValuesWriter extends D
final int maxCountPerDoc = maxCount;
assert pendingCounts.size() == maxDoc;
final int valueCount = hash.size();
+ final PackedLongValues ords = pending.build();
+ final PackedLongValues ordCounts = pendingCounts.build();
final int[] sortedValues = hash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
final int[] ordMap = new int[valueCount];
@@ -166,7 +167,7 @@ class SortedSetDocValuesWriter extends D
new Iterable<BytesRef>() {
@Override
public Iterator<BytesRef> iterator() {
- return new ValuesIterator(sortedValues, valueCount);
+ return new ValuesIterator(sortedValues, valueCount, hash);
}
},
@@ -174,7 +175,7 @@ class SortedSetDocValuesWriter extends D
new Iterable<Number>() {
@Override
public Iterator<Number> iterator() {
- return new OrdCountIterator(maxDoc);
+ return new OrdCountIterator(maxDoc, ordCounts);
}
},
@@ -182,21 +183,23 @@ class SortedSetDocValuesWriter extends D
new Iterable<Number>() {
@Override
public Iterator<Number> iterator() {
- return new OrdsIterator(ordMap, maxCountPerDoc);
+ return new OrdsIterator(ordMap, maxCountPerDoc, ords, ordCounts);
}
});
}
// iterates over the unique values we have in ram
- private class ValuesIterator implements Iterator<BytesRef> {
+ private static class ValuesIterator implements Iterator<BytesRef> {
final int sortedValues[];
+ final BytesRefHash hash;
final BytesRef scratch = new BytesRef();
final int valueCount;
int ordUpto;
- ValuesIterator(int sortedValues[], int valueCount) {
+ ValuesIterator(int sortedValues[], int valueCount, BytesRefHash hash) {
this.sortedValues = sortedValues;
this.valueCount = valueCount;
+ this.hash = hash;
}
@Override
@@ -221,9 +224,9 @@ class SortedSetDocValuesWriter extends D
}
// iterates over the ords for each doc we have in ram
- private class OrdsIterator implements Iterator<Number> {
- final AppendingPackedLongBuffer.Iterator iter = pending.iterator();
- final AppendingDeltaPackedLongBuffer.Iterator counts = pendingCounts.iterator();
+ private static class OrdsIterator implements Iterator<Number> {
+ final PackedLongValues.Iterator iter;
+ final PackedLongValues.Iterator counts;
final int ordMap[];
final long numOrds;
long ordUpto;
@@ -232,10 +235,12 @@ class SortedSetDocValuesWriter extends D
int currentUpto;
int currentLength;
- OrdsIterator(int ordMap[], int maxCount) {
+ OrdsIterator(int ordMap[], int maxCount, PackedLongValues ords, PackedLongValues ordCounts) {
this.currentDoc = new int[maxCount];
this.ordMap = ordMap;
- this.numOrds = pending.size();
+ this.numOrds = ords.size();
+ this.iter = ords.iterator();
+ this.counts = ordCounts.iterator();
}
@Override
@@ -270,14 +275,15 @@ class SortedSetDocValuesWriter extends D
}
}
- private class OrdCountIterator implements Iterator<Number> {
- final AppendingDeltaPackedLongBuffer.Iterator iter = pendingCounts.iterator();
+ private static class OrdCountIterator implements Iterator<Number> {
+ final PackedLongValues.Iterator iter;
final int maxDoc;
int docUpto;
- OrdCountIterator(int maxDoc) {
+ OrdCountIterator(int maxDoc, PackedLongValues ordCounts) {
this.maxDoc = maxDoc;
- assert pendingCounts.size() == maxDoc;
+ assert ordCounts.size() == maxDoc;
+ this.iter = ordCounts.iterator();
}
@Override
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java?rev=1607105&r1=1607104&r2=1607105&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java Tue Jul 1 14:59:52 2014
@@ -46,8 +46,8 @@ import org.apache.lucene.util.FixedBitSe
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.GrowableWriter;
-import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.PackedLongValues;
/**
* Expert: The default cache implementation, storing all values in memory.
@@ -1159,11 +1159,11 @@ class FieldCacheImpl implements FieldCac
public static class SortedDocValuesImpl implements Accountable {
private final PagedBytes.Reader bytes;
- private final MonotonicAppendingLongBuffer termOrdToBytesOffset;
+ private final PackedLongValues termOrdToBytesOffset;
private final PackedInts.Reader docToTermOrd;
private final int numOrd;
- public SortedDocValuesImpl(PagedBytes.Reader bytes, MonotonicAppendingLongBuffer termOrdToBytesOffset, PackedInts.Reader docToTermOrd, int numOrd) {
+ public SortedDocValuesImpl(PagedBytes.Reader bytes, PackedLongValues termOrdToBytesOffset, PackedInts.Reader docToTermOrd, int numOrd) {
this.bytes = bytes;
this.docToTermOrd = docToTermOrd;
this.termOrdToBytesOffset = termOrdToBytesOffset;
@@ -1282,7 +1282,7 @@ class FieldCacheImpl implements FieldCac
startTermsBPV = 1;
}
- MonotonicAppendingLongBuffer termOrdToBytesOffset = new MonotonicAppendingLongBuffer();
+ PackedLongValues.Builder termOrdToBytesOffset = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, maxDoc, acceptableOverheadRatio);
int termOrd = 0;
@@ -1315,10 +1315,9 @@ class FieldCacheImpl implements FieldCac
termOrd++;
}
}
- termOrdToBytesOffset.freeze();
// maybe an int-only impl?
- return new SortedDocValuesImpl(bytes.freeze(true), termOrdToBytesOffset, docToTermOrd.getMutable(), termOrd);
+ return new SortedDocValuesImpl(bytes.freeze(true), termOrdToBytesOffset.build(), docToTermOrd.getMutable(), termOrd);
}
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/PForDeltaDocIdSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/PForDeltaDocIdSet.java?rev=1607105&r1=1607104&r2=1607105&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/PForDeltaDocIdSet.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/PForDeltaDocIdSet.java Tue Jul 1 14:59:52 2014
@@ -22,8 +22,8 @@ import java.util.Arrays;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.PackedLongValues;
/**
* {@link DocIdSet} implementation based on pfor-delta encoding.
@@ -43,14 +43,12 @@ public final class PForDeltaDocIdSet ext
static final int[] ITERATIONS = new int[32];
static final int[] BYTE_BLOCK_COUNTS = new int[32];
static final int MAX_BYTE_BLOCK_COUNT;
- static final MonotonicAppendingLongBuffer SINGLE_ZERO_BUFFER = new MonotonicAppendingLongBuffer(0, 64, PackedInts.COMPACT);
- static final PForDeltaDocIdSet EMPTY = new PForDeltaDocIdSet(null, 0, Integer.MAX_VALUE, SINGLE_ZERO_BUFFER, SINGLE_ZERO_BUFFER);
+ static final PackedLongValues SINGLE_ZERO = PackedLongValues.packedBuilder(PackedInts.COMPACT).add(0L).build();
+ static final PForDeltaDocIdSet EMPTY = new PForDeltaDocIdSet(null, 0, Integer.MAX_VALUE, SINGLE_ZERO, SINGLE_ZERO);
static final int LAST_BLOCK = 1 << 5; // flag to indicate the last block
static final int HAS_EXCEPTIONS = 1 << 6;
static final int UNARY = 1 << 7;
static {
- SINGLE_ZERO_BUFFER.add(0);
- SINGLE_ZERO_BUFFER.freeze();
int maxByteBLockCount = 0;
for (int i = 1; i < ITERATIONS.length; ++i) {
DECODERS[i] = PackedInts.getDecoder(PackedInts.Format.PACKED, PackedInts.VERSION_CURRENT, i);
@@ -282,20 +280,19 @@ public final class PForDeltaDocIdSet ext
final byte[] dataArr = Arrays.copyOf(data.bytes, data.length + MAX_BYTE_BLOCK_COUNT);
final int indexSize = (numBlocks - 1) / indexInterval + 1;
- final MonotonicAppendingLongBuffer docIDs, offsets;
+ final PackedLongValues docIDs, offsets;
if (indexSize <= 1) {
- docIDs = offsets = SINGLE_ZERO_BUFFER;
+ docIDs = offsets = SINGLE_ZERO;
} else {
final int pageSize = 128;
- final int initialPageCount = (indexSize + pageSize - 1) / pageSize;
- docIDs = new MonotonicAppendingLongBuffer(initialPageCount, pageSize, PackedInts.COMPACT);
- offsets = new MonotonicAppendingLongBuffer(initialPageCount, pageSize, PackedInts.COMPACT);
+ final PackedLongValues.Builder docIDsBuilder = PackedLongValues.monotonicBuilder(pageSize, PackedInts.COMPACT);
+ final PackedLongValues.Builder offsetsBuilder = PackedLongValues.monotonicBuilder(pageSize, PackedInts.COMPACT);
// Now build the index
- final Iterator it = new Iterator(dataArr, cardinality, Integer.MAX_VALUE, SINGLE_ZERO_BUFFER, SINGLE_ZERO_BUFFER);
+ final Iterator it = new Iterator(dataArr, cardinality, Integer.MAX_VALUE, SINGLE_ZERO, SINGLE_ZERO);
index:
for (int k = 0; k < indexSize; ++k) {
- docIDs.add(it.docID() + 1);
- offsets.add(it.offset);
+ docIDsBuilder.add(it.docID() + 1);
+ offsetsBuilder.add(it.offset);
for (int i = 0; i < indexInterval; ++i) {
it.skipBlock();
if (it.docID() == DocIdSetIterator.NO_MORE_DOCS) {
@@ -303,8 +300,8 @@ public final class PForDeltaDocIdSet ext
}
}
}
- docIDs.freeze();
- offsets.freeze();
+ docIDs = docIDsBuilder.build();
+ offsets = offsetsBuilder.build();
}
return new PForDeltaDocIdSet(dataArr, cardinality, indexInterval, docIDs, offsets);
@@ -313,10 +310,10 @@ public final class PForDeltaDocIdSet ext
}
final byte[] data;
- final MonotonicAppendingLongBuffer docIDs, offsets; // for the index
+ final PackedLongValues docIDs, offsets; // for the index
final int cardinality, indexInterval;
- PForDeltaDocIdSet(byte[] data, int cardinality, int indexInterval, MonotonicAppendingLongBuffer docIDs, MonotonicAppendingLongBuffer offsets) {
+ PForDeltaDocIdSet(byte[] data, int cardinality, int indexInterval, PackedLongValues docIDs, PackedLongValues offsets) {
this.data = data;
this.cardinality = cardinality;
this.indexInterval = indexInterval;
@@ -342,7 +339,7 @@ public final class PForDeltaDocIdSet ext
// index
final int indexInterval;
- final MonotonicAppendingLongBuffer docIDs, offsets;
+ final PackedLongValues docIDs, offsets;
final int cardinality;
final byte[] data;
@@ -356,7 +353,7 @@ public final class PForDeltaDocIdSet ext
int blockIdx;
int docID;
- Iterator(byte[] data, int cardinality, int indexInterval, MonotonicAppendingLongBuffer docIDs, MonotonicAppendingLongBuffer offsets) {
+ Iterator(byte[] data, int cardinality, int indexInterval, PackedLongValues docIDs, PackedLongValues offsets) {
this.data = data;
this.cardinality = cardinality;
this.indexInterval = indexInterval;
@@ -519,10 +516,10 @@ public final class PForDeltaDocIdSet ext
return 0L;
}
long ramBytesUsed = BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(data);
- if (docIDs != SINGLE_ZERO_BUFFER) {
+ if (docIDs != SINGLE_ZERO) {
ramBytesUsed += docIDs.ramBytesUsed();
}
- if (offsets != SINGLE_ZERO_BUFFER) {
+ if (offsets != SINGLE_ZERO) {
ramBytesUsed += offsets.ramBytesUsed();
}
return ramBytesUsed;
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java?rev=1607105&r1=1607104&r2=1607105&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java Tue Jul 1 14:59:52 2014
@@ -475,4 +475,20 @@ public final class RamUsageEstimator {
return bytes + " bytes";
}
}
+
+ /**
+ * Return the size of the provided array of {@link Accountable}s by summing
+ * up the shallow size of the array and the
+ * {@link Accountable#ramBytesUsed() memory usage} reported by each
+ * {@link Accountable}.
+ */
+ public static long sizeOf(Accountable[] accountables) {
+ long size = shallowSizeOf(accountables);
+ for (Accountable accountable : accountables) {
+ if (accountable != null) {
+ size += accountable.ramBytesUsed();
+ }
+ }
+ return size;
+ }
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/WAH8DocIdSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/WAH8DocIdSet.java?rev=1607105&r1=1607104&r2=1607105&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/WAH8DocIdSet.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/WAH8DocIdSet.java Tue Jul 1 14:59:52 2014
@@ -26,8 +26,8 @@ import org.apache.lucene.search.DocIdSet
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.DataInput;
-import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.PackedLongValues;
/**
* {@link DocIdSet} implementation based on word-aligned hybrid encoding on
@@ -88,13 +88,8 @@ public final class WAH8DocIdSet extends
/** Default index interval. */
public static final int DEFAULT_INDEX_INTERVAL = 24;
- private static final MonotonicAppendingLongBuffer SINGLE_ZERO_BUFFER = new MonotonicAppendingLongBuffer(1, 64, PackedInts.COMPACT);
- private static WAH8DocIdSet EMPTY = new WAH8DocIdSet(new byte[0], 0, 1, SINGLE_ZERO_BUFFER, SINGLE_ZERO_BUFFER);
-
- static {
- SINGLE_ZERO_BUFFER.add(0L);
- SINGLE_ZERO_BUFFER.freeze();
- }
+ private static final PackedLongValues SINGLE_ZERO = PackedLongValues.packedBuilder(PackedInts.COMPACT).add(0L).build();
+ private static WAH8DocIdSet EMPTY = new WAH8DocIdSet(new byte[0], 0, 1, SINGLE_ZERO, SINGLE_ZERO);
private static final Comparator<Iterator> SERIALIZED_LENGTH_COMPARATOR = new Comparator<Iterator>() {
@Override
@@ -377,18 +372,17 @@ public final class WAH8DocIdSet extends
// Now build the index
final int valueCount = (numSequences - 1) / indexInterval + 1;
- final MonotonicAppendingLongBuffer indexPositions, indexWordNums;
+ final PackedLongValues indexPositions, indexWordNums;
if (valueCount <= 1) {
- indexPositions = indexWordNums = SINGLE_ZERO_BUFFER;
+ indexPositions = indexWordNums = SINGLE_ZERO;
} else {
final int pageSize = 128;
- final int initialPageCount = (valueCount + pageSize - 1) / pageSize;
- final MonotonicAppendingLongBuffer positions = new MonotonicAppendingLongBuffer(initialPageCount, pageSize, PackedInts.COMPACT);
- final MonotonicAppendingLongBuffer wordNums = new MonotonicAppendingLongBuffer(initialPageCount, pageSize, PackedInts.COMPACT);
+ final PackedLongValues.Builder positions = PackedLongValues.monotonicBuilder(pageSize, PackedInts.COMPACT);
+ final PackedLongValues.Builder wordNums = PackedLongValues.monotonicBuilder(pageSize, PackedInts.COMPACT);
positions.add(0L);
wordNums.add(0L);
- final Iterator it = new Iterator(data, cardinality, Integer.MAX_VALUE, SINGLE_ZERO_BUFFER, SINGLE_ZERO_BUFFER);
+ final Iterator it = new Iterator(data, cardinality, Integer.MAX_VALUE, SINGLE_ZERO, SINGLE_ZERO);
assert it.in.getPosition() == 0;
assert it.wordNum == -1;
for (int i = 1; i < valueCount; ++i) {
@@ -403,10 +397,8 @@ public final class WAH8DocIdSet extends
positions.add(position);
wordNums.add(wordNum + 1);
}
- positions.freeze();
- wordNums.freeze();
- indexPositions = positions;
- indexWordNums = wordNums;
+ indexPositions = positions.build();
+ indexWordNums = wordNums.build();
}
return new WAH8DocIdSet(data, cardinality, indexInterval, indexPositions, indexWordNums);
@@ -476,9 +468,9 @@ public final class WAH8DocIdSet extends
private final int cardinality;
private final int indexInterval;
// index for advance(int)
- private final MonotonicAppendingLongBuffer positions, wordNums; // wordNums[i] starts at the sequence at positions[i]
+ private final PackedLongValues positions, wordNums; // wordNums[i] starts at the sequence at positions[i]
- WAH8DocIdSet(byte[] data, int cardinality, int indexInterval, MonotonicAppendingLongBuffer positions, MonotonicAppendingLongBuffer wordNums) {
+ WAH8DocIdSet(byte[] data, int cardinality, int indexInterval, PackedLongValues positions, PackedLongValues wordNums) {
this.data = data;
this.cardinality = cardinality;
this.indexInterval = indexInterval;
@@ -530,7 +522,7 @@ public final class WAH8DocIdSet extends
final ByteArrayDataInput in;
final int cardinality;
final int indexInterval;
- final MonotonicAppendingLongBuffer positions, wordNums;
+ final PackedLongValues positions, wordNums;
final int indexThreshold;
int allOnesLength;
int dirtyLength;
@@ -542,7 +534,7 @@ public final class WAH8DocIdSet extends
int docID;
- Iterator(byte[] data, int cardinality, int indexInterval, MonotonicAppendingLongBuffer positions, MonotonicAppendingLongBuffer wordNums) {
+ Iterator(byte[] data, int cardinality, int indexInterval, PackedLongValues positions, PackedLongValues wordNums) {
this.in = new ByteArrayDataInput(data);
this.cardinality = cardinality;
this.indexInterval = indexInterval;
@@ -744,10 +736,10 @@ public final class WAH8DocIdSet extends
return 0L;
}
long ramBytesUsed = BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(data);
- if (positions != SINGLE_ZERO_BUFFER) {
+ if (positions != SINGLE_ZERO) {
ramBytesUsed += positions.ramBytesUsed();
}
- if (wordNums != SINGLE_ZERO_BUFFER) {
+ if (wordNums != SINGLE_ZERO) {
ramBytesUsed += wordNums.ramBytesUsed();
}
return ramBytesUsed;
Added: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/DeltaPackedLongValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/DeltaPackedLongValues.java?rev=1607105&view=auto
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/DeltaPackedLongValues.java (added)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/DeltaPackedLongValues.java Tue Jul 1 14:59:52 2014
@@ -0,0 +1,103 @@
+package org.apache.lucene.util.packed;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Arrays;
+
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.packed.PackedInts.Reader;
+
+class DeltaPackedLongValues extends PackedLongValues {
+
+ private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(DeltaPackedLongValues.class);
+
+ final long[] mins;
+
+ DeltaPackedLongValues(int pageShift, int pageMask, Reader[] values, long[] mins, long size, long ramBytesUsed) {
+ super(pageShift, pageMask, values, size, ramBytesUsed);
+ assert values.length == mins.length;
+ this.mins = mins;
+ }
+
+ @Override
+ long get(int block, int element) {
+ return mins[block] + values[block].get(element);
+ }
+
+ @Override
+ int decodeBlock(int block, long[] dest) {
+ final int count = super.decodeBlock(block, dest);
+ final long min = mins[block];
+ for (int i = 0; i < count; ++i) {
+ dest[i] += min;
+ }
+ return count;
+ }
+
+ static class Builder extends PackedLongValues.Builder {
+
+ private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(Builder.class);
+
+ long[] mins;
+
+ Builder(int pageSize, float acceptableOverheadRatio) {
+ super(pageSize, acceptableOverheadRatio);
+ mins = new long[values.length];
+ ramBytesUsed += RamUsageEstimator.sizeOf(mins);
+ }
+
+ @Override
+ long baseRamBytesUsed() {
+ return BASE_RAM_BYTES_USED;
+ }
+
+ @Override
+ public DeltaPackedLongValues build() {
+ finish();
+ pending = null;
+ final PackedInts.Reader[] values = Arrays.copyOf(this.values, valuesOff);
+ final long[] mins = Arrays.copyOf(this.mins, valuesOff);
+ final long ramBytesUsed = DeltaPackedLongValues.BASE_RAM_BYTES_USED
+ + RamUsageEstimator.sizeOf(values) + RamUsageEstimator.sizeOf(mins);
+ return new DeltaPackedLongValues(pageShift, pageMask, values, mins, size, ramBytesUsed);
+ }
+
+ @Override
+ void pack(long[] values, int numValues, int block, float acceptableOverheadRatio) {
+ long min = values[0];
+ for (int i = 1; i < numValues; ++i) {
+ min = Math.min(min, values[i]);
+ }
+ for (int i = 0; i < numValues; ++i) {
+ values[i] -= min;
+ }
+ super.pack(values, numValues, block, acceptableOverheadRatio);
+ mins[block] = min;
+ }
+
+ @Override
+ void grow(int newBlockCount) {
+ super.grow(newBlockCount);
+ ramBytesUsed -= RamUsageEstimator.sizeOf(mins);
+ mins = Arrays.copyOf(mins, newBlockCount);
+ ramBytesUsed += RamUsageEstimator.sizeOf(mins);
+ }
+
+ }
+
+}
Added: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicLongValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicLongValues.java?rev=1607105&view=auto
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicLongValues.java (added)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicLongValues.java Tue Jul 1 14:59:52 2014
@@ -0,0 +1,105 @@
+package org.apache.lucene.util.packed;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Arrays;
+
+import static org.apache.lucene.util.packed.MonotonicBlockPackedReader.expected;
+
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.packed.DeltaPackedLongValues.Builder;
+import org.apache.lucene.util.packed.PackedInts.Reader;
+
+class MonotonicLongValues extends DeltaPackedLongValues {
+
+ private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(MonotonicLongValues.class);
+
+ final float[] averages;
+
+ MonotonicLongValues(int pageShift, int pageMask, Reader[] values, long[] mins, float[] averages, long size, long ramBytesUsed) {
+ super(pageShift, pageMask, values, mins, size, ramBytesUsed);
+ assert values.length == averages.length;
+ this.averages = averages;
+ }
+
+ @Override
+ long get(int block, int element) {
+ return expected(mins[block], averages[block], element) + values[block].get(element);
+ }
+
+ @Override
+ int decodeBlock(int block, long[] dest) {
+ final int count = super.decodeBlock(block, dest);
+ final float average = averages[block];
+ for (int i = 0; i < count; ++i) {
+ dest[i] += expected(0, average, i);
+ }
+ return count;
+ }
+
+ static class Builder extends DeltaPackedLongValues.Builder {
+
+ private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(Builder.class);
+
+ float[] averages;
+
+ Builder(int pageSize, float acceptableOverheadRatio) {
+ super(pageSize, acceptableOverheadRatio);
+ averages = new float[values.length];
+ ramBytesUsed += RamUsageEstimator.sizeOf(averages);
+ }
+
+ @Override
+ long baseRamBytesUsed() {
+ return BASE_RAM_BYTES_USED;
+ }
+
+ @Override
+ public MonotonicLongValues build() {
+ finish();
+ pending = null;
+ final PackedInts.Reader[] values = Arrays.copyOf(this.values, valuesOff);
+ final long[] mins = Arrays.copyOf(this.mins, valuesOff);
+ final float[] averages = Arrays.copyOf(this.averages, valuesOff);
+ final long ramBytesUsed = MonotonicLongValues.BASE_RAM_BYTES_USED
+ + RamUsageEstimator.sizeOf(values) + RamUsageEstimator.sizeOf(mins)
+ + RamUsageEstimator.sizeOf(averages);
+ return new MonotonicLongValues(pageShift, pageMask, values, mins, averages, size, ramBytesUsed);
+ }
+
+ @Override
+ void pack(long[] values, int numValues, int block, float acceptableOverheadRatio) {
+ final float average = numValues == 1 ? 0 : (float) (values[numValues - 1] - values[0]) / (numValues - 1);
+ for (int i = 0; i < numValues; ++i) {
+ values[i] -= expected(0, average, i);
+ }
+ super.pack(values, numValues, block, acceptableOverheadRatio);
+ averages[block] = average;
+ }
+
+ @Override
+ void grow(int newBlockCount) {
+ super.grow(newBlockCount);
+ ramBytesUsed -= RamUsageEstimator.sizeOf(averages);
+ averages = Arrays.copyOf(averages, newBlockCount);
+ ramBytesUsed += RamUsageEstimator.sizeOf(averages);
+ }
+
+ }
+
+}
Added: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedLongValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedLongValues.java?rev=1607105&view=auto
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedLongValues.java (added)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedLongValues.java Tue Jul 1 14:59:52 2014
@@ -0,0 +1,282 @@
+package org.apache.lucene.util.packed;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import static org.apache.lucene.util.packed.PackedInts.checkBlockSize;
+
+import java.util.Arrays;
+
+import org.apache.lucene.util.Accountable;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.LongValues;
+import org.apache.lucene.util.RamUsageEstimator;
+
+/**
+ * Utility class to compress integers into a {@link LongValues} instance.
+ */
+public class PackedLongValues extends LongValues implements Accountable {
+
+ private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(PackedLongValues.class);
+
+ static final int DEFAULT_PAGE_SIZE = 1024;
+ static final int MIN_PAGE_SIZE = 64;
+ // More than 1M doesn't really make sense with these appending buffers
+ // since their goal is to try to have small numbers of bits per value
+ static final int MAX_PAGE_SIZE = 1 << 20;
+
+ /** Return a new {@link Builder} that will efficiently compress positive integers. */
+ public static PackedLongValues.Builder packedBuilder(int pageSize, float acceptableOverheadRatio) {
+ return new PackedLongValues.Builder(pageSize, acceptableOverheadRatio);
+ }
+
+ /** @see #packedBuilder(int, float) */
+ public static PackedLongValues.Builder packedBuilder(float acceptableOverheadRatio) {
+ return packedBuilder(DEFAULT_PAGE_SIZE, acceptableOverheadRatio);
+ }
+
+ /** Return a new {@link Builder} that will efficiently compress integers that
+ * are close to each other. */
+ public static PackedLongValues.Builder deltaPackedBuilder(int pageSize, float acceptableOverheadRatio) {
+ return new DeltaPackedLongValues.Builder(pageSize, acceptableOverheadRatio);
+ }
+
+ /** @see #deltaPackedBuilder(int, float) */
+ public static PackedLongValues.Builder deltaPackedBuilder(float acceptableOverheadRatio) {
+ return deltaPackedBuilder(DEFAULT_PAGE_SIZE, acceptableOverheadRatio);
+ }
+
+ /** Return a new {@link Builder} that will efficiently compress integers that
+ * would be a monotonic function of their index. */
+ public static PackedLongValues.Builder monotonicBuilder(int pageSize, float acceptableOverheadRatio) {
+ return new MonotonicLongValues.Builder(pageSize, acceptableOverheadRatio);
+ }
+
+ /** @see #monotonicBuilder(int, float) */
+ public static PackedLongValues.Builder monotonicBuilder(float acceptableOverheadRatio) {
+ return monotonicBuilder(DEFAULT_PAGE_SIZE, acceptableOverheadRatio);
+ }
+
+ final PackedInts.Reader[] values;
+ final int pageShift, pageMask;
+ private final long size;
+ private final long ramBytesUsed;
+
+ PackedLongValues(int pageShift, int pageMask, PackedInts.Reader[] values, long size, long ramBytesUsed) {
+ this.pageShift = pageShift;
+ this.pageMask = pageMask;
+ this.values = values;
+ this.size = size;
+ this.ramBytesUsed = ramBytesUsed;
+ }
+
+ /** Get the number of values in this array. */
+ public final long size() {
+ return size;
+ }
+
+ int decodeBlock(int block, long[] dest) {
+ final PackedInts.Reader vals = values[block];
+ final int size = vals.size();
+ for (int k = 0; k < size; ) {
+ k += vals.get(k, dest, k, size - k);
+ }
+ return size;
+ }
+
+ long get(int block, int element) {
+ return values[block].get(element);
+ }
+
+ @Override
+ public final long get(long index) {
+ assert index >= 0 && index < size();
+ final int block = (int) (index >> pageShift);
+ final int element = (int) (index & pageMask);
+ return get(block, element);
+ }
+
+ @Override
+ public long ramBytesUsed() {
+ return ramBytesUsed;
+ }
+
+ /** Return an iterator over the values of this array. */
+ public Iterator iterator() {
+ return new Iterator();
+ }
+
+ /** An iterator over long values. */
+ final public class Iterator {
+
+ final long[] currentValues;
+ int vOff, pOff;
+ int currentCount; // number of entries of the current page
+
+ Iterator() {
+ currentValues = new long[pageMask + 1];
+ vOff = pOff = 0;
+ fillBlock();
+ }
+
+ private void fillBlock() {
+ if (vOff == values.length) {
+ currentCount = 0;
+ } else {
+ currentCount = decodeBlock(vOff, currentValues);
+ assert currentCount > 0;
+ }
+ }
+
+ /** Whether or not there are remaining values. */
+ public final boolean hasNext() {
+ return pOff < currentCount;
+ }
+
+ /** Return the next long in the buffer. */
+ public final long next() {
+ assert hasNext();
+ long result = currentValues[pOff++];
+ if (pOff == currentCount) {
+ vOff += 1;
+ pOff = 0;
+ fillBlock();
+ }
+ return result;
+ }
+
+ }
+
+ /** A Builder for a {@link PackedLongValues} instance. */
+ public static class Builder implements Accountable {
+
+ private static final int INITIAL_PAGE_COUNT = 16;
+ private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(Builder.class);
+
+ final int pageShift, pageMask;
+ final float acceptableOverheadRatio;
+ long[] pending;
+ long size;
+
+ PackedInts.Reader[] values;
+ long ramBytesUsed;
+ int valuesOff;
+ int pendingOff;
+
+ Builder(int pageSize, float acceptableOverheadRatio) {
+ pageShift = checkBlockSize(pageSize, MIN_PAGE_SIZE, MAX_PAGE_SIZE);
+ pageMask = pageSize - 1;
+ this.acceptableOverheadRatio = acceptableOverheadRatio;
+ values = new PackedInts.Reader[INITIAL_PAGE_COUNT];
+ pending = new long[pageSize];
+ valuesOff = 0;
+ pendingOff = 0;
+ size = 0;
+ ramBytesUsed = baseRamBytesUsed() + RamUsageEstimator.sizeOf(pending) + RamUsageEstimator.shallowSizeOf(values);
+ }
+
+ /** Build a {@link PackedLongValues} instance that contains the values that
+ * have been added to this builder. This operation is destructive: the builder cannot be reused afterwards. */
+ public PackedLongValues build() {
+ finish();
+ pending = null;
+ final PackedInts.Reader[] values = Arrays.copyOf(this.values, valuesOff);
+ final long ramBytesUsed = PackedLongValues.BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(values);
+ return new PackedLongValues(pageShift, pageMask, values, size, ramBytesUsed);
+ }
+
+ long baseRamBytesUsed() {
+ return BASE_RAM_BYTES_USED;
+ }
+
+ @Override
+ public final long ramBytesUsed() {
+ return ramBytesUsed;
+ }
+
+ /** Return the number of elements that have been added to this builder. */
+ public final long size() {
+ return size;
+ }
+
+ /** Add a new element to this builder. */
+ public Builder add(long l) {
+ if (pending == null) {
+ throw new IllegalStateException("Cannot be reused after build()");
+ }
+ if (pendingOff == pending.length) {
+ // check size
+ if (values.length == valuesOff) {
+ final int newLength = ArrayUtil.oversize(valuesOff + 1, 8);
+ grow(newLength);
+ }
+ pack();
+ }
+ pending[pendingOff++] = l;
+ size += 1;
+ return this;
+ }
+
+ final void finish() {
+ if (pendingOff > 0) {
+ if (values.length == valuesOff) {
+ grow(valuesOff + 1);
+ }
+ pack();
+ }
+ }
+
+ private void pack() {
+ pack(pending, pendingOff, valuesOff, acceptableOverheadRatio);
+ ramBytesUsed += values[valuesOff].ramBytesUsed();
+ valuesOff += 1;
+ // reset pending buffer
+ pendingOff = 0;
+ }
+
+ void pack(long[] values, int numValues, int block, float acceptableOverheadRatio) {
+ assert numValues > 0;
+ // compute max delta
+ long minValue = values[0];
+ long maxValue = values[0];
+ for (int i = 1; i < numValues; ++i) {
+ minValue = Math.min(minValue, values[i]);
+ maxValue = Math.max(maxValue, values[i]);
+ }
+
+ // build a new packed reader
+ if (minValue == 0 && maxValue == 0) {
+ this.values[block] = new PackedInts.NullReader(numValues);
+ } else {
+ final int bitsRequired = minValue < 0 ? 64 : PackedInts.bitsRequired(maxValue);
+ final PackedInts.Mutable mutable = PackedInts.getMutable(numValues, bitsRequired, acceptableOverheadRatio);
+ for (int i = 0; i < numValues; ) {
+ i += mutable.set(i, values, i, numValues - i);
+ }
+ this.values[block] = mutable;
+ }
+ }
+
+ void grow(int newBlockCount) {
+ ramBytesUsed -= RamUsageEstimator.shallowSizeOf(values);
+ values = Arrays.copyOf(values, newBlockCount);
+ ramBytesUsed += RamUsageEstimator.shallowSizeOf(values);
+ }
+
+ }
+
+}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PagedGrowableWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PagedGrowableWriter.java?rev=1607105&r1=1607104&r2=1607105&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PagedGrowableWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PagedGrowableWriter.java Tue Jul 1 14:59:52 2014
@@ -23,7 +23,7 @@ import org.apache.lucene.util.packed.Pac
/**
* A {@link PagedGrowableWriter}. This class slices data into fixed-size blocks
* which have independent numbers of bits per value and grow on-demand.
- * <p>You should use this class instead of the {@link AbstractAppendingLongBuffer} related ones only when
+ * <p>You should use this class instead of the {@link PackedLongValues} related ones only when
* you need random write-access. Otherwise this class will likely be slower and
* less memory-efficient.
* @lucene.internal
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/package.html?rev=1607105&r1=1607104&r2=1607105&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/package.html (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/package.html Tue Jul 1 14:59:52 2014
@@ -50,19 +50,19 @@
<li><b>{@link org.apache.lucene.util.packed.PagedGrowableWriter}</b><ul>
<li>Slices data into fixed-size blocks stored in GrowableWriters.</li>
<li>Supports more than 2B values.</li>
- <li>You should use Appending(Delta)PackedLongBuffer instead if you don't need random write access.</li>
+ <li>You should use PackedLongValues instead if you don't need random write access.</li>
</ul></li>
- <li><b>{@link org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer}</b><ul>
+ <li><b>{@link org.apache.lucene.util.packed.PackedLongValues#deltaPackedBuilder}</b><ul>
<li>Can store any sequence of longs.</li>
<li>Compression is good when values are close to each other.</li>
<li>Supports random reads, but only sequential writes.</li>
<li>Can address up to 2^42 values.</li>
</ul></li>
- <li><b>{@link org.apache.lucene.util.packed.AppendingPackedLongBuffer}</b><ul>
- <li>Same as AppendingDeltaPackedLongBuffer but assumes values are 0-based.</li>
+ <li><b>{@link org.apache.lucene.util.packed.PackedLongValues#packedBuilder}</b><ul>
+ <li>Same as deltaPackedBuilder but assumes values are 0-based.</li>
</ul></li>
- <li><b>{@link org.apache.lucene.util.packed.MonotonicAppendingLongBuffer}</b><ul>
- <li>Same as AppendingDeltaPackedLongBuffer except that compression is good when the stream is a succession of affine functions.</li>
+ <li><b>{@link org.apache.lucene.util.packed.PackedLongValues#monotonicBuilder}</b><ul>
+ <li>Same as deltaPackedBuilder except that compression is good when the stream is a succession of affine functions.</li>
</ul></li>
</ul>
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java?rev=1607105&r1=1607104&r2=1607105&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java Tue Jul 1 14:59:52 2014
@@ -34,10 +34,10 @@ import org.apache.lucene.store.IOContext
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.Slow;
-import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.RamUsageTester;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.packed.PackedInts.Reader;
@@ -701,7 +701,7 @@ public class TestPackedInts extends Luce
assertEquals(0, writer.size());
// compare against AppendingDeltaPackedLongBuffer
- AppendingDeltaPackedLongBuffer buf = new AppendingDeltaPackedLongBuffer();
+ PackedLongValues.Builder buf = PackedLongValues.deltaPackedBuilder(random().nextFloat());
int size = random().nextInt(1000000);
long max = 5;
for (int i = 0; i < size; ++i) {
@@ -712,11 +712,12 @@ public class TestPackedInts extends Luce
}
writer = new PagedGrowableWriter(size, pageSize, TestUtil.nextInt(random(), 1, 64), random().nextFloat());
assertEquals(size, writer.size());
+ final LongValues values = buf.build();
for (int i = size - 1; i >= 0; --i) {
- writer.set(i, buf.get(i));
+ writer.set(i, values.get(i));
}
for (int i = 0; i < size; ++i) {
- assertEquals(buf.get(i), writer.get(i));
+ assertEquals(values.get(i), writer.get(i));
}
// test ramBytesUsed
@@ -752,7 +753,7 @@ public class TestPackedInts extends Luce
assertEquals(0, writer.size());
// compare against AppendingDeltaPackedLongBuffer
- AppendingDeltaPackedLongBuffer buf = new AppendingDeltaPackedLongBuffer();
+ PackedLongValues.Builder buf = PackedLongValues.deltaPackedBuilder(random().nextFloat());
int size = random().nextInt(1000000);
for (int i = 0; i < size; ++i) {
@@ -760,11 +761,12 @@ public class TestPackedInts extends Luce
}
writer = new PagedMutable(size, pageSize, bitsPerValue, random().nextFloat());
assertEquals(size, writer.size());
+ final LongValues values = buf.build();
for (int i = size - 1; i >= 0; --i) {
- writer.set(i, buf.get(i));
+ writer.set(i, values.get(i));
}
for (int i = 0; i < size; ++i) {
- assertEquals(buf.get(i), writer.get(i));
+ assertEquals(values.get(i), writer.get(i));
}
// test ramBytesUsed
@@ -960,29 +962,46 @@ public class TestPackedInts extends Luce
MONOTONIC
}
+ public void testPackedLongValuesOnZeros() {
+ // Make sure that when all values are the same, they use 0 bits per value
+ final int pageSize = 1 << TestUtil.nextInt(random(), 6, 20);
+ final float acceptableOverheadRatio = random().nextFloat();
+
+ assertEquals(
+ PackedLongValues.packedBuilder(pageSize, acceptableOverheadRatio).add(0).build().ramBytesUsed(),
+ PackedLongValues.packedBuilder(pageSize, acceptableOverheadRatio).add(0).add(0).build().ramBytesUsed());
+
+ final long l = random().nextLong();
+ assertEquals(
+ PackedLongValues.deltaPackedBuilder(pageSize, acceptableOverheadRatio).add(l).build().ramBytesUsed(),
+ PackedLongValues.deltaPackedBuilder(pageSize, acceptableOverheadRatio).add(l).add(l).build().ramBytesUsed());
+
+ final long avg = random().nextInt(100);
+ assertEquals(
+ PackedLongValues.monotonicBuilder(pageSize, acceptableOverheadRatio).add(l).add(l + avg).build().ramBytesUsed(),
+ PackedLongValues.monotonicBuilder(pageSize, acceptableOverheadRatio).add(l).add(l + avg).add(l + 2 * avg).build().ramBytesUsed());
+ }
- public void testAppendingLongBuffer() {
-
+ public void testPackedLongValues() {
final long[] arr = new long[RandomInts.randomIntBetween(random(), 1, 1000000)];
float[] ratioOptions = new float[]{PackedInts.DEFAULT, PackedInts.COMPACT, PackedInts.FAST};
for (int bpv : new int[]{0, 1, 63, 64, RandomInts.randomIntBetween(random(), 2, 62)}) {
- for (DataType dataType : DataType.values()) {
+ for (DataType dataType : Arrays.asList(DataType.DELTA_PACKED)) {
final int pageSize = 1 << TestUtil.nextInt(random(), 6, 20);
- final int initialPageCount = TestUtil.nextInt(random(), 0, 16);
float acceptableOverheadRatio = ratioOptions[TestUtil.nextInt(random(), 0, ratioOptions.length - 1)];
- AbstractAppendingLongBuffer buf;
+ PackedLongValues.Builder buf;
final int inc;
switch (dataType) {
case PACKED:
- buf = new AppendingPackedLongBuffer(initialPageCount, pageSize, acceptableOverheadRatio);
+ buf = PackedLongValues.packedBuilder(pageSize, acceptableOverheadRatio);
inc = 0;
break;
case DELTA_PACKED:
- buf = new AppendingDeltaPackedLongBuffer(initialPageCount, pageSize, acceptableOverheadRatio);
+ buf = PackedLongValues.deltaPackedBuilder(pageSize, acceptableOverheadRatio);
inc = 0;
break;
case MONOTONIC:
- buf = new MonotonicAppendingLongBuffer(initialPageCount, pageSize, acceptableOverheadRatio);
+ buf = PackedLongValues.monotonicBuilder(pageSize, acceptableOverheadRatio);
inc = TestUtil.nextInt(random(), -1000, 1000);
break;
default:
@@ -1008,22 +1027,27 @@ public class TestPackedInts extends Luce
for (int i = 0; i < arr.length; ++i) {
buf.add(arr[i]);
- }
- assertEquals(arr.length, buf.size());
- if (random().nextBoolean()) {
- buf.freeze();
- if (random().nextBoolean()) {
- // Make sure double freeze doesn't break anything
- buf.freeze();
+ if (rarely()) {
+ final long expectedBytesUsed = RamUsageTester.sizeOf(buf);
+ final long computedBytesUsed = buf.ramBytesUsed();
+ assertEquals(expectedBytesUsed, computedBytesUsed);
}
}
assertEquals(arr.length, buf.size());
+ final PackedLongValues values = buf.build();
+ try {
+ buf.add(random().nextLong());
+ fail("expected an exception");
+ } catch (IllegalStateException e) {
+ // ok
+ }
+ assertEquals(arr.length, values.size());
for (int i = 0; i < arr.length; ++i) {
- assertEquals(arr[i], buf.get(i));
+ assertEquals(arr[i], values.get(i));
}
- final AbstractAppendingLongBuffer.Iterator it = buf.iterator();
+ final PackedLongValues.Iterator it = values.iterator();
for (int i = 0; i < arr.length; ++i) {
if (random().nextBoolean()) {
assertTrue(it.hasNext());
@@ -1032,28 +1056,8 @@ public class TestPackedInts extends Luce
}
assertFalse(it.hasNext());
-
- long[] target = new long[arr.length + 1024]; // check the request for more is OK.
- for (int i = 0; i < arr.length; i += TestUtil.nextInt(random(), 0, 10000)) {
- int lenToRead = random().nextInt(buf.pageSize() * 2) + 1;
- lenToRead = Math.min(lenToRead, target.length - i);
- int lenToCheck = Math.min(lenToRead, arr.length - i);
- int off = i;
- while (off < arr.length && lenToRead > 0) {
- int read = buf.get(off, target, off, lenToRead);
- assertTrue(read > 0);
- assertTrue(read <= lenToRead);
- lenToRead -= read;
- off += read;
- }
-
- for (int j = 0; j < lenToCheck; j++) {
- assertEquals(arr[j + i], target[j + i]);
- }
- }
-
- final long expectedBytesUsed = RamUsageTester.sizeOf(buf);
- final long computedBytesUsed = buf.ramBytesUsed();
+ final long expectedBytesUsed = RamUsageTester.sizeOf(values);
+ final long computedBytesUsed = values.ramBytesUsed();
assertEquals(expectedBytesUsed, computedBytesUsed);
}
}
Modified: lucene/dev/branches/branch_4x/lucene/misc/src/java/org/apache/lucene/index/sorter/Sorter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/misc/src/java/org/apache/lucene/index/sorter/Sorter.java?rev=1607105&r1=1607104&r2=1607105&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/misc/src/java/org/apache/lucene/index/sorter/Sorter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/misc/src/java/org/apache/lucene/index/sorter/Sorter.java Tue Jul 1 14:59:52 2014
@@ -27,7 +27,8 @@ import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.TimSorter;
-import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
+import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.PackedLongValues;
/**
* Sorts documents of a given index by returning a permutation on the document
@@ -163,21 +164,21 @@ final class Sorter {
// The reason why we use MonotonicAppendingLongBuffer here is that it
// wastes very little memory if the index is in random order but can save
// a lot of memory if the index is already "almost" sorted
- final MonotonicAppendingLongBuffer newToOld = new MonotonicAppendingLongBuffer();
+ final PackedLongValues.Builder newToOldBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
for (int i = 0; i < maxDoc; ++i) {
- newToOld.add(docs[i]);
+ newToOldBuilder.add(docs[i]);
}
- newToOld.freeze();
+ final PackedLongValues newToOld = newToOldBuilder.build();
for (int i = 0; i < maxDoc; ++i) {
docs[(int) newToOld.get(i)] = i;
} // docs is now the oldToNew mapping
- final MonotonicAppendingLongBuffer oldToNew = new MonotonicAppendingLongBuffer();
+ final PackedLongValues.Builder oldToNewBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
for (int i = 0; i < maxDoc; ++i) {
- oldToNew.add(docs[i]);
+ oldToNewBuilder.add(docs[i]);
}
- oldToNew.freeze();
+ final PackedLongValues oldToNew = oldToNewBuilder.build();
return new Sorter.DocMap() {
Modified: lucene/dev/branches/branch_4x/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingMergePolicy.java?rev=1607105&r1=1607104&r2=1607105&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingMergePolicy.java (original)
+++ lucene/dev/branches/branch_4x/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingMergePolicy.java Tue Jul 1 14:59:52 2014
@@ -38,7 +38,8 @@ import org.apache.lucene.index.SlowCompo
import org.apache.lucene.search.Sort;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
+import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.PackedLongValues;
/** A {@link MergePolicy} that reorders documents according to a {@link Sort}
* before merging them. As a consequence, all segments resulting from a merge
@@ -96,8 +97,8 @@ public final class SortingMergePolicy ex
super.setInfo(info);
}
- private MonotonicAppendingLongBuffer getDeletes(List<AtomicReader> readers) {
- MonotonicAppendingLongBuffer deletes = new MonotonicAppendingLongBuffer();
+ private PackedLongValues getDeletes(List<AtomicReader> readers) {
+ PackedLongValues.Builder deletes = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
int deleteCount = 0;
for (AtomicReader reader : readers) {
final int maxDoc = reader.maxDoc();
@@ -110,8 +111,7 @@ public final class SortingMergePolicy ex
}
}
}
- deletes.freeze();
- return deletes;
+ return deletes.build();
}
@Override
@@ -123,7 +123,7 @@ public final class SortingMergePolicy ex
return super.getDocMap(mergeState);
}
assert mergeState.docMaps.length == 1; // we returned a singleton reader
- final MonotonicAppendingLongBuffer deletes = getDeletes(unsortedReaders);
+ final PackedLongValues deletes = getDeletes(unsortedReaders);
return new MergePolicy.DocMap() {
@Override
public int map(int old) {