You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2016/10/04 17:06:48 UTC
lucene-solr:master: LUCENE-7474: Doc values writers should have a sparse encoding.
Repository: lucene-solr
Updated Branches:
refs/heads/master 796ed508f -> d50cf9761
LUCENE-7474: Doc values writers should have a sparse encoding.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/d50cf976
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/d50cf976
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/d50cf976
Branch: refs/heads/master
Commit: d50cf97617c88ec75fd8f4482003623db08e625e
Parents: 796ed50
Author: Adrien Grand <jp...@gmail.com>
Authored: Tue Oct 4 18:59:26 2016 +0200
Committer: Adrien Grand <jp...@gmail.com>
Committed: Tue Oct 4 19:00:53 2016 +0200
----------------------------------------------------------------------
.../lucene/index/BinaryDocValuesWriter.java | 76 +++++++------------
.../lucene/index/NumericDocValuesWriter.java | 66 +++++-----------
.../lucene/index/SortedDocValuesWriter.java | 63 ++++++---------
.../index/SortedNumericDocValuesWriter.java | 80 +++++++++-----------
.../lucene/index/SortedSetDocValuesWriter.java | 74 +++++++-----------
5 files changed, 134 insertions(+), 225 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d50cf976/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
index cbcfec5..ff2e67c 100644
--- a/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
@@ -20,15 +20,16 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.PagedBytes;
-import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
@@ -49,8 +50,9 @@ class BinaryDocValuesWriter extends DocValuesWriter {
private final PackedLongValues.Builder lengths;
private FixedBitSet docsWithField;
private final FieldInfo fieldInfo;
- private int addedValues;
private long bytesUsed;
+ private int lastDocID = -1;
+ private int maxLength = 0;
public BinaryDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
this.fieldInfo = fieldInfo;
@@ -59,12 +61,12 @@ class BinaryDocValuesWriter extends DocValuesWriter {
this.lengths = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
this.iwBytesUsed = iwBytesUsed;
this.docsWithField = new FixedBitSet(64);
- this.bytesUsed = docsWithFieldBytesUsed();
+ this.bytesUsed = lengths.ramBytesUsed() + docsWithField.ramBytesUsed();
iwBytesUsed.addAndGet(bytesUsed);
}
public void addValue(int docID, BytesRef value) {
- if (docID < addedValues) {
+ if (docID <= lastDocID) {
throw new IllegalArgumentException("DocValuesField \"" + fieldInfo.name + "\" appears more than once in this document (only one value is allowed per field)");
}
if (value == null) {
@@ -74,12 +76,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
throw new IllegalArgumentException("DocValuesField \"" + fieldInfo.name + "\" is too large, must be <= " + MAX_LENGTH);
}
- // Fill in any holes:
- while(addedValues < docID) {
- addedValues++;
- lengths.add(0);
- }
- addedValues++;
+ maxLength = Math.max(value.length, maxLength);
lengths.add(value.length);
try {
bytesOut.writeBytes(value.bytes, value.offset, value.length);
@@ -90,15 +87,12 @@ class BinaryDocValuesWriter extends DocValuesWriter {
docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID);
docsWithField.set(docID);
updateBytesUsed();
- }
-
- private long docsWithFieldBytesUsed() {
- // size of the long[] + some overhead
- return RamUsageEstimator.sizeOf(docsWithField.getBits()) + 64;
+
+ lastDocID = docID;
}
private void updateBytesUsed() {
- final long newBytesUsed = lengths.ramBytesUsed() + bytes.ramBytesUsed() + docsWithFieldBytesUsed();
+ final long newBytesUsed = lengths.ramBytesUsed() + bytes.ramBytesUsed() + docsWithField.ramBytesUsed();
iwBytesUsed.addAndGet(newBytesUsed - bytesUsed);
bytesUsed = newBytesUsed;
}
@@ -109,7 +103,6 @@ class BinaryDocValuesWriter extends DocValuesWriter {
@Override
public void flush(SegmentWriteState state, DocValuesConsumer dvConsumer) throws IOException {
- final int maxDoc = state.segmentInfo.maxDoc();
bytes.freeze(false);
final PackedLongValues lengths = this.lengths.build();
dvConsumer.addBinaryField(fieldInfo,
@@ -119,53 +112,38 @@ class BinaryDocValuesWriter extends DocValuesWriter {
if (fieldInfoIn != fieldInfo) {
throw new IllegalArgumentException("wrong fieldInfo");
}
- return new BufferedBinaryDocValues(maxDoc, lengths);
+ return new BufferedBinaryDocValues(lengths, maxLength, bytes.getDataInput(), docsWithField);
}
});
}
// iterates over the values we have in ram
- private class BufferedBinaryDocValues extends BinaryDocValues {
- final BytesRefBuilder value = new BytesRefBuilder();
+ private static class BufferedBinaryDocValues extends BinaryDocValues {
+ final BytesRefBuilder value;
final PackedLongValues.Iterator lengthsIterator;
- final DataInput bytesIterator = bytes.getDataInput();
- final int size;
- final int maxDoc;
- private int docID = -1;
+ final DocIdSetIterator docsWithField;
+ final DataInput bytesIterator;
- BufferedBinaryDocValues(int maxDoc, PackedLongValues lengths) {
- this.maxDoc = maxDoc;
+ BufferedBinaryDocValues(PackedLongValues lengths, int maxLength, DataInput bytesIterator, FixedBitSet docsWithFields) {
+ this.value = new BytesRefBuilder();
+ this.value.grow(maxLength);
this.lengthsIterator = lengths.iterator();
- this.size = (int) lengths.size();
+ this.bytesIterator = bytesIterator;
+ this.docsWithField = new BitSetIterator(docsWithFields, lengths.size());
}
@Override
public int docID() {
- return docID;
+ return docsWithField.docID();
}
@Override
public int nextDoc() throws IOException {
- if (docID == size-1) {
- docID = NO_MORE_DOCS;
- } else {
- int next = docsWithField.nextSetBit(docID+1);
- if (next == NO_MORE_DOCS) {
- docID = NO_MORE_DOCS;
- } else {
-
- int length = 0;
-
- // skip missing values:
- while (docID < next) {
- docID++;
- length = (int) lengthsIterator.next();
- assert docID == next || length == 0;
- }
- value.grow(length);
- value.setLength(length);
- bytesIterator.readBytes(value.bytes(), 0, length);
- }
+ int docID = docsWithField.nextDoc();
+ if (docID != NO_MORE_DOCS) {
+ int length = Math.toIntExact(lengthsIterator.next());
+ value.setLength(length);
+ bytesIterator.readBytes(value.bytes(), 0, length);
}
return docID;
}
@@ -177,7 +155,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
@Override
public long cost() {
- return docsWithField.cardinality();
+ return docsWithField.cost();
}
@Override
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d50cf976/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
index 03fbe10..adfa706 100644
--- a/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
@@ -20,9 +20,10 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.FixedBitSet;
-import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
@@ -30,47 +31,38 @@ import org.apache.lucene.util.packed.PackedLongValues;
* segment flushes. */
class NumericDocValuesWriter extends DocValuesWriter {
- private final static long MISSING = 0L;
-
private PackedLongValues.Builder pending;
private final Counter iwBytesUsed;
private long bytesUsed;
private FixedBitSet docsWithField;
private final FieldInfo fieldInfo;
+ private int lastDocID = -1;
public NumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
docsWithField = new FixedBitSet(64);
- bytesUsed = pending.ramBytesUsed() + docsWithFieldBytesUsed();
+ bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
this.fieldInfo = fieldInfo;
this.iwBytesUsed = iwBytesUsed;
iwBytesUsed.addAndGet(bytesUsed);
}
public void addValue(int docID, long value) {
- if (docID < pending.size()) {
+ if (docID <= lastDocID) {
throw new IllegalArgumentException("DocValuesField \"" + fieldInfo.name + "\" appears more than once in this document (only one value is allowed per field)");
}
- // Fill in any holes:
- for (int i = (int)pending.size(); i < docID; ++i) {
- pending.add(MISSING);
- }
-
pending.add(value);
docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID);
docsWithField.set(docID);
-
+
updateBytesUsed();
- }
-
- private long docsWithFieldBytesUsed() {
- // size of the long[] + some overhead
- return RamUsageEstimator.sizeOf(docsWithField.getBits()) + 64;
+
+ lastDocID = docID;
}
private void updateBytesUsed() {
- final long newBytesUsed = pending.ramBytesUsed() + docsWithFieldBytesUsed();
+ final long newBytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
iwBytesUsed.addAndGet(newBytesUsed - bytesUsed);
bytesUsed = newBytesUsed;
}
@@ -82,7 +74,6 @@ class NumericDocValuesWriter extends DocValuesWriter {
@Override
public void flush(SegmentWriteState state, DocValuesConsumer dvConsumer) throws IOException {
- final int maxDoc = state.segmentInfo.maxDoc();
final PackedLongValues values = pending.build();
dvConsumer.addNumericField(fieldInfo,
@@ -92,7 +83,7 @@ class NumericDocValuesWriter extends DocValuesWriter {
if (fieldInfo != NumericDocValuesWriter.this.fieldInfo) {
throw new IllegalArgumentException("wrong fieldInfo");
}
- return new BufferedNumericDocValues(maxDoc, values, docsWithField);
+ return new BufferedNumericDocValues(values, docsWithField);
}
});
}
@@ -100,43 +91,28 @@ class NumericDocValuesWriter extends DocValuesWriter {
// iterates over the values we have in ram
private static class BufferedNumericDocValues extends NumericDocValues {
final PackedLongValues.Iterator iter;
- final FixedBitSet docsWithField;
- final int size;
- final int maxDoc;
+ final DocIdSetIterator docsWithField;
private long value;
- private int docID = -1;
-
- BufferedNumericDocValues(int maxDoc, PackedLongValues values, FixedBitSet docsWithFields) {
- this.maxDoc = maxDoc;
+
+ BufferedNumericDocValues(PackedLongValues values, FixedBitSet docsWithFields) {
this.iter = values.iterator();
- this.size = (int) values.size();
- this.docsWithField = docsWithFields;
+ this.docsWithField = new BitSetIterator(docsWithFields, values.size());
}
@Override
public int docID() {
- return docID;
+ return docsWithField.docID();
}
@Override
- public int nextDoc() {
- if (docID == size-1) {
- docID = NO_MORE_DOCS;
- } else {
- int next = docsWithField.nextSetBit(docID+1);
- if (next == NO_MORE_DOCS) {
- docID = NO_MORE_DOCS;
- } else {
- // skip missing values:
- while (docID < next) {
- docID++;
- value = iter.next();
- }
- }
+ public int nextDoc() throws IOException {
+ int docID = docsWithField.nextDoc();
+ if (docID != NO_MORE_DOCS) {
+ value = iter.next();
}
return docID;
}
-
+
@Override
public int advance(int target) {
throw new UnsupportedOperationException();
@@ -144,7 +120,7 @@ class NumericDocValuesWriter extends DocValuesWriter {
@Override
public long cost() {
- return docsWithField.cardinality();
+ return docsWithField.cost();
}
@Override
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d50cf976/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java
index f199fcc..885ee89 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java
@@ -21,11 +21,14 @@ import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
import java.io.IOException;
import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
@@ -34,12 +37,11 @@ import org.apache.lucene.util.packed.PackedLongValues;
class SortedDocValuesWriter extends DocValuesWriter {
final BytesRefHash hash;
private PackedLongValues.Builder pending;
+ private FixedBitSet docsWithField;
private final Counter iwBytesUsed;
private long bytesUsed; // this currently only tracks differences in 'pending'
private final FieldInfo fieldInfo;
-
- private static final int EMPTY_ORD = -1;
- private int nonEmptyCount;
+ private int lastDocID = -1;
public SortedDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
this.fieldInfo = fieldInfo;
@@ -50,12 +52,13 @@ class SortedDocValuesWriter extends DocValuesWriter {
BytesRefHash.DEFAULT_CAPACITY,
new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
- bytesUsed = pending.ramBytesUsed();
+ docsWithField = new FixedBitSet(64);
+ bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
iwBytesUsed.addAndGet(bytesUsed);
}
public void addValue(int docID, BytesRef value) {
- if (docID < pending.size()) {
+ if (docID <= lastDocID) {
throw new IllegalArgumentException("DocValuesField \"" + fieldInfo.name + "\" appears more than once in this document (only one value is allowed per field)");
}
if (value == null) {
@@ -65,20 +68,15 @@ class SortedDocValuesWriter extends DocValuesWriter {
throw new IllegalArgumentException("DocValuesField \"" + fieldInfo.name + "\" is too large, must be <= " + (BYTE_BLOCK_SIZE - 2));
}
- // Fill in any holes:
- while(pending.size() < docID) {
- pending.add(EMPTY_ORD);
- }
-
addOneValue(value);
- nonEmptyCount++;
+ docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID);
+ docsWithField.set(docID);
+
+ lastDocID = docID;
}
@Override
public void finish(int maxDoc) {
- while(pending.size() < maxDoc) {
- pending.add(EMPTY_ORD);
- }
updateBytesUsed();
}
@@ -99,16 +97,13 @@ class SortedDocValuesWriter extends DocValuesWriter {
}
private void updateBytesUsed() {
- final long newBytesUsed = pending.ramBytesUsed();
+ final long newBytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
iwBytesUsed.addAndGet(newBytesUsed - bytesUsed);
bytesUsed = newBytesUsed;
}
@Override
public void flush(SegmentWriteState state, DocValuesConsumer dvConsumer) throws IOException {
- final int maxDoc = state.segmentInfo.maxDoc();
-
- assert pending.size() == maxDoc;
final int valueCount = hash.size();
final PackedLongValues ords = pending.build();
@@ -126,51 +121,41 @@ class SortedDocValuesWriter extends DocValuesWriter {
if (fieldInfoIn != fieldInfo) {
throw new IllegalArgumentException("wrong fieldInfo");
}
- return new BufferedSortedDocValues(hash, valueCount, maxDoc, ords, sortedValues, ordMap, nonEmptyCount);
+ return new BufferedSortedDocValues(hash, valueCount, ords, sortedValues, ordMap, docsWithField);
}
});
}
private static class BufferedSortedDocValues extends SortedDocValues {
final BytesRefHash hash;
- final int maxDoc;
final BytesRef scratch = new BytesRef();
final int[] sortedValues;
final int[] ordMap;
- final int cost;
final int valueCount;
- private int docID = -1;
private int ord;
final PackedLongValues.Iterator iter;
+ final DocIdSetIterator docsWithField;
- public BufferedSortedDocValues(BytesRefHash hash, int valueCount, int maxDoc, PackedLongValues docToOrd, int[] sortedValues, int[] ordMap, int cost) {
+ public BufferedSortedDocValues(BytesRefHash hash, int valueCount, PackedLongValues docToOrd, int[] sortedValues, int[] ordMap, FixedBitSet docsWithField) {
this.hash = hash;
this.valueCount = valueCount;
- this.maxDoc = maxDoc;
this.sortedValues = sortedValues;
this.iter = docToOrd.iterator();
this.ordMap = ordMap;
- this.cost = cost;
+ this.docsWithField = new BitSetIterator(docsWithField, docToOrd.size());
}
@Override
public int docID() {
- return docID;
+ return docsWithField.docID();
}
@Override
- public int nextDoc() {
- while (true) {
- docID++;
- if (docID >= maxDoc) {
- docID = NO_MORE_DOCS;
- break;
- }
- ord = (int) iter.next();
- if (ord != -1) {
- ord = ordMap[ord];
- break;
- }
+ public int nextDoc() throws IOException {
+ int docID = docsWithField.nextDoc();
+ if (docID != NO_MORE_DOCS) {
+ ord = Math.toIntExact(iter.next());
+ ord = ordMap[ord];
}
return docID;
}
@@ -182,7 +167,7 @@ class SortedDocValuesWriter extends DocValuesWriter {
@Override
public long cost() {
- return cost;
+ return docsWithField.cost();
}
@Override
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d50cf976/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java
index 7db5d98..e154547 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java
@@ -21,8 +21,11 @@ import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
@@ -31,10 +34,11 @@ import org.apache.lucene.util.packed.PackedLongValues;
class SortedNumericDocValuesWriter extends DocValuesWriter {
private PackedLongValues.Builder pending; // stream of all values
private PackedLongValues.Builder pendingCounts; // count of values per doc
+ private FixedBitSet docsWithField;
private final Counter iwBytesUsed;
private long bytesUsed; // this only tracks differences in 'pending' and 'pendingCounts'
private final FieldInfo fieldInfo;
- private int currentDoc;
+ private int currentDoc = -1;
private long currentValues[] = new long[8];
private int currentUpto = 0;
@@ -43,19 +47,16 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
this.iwBytesUsed = iwBytesUsed;
pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
pendingCounts = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
- bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed();
+ docsWithField = new FixedBitSet(64);
+ bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed() + docsWithField.ramBytesUsed() + RamUsageEstimator.sizeOf(currentValues);
iwBytesUsed.addAndGet(bytesUsed);
}
- public void addValue(int docID, long value) {
+ public void addValue(int docID, long value) {
+ assert docID >= currentDoc;
if (docID != currentDoc) {
finishCurrentDoc();
- }
-
- // Fill in any holes:
- while(currentDoc < docID) {
- pendingCounts.add(0); // no values
- currentDoc++;
+ currentDoc = docID;
}
addOneValue(value);
@@ -64,6 +65,9 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
// finalize currentDoc: this sorts the values in the current doc
private void finishCurrentDoc() {
+ if (currentDoc == -1) {
+ return;
+ }
Arrays.sort(currentValues, 0, currentUpto);
for (int i = 0; i < currentUpto; i++) {
pending.add(currentValues[i]);
@@ -71,17 +75,14 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
// record the number of values for this doc
pendingCounts.add(currentUpto);
currentUpto = 0;
- currentDoc++;
+
+ docsWithField = FixedBitSet.ensureCapacity(docsWithField, currentDoc);
+ docsWithField.set(currentDoc);
}
@Override
public void finish(int maxDoc) {
finishCurrentDoc();
-
- // fill in any holes
- for (int i = currentDoc; i < maxDoc; i++) {
- pendingCounts.add(0); // no values
- }
}
private void addOneValue(long value) {
@@ -94,15 +95,13 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
}
private void updateBytesUsed() {
- final long newBytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed() + RamUsageEstimator.sizeOf(currentValues);
+ final long newBytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed() + docsWithField.ramBytesUsed() + RamUsageEstimator.sizeOf(currentValues);
iwBytesUsed.addAndGet(newBytesUsed - bytesUsed);
bytesUsed = newBytesUsed;
}
@Override
public void flush(SegmentWriteState state, DocValuesConsumer dvConsumer) throws IOException {
- final int maxDoc = state.segmentInfo.maxDoc();
- assert pendingCounts.size() == maxDoc;
final PackedLongValues values = pending.build();
final PackedLongValues valueCounts = pendingCounts.build();
@@ -113,7 +112,7 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
if (fieldInfoIn != fieldInfo) {
throw new IllegalArgumentException("wrong fieldInfo");
}
- return new BufferedSortedNumericDocValues(values, valueCounts);
+ return new BufferedSortedNumericDocValues(values, valueCounts, docsWithField);
}
});
}
@@ -121,45 +120,31 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
private static class BufferedSortedNumericDocValues extends SortedNumericDocValues {
final PackedLongValues.Iterator valuesIter;
final PackedLongValues.Iterator valueCountsIter;
- final int maxDoc;
- final long cost;
- private int docID = -1;
+ final DocIdSetIterator docsWithField;
private int valueCount;
private int valueUpto;
- public BufferedSortedNumericDocValues(PackedLongValues values, PackedLongValues valueCounts) {
+ public BufferedSortedNumericDocValues(PackedLongValues values, PackedLongValues valueCounts, FixedBitSet docsWithField) {
valuesIter = values.iterator();
valueCountsIter = valueCounts.iterator();
- maxDoc = Math.toIntExact(valueCounts.size());
- cost = values.size();
+ this.docsWithField = new BitSetIterator(docsWithField, values.size());
}
@Override
public int docID() {
- return docID;
+ return docsWithField.docID();
}
@Override
- public int nextDoc() {
-
- // consume any un-consumed values from current doc
- while(valueUpto < valueCount) {
+ public int nextDoc() throws IOException {
+ for (int i = valueUpto; i < valueCount; ++i) {
valuesIter.next();
- valueUpto++;
}
-
- while (true) {
- docID++;
- if (docID == maxDoc) {
- docID = NO_MORE_DOCS;
- break;
- } else {
- valueCount = Math.toIntExact(valueCountsIter.next());
- if (valueCount > 0) {
- valueUpto = 0;
- break;
- }
- }
+
+ int docID = docsWithField.nextDoc();
+ if (docID != NO_MORE_DOCS) {
+ valueCount = Math.toIntExact(valueCountsIter.next());
+ valueUpto = 0;
}
return docID;
}
@@ -176,13 +161,16 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
@Override
public long nextValue() {
+ if (valueUpto == valueCount) {
+ throw new IllegalStateException();
+ }
valueUpto++;
return valuesIter.next();
}
-
+
@Override
public long cost() {
- return cost;
+ return docsWithField.cost();
}
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d50cf976/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
index b474d86..e7d915f 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
@@ -22,12 +22,15 @@ import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
@@ -37,10 +40,11 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
final BytesRefHash hash;
private PackedLongValues.Builder pending; // stream of all termIDs
private PackedLongValues.Builder pendingCounts; // termIDs per doc
+ private FixedBitSet docsWithField;
private final Counter iwBytesUsed;
private long bytesUsed; // this only tracks differences in 'pending' and 'pendingCounts'
private final FieldInfo fieldInfo;
- private int currentDoc;
+ private int currentDoc = -1;
private int currentValues[] = new int[8];
private int currentUpto;
private int maxCount;
@@ -55,26 +59,23 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
pending = PackedLongValues.packedBuilder(PackedInts.COMPACT);
pendingCounts = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
+ docsWithField = new FixedBitSet(64);
bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed();
iwBytesUsed.addAndGet(bytesUsed);
}
public void addValue(int docID, BytesRef value) {
+ assert docID >= currentDoc;
if (value == null) {
throw new IllegalArgumentException("field \"" + fieldInfo.name + "\": null value not allowed");
}
if (value.length > (BYTE_BLOCK_SIZE - 2)) {
throw new IllegalArgumentException("DocValuesField \"" + fieldInfo.name + "\" is too large, must be <= " + (BYTE_BLOCK_SIZE - 2));
}
-
+
if (docID != currentDoc) {
finishCurrentDoc();
- }
-
- // Fill in any holes:
- while(currentDoc < docID) {
- pendingCounts.add(0); // no values
- currentDoc++;
+ currentDoc = docID;
}
addOneValue(value);
@@ -83,6 +84,9 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
// finalize currentDoc: this deduplicates the current term ids
private void finishCurrentDoc() {
+ if (currentDoc == -1) {
+ return;
+ }
Arrays.sort(currentValues, 0, currentUpto);
int lastValue = -1;
int count = 0;
@@ -99,17 +103,13 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
pendingCounts.add(count);
maxCount = Math.max(maxCount, count);
currentUpto = 0;
- currentDoc++;
+ docsWithField = FixedBitSet.ensureCapacity(docsWithField, currentDoc);
+ docsWithField.set(currentDoc);
}
@Override
public void finish(int maxDoc) {
finishCurrentDoc();
-
- // fill in any holes
- for (int i = currentDoc; i < maxDoc; i++) {
- pendingCounts.add(0); // no values
- }
}
private void addOneValue(BytesRef value) {
@@ -141,9 +141,6 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
@Override
public void flush(SegmentWriteState state, DocValuesConsumer dvConsumer) throws IOException {
- final int maxDoc = state.segmentInfo.maxDoc();
- final int maxCountPerDoc = maxCount;
- assert pendingCounts.size() == maxDoc;
final int valueCount = hash.size();
final PackedLongValues ords = pending.build();
final PackedLongValues ordCounts = pendingCounts.build();
@@ -161,66 +158,51 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
if (fieldInfoIn != fieldInfo) {
throw new IllegalArgumentException("wrong fieldInfo");
}
- return new BufferedSortedSetDocValues(sortedValues, ordMap, hash, ords, ordCounts, maxCount);
+ return new BufferedSortedSetDocValues(sortedValues, ordMap, hash, ords, ordCounts, maxCount, docsWithField);
}
});
}
private static class BufferedSortedSetDocValues extends SortedSetDocValues {
- private int docID = -1;
final int[] sortedValues;
final int[] ordMap;
final BytesRefHash hash;
- final PackedLongValues ords;
- final PackedLongValues ordCounts;
final BytesRef scratch = new BytesRef();
final PackedLongValues.Iterator ordsIter;
final PackedLongValues.Iterator ordCountsIter;
+ final DocIdSetIterator docsWithField;
final int currentDoc[];
private int ordCount;
private int ordUpto;
- public BufferedSortedSetDocValues(int[] sortedValues, int[] ordMap, BytesRefHash hash, PackedLongValues ords, PackedLongValues ordCounts, int maxCount) {
+ public BufferedSortedSetDocValues(int[] sortedValues, int[] ordMap, BytesRefHash hash, PackedLongValues ords, PackedLongValues ordCounts, int maxCount, FixedBitSet docsWithField) {
this.currentDoc = new int[maxCount];
this.sortedValues = sortedValues;
this.ordMap = ordMap;
this.hash = hash;
- this.ords = ords;
- this.ordCounts = ordCounts;
this.ordsIter = ords.iterator();
this.ordCountsIter = ordCounts.iterator();
+ this.docsWithField = new BitSetIterator(docsWithField, ordCounts.size());
}
@Override
public int docID() {
- return docID;
+ return docsWithField.docID();
}
@Override
- public int nextDoc() {
- // consume any un-consumed ords from current doc
- while (ordUpto < ordCount) {
- ordsIter.next();
- ordUpto++;
- }
- while (true) {
- docID++;
- if (docID == ordCounts.size()) {
- docID = NO_MORE_DOCS;
- break;
- }
+ public int nextDoc() throws IOException {
+ int docID = docsWithField.nextDoc();
+ if (docID != NO_MORE_DOCS) {
ordCount = (int) ordCountsIter.next();
- if (ordCount > 0) {
- for(int i=0;i<ordCount;i++) {
- currentDoc[i] = ordMap[Math.toIntExact(ordsIter.next())];
- }
- Arrays.sort(currentDoc, 0, ordCount);
- ordUpto = 0;
- break;
+ assert ordCount > 0;
+ for (int i = 0; i < ordCount; i++) {
+ currentDoc[i] = ordMap[Math.toIntExact(ordsIter.next())];
}
+ Arrays.sort(currentDoc, 0, ordCount);
+ ordUpto = 0;
}
-
return docID;
}
@@ -235,7 +217,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
@Override
public long cost() {
- return ordCounts.size();
+ return docsWithField.cost();
}
@Override