You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by iv...@apache.org on 2019/06/26 08:16:17 UTC
[lucene-solr] branch master updated: LUCENE-8868: New storing
strategy for BKD tree leaves with low cardinality (#730)
This is an automated email from the ASF dual-hosted git repository.
ivera pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/master by this push:
new dac4310 LUCENE-8868: New storing strategy for BKD tree leaves with low cardinality (#730)
dac4310 is described below
commit dac4310129a4a10bbac81c64723ad5a6669d3c5e
Author: Ignacio Vera <iv...@apache.org>
AuthorDate: Wed Jun 26 10:16:12 2019 +0200
LUCENE-8868: New storing strategy for BKD tree leaves with low cardinality (#730)
When a leaf has only a few distinct values, we store each distinct value once, together with its cardinality (the number of points sharing it).
---
lucene/CHANGES.txt | 4 +
.../java/org/apache/lucene/util/bkd/BKDReader.java | 90 +++++++++++--
.../java/org/apache/lucene/util/bkd/BKDWriter.java | 140 +++++++++++++++++----
.../apache/lucene/util/bkd/HeapPointWriter.java | 19 +++
.../test/org/apache/lucene/util/bkd/TestBKD.java | 23 ++++
5 files changed, 241 insertions(+), 35 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index ba1fb2a..a8734a3 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -128,6 +128,10 @@ Optimizations
Now caller threads execute at least one search on an index even if there is
an executor provided to minimize thread context switching. (Simon Willnauer)
+* LUCENE-8868: New storing strategy for BKD tree leaves with low cardinality.
+ Each distinct value is stored once, together with its cardinality, which
+ reduces the storage cost.
+
Test Framework
* LUCENE-8825: CheckHits now display the shard index in case of mismatch
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java
index c1f5a63..ffabd6a 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java
@@ -441,16 +441,22 @@ public final class BKDReader extends PointValues implements Accountable {
void visitDocValues(int[] commonPrefixLengths, byte[] scratchDataPackedValue, byte[] scratchMinIndexPackedValue, byte[] scratchMaxIndexPackedValue,
IndexInput in, int[] docIDs, int count, IntersectVisitor visitor) throws IOException {
+ if (version >= BKDWriter.VERSION_LOW_CARDINALITY_LEAVES) {
+ visitDocValuesWithCardinality(commonPrefixLengths, scratchDataPackedValue, scratchMinIndexPackedValue, scratchMaxIndexPackedValue, in, docIDs, count, visitor);
+ } else {
+ visitDocValuesNoCardinality(commonPrefixLengths, scratchDataPackedValue, scratchMinIndexPackedValue, scratchMaxIndexPackedValue, in, docIDs, count, visitor);
+ }
+ }
-
+ void visitDocValuesNoCardinality(int[] commonPrefixLengths, byte[] scratchDataPackedValue, byte[] scratchMinIndexPackedValue, byte[] scratchMaxIndexPackedValue,
+ IndexInput in, int[] docIDs, int count, IntersectVisitor visitor) throws IOException {
readCommonPrefixes(commonPrefixLengths, scratchDataPackedValue, in);
if (numIndexDims != 1 && version >= BKDWriter.VERSION_LEAF_STORES_BOUNDS) {
byte[] minPackedValue = scratchMinIndexPackedValue;
System.arraycopy(scratchDataPackedValue, 0, minPackedValue, 0, packedIndexBytesLength);
byte[] maxPackedValue = scratchMaxIndexPackedValue;
- //Copy common prefixes before reading adjusted
- // box
+ // Copy common prefixes before reading adjusted box
System.arraycopy(minPackedValue, 0, maxPackedValue, 0, packedIndexBytesLength);
readMinMax(commonPrefixLengths, minPackedValue, maxPackedValue, in);
@@ -480,12 +486,61 @@ public final class BKDReader extends PointValues implements Accountable {
int compressedDim = readCompressedDim(in);
if (compressedDim == -1) {
- visitRawDocValues(commonPrefixLengths, scratchDataPackedValue, in, docIDs, count, visitor);
+ visitUniqueRawDocValues(scratchDataPackedValue, docIDs, count, visitor);
} else {
visitCompressedDocValues(commonPrefixLengths, scratchDataPackedValue, in, docIDs, count, visitor, compressedDim);
}
}
+ void visitDocValuesWithCardinality(int[] commonPrefixLengths, byte[] scratchDataPackedValue, byte[] scratchMinIndexPackedValue, byte[] scratchMaxIndexPackedValue,
+ IndexInput in, int[] docIDs, int count, IntersectVisitor visitor) throws IOException {
+
+ readCommonPrefixes(commonPrefixLengths, scratchDataPackedValue, in);
+ int compressedDim = readCompressedDim(in);
+ if (compressedDim == -1) {
+ // all values are the same
+ visitor.grow(count);
+ visitUniqueRawDocValues(scratchDataPackedValue, docIDs, count, visitor);
+ } else {
+ if (numIndexDims != 1) {
+ byte[] minPackedValue = scratchMinIndexPackedValue;
+ System.arraycopy(scratchDataPackedValue, 0, minPackedValue, 0, packedIndexBytesLength);
+ byte[] maxPackedValue = scratchMaxIndexPackedValue;
+ // Copy common prefixes before reading adjusted box
+ System.arraycopy(minPackedValue, 0, maxPackedValue, 0, packedIndexBytesLength);
+ readMinMax(commonPrefixLengths, minPackedValue, maxPackedValue, in);
+
+ // The index gives us range of values for each dimension, but the actual range of values
+ // might be much more narrow than what the index told us, so we double check the relation
+ // here, which is cheap yet might help figure out that the block either entirely matches
+ // or does not match at all. This is especially more likely in the case that there are
+ // multiple dimensions that have correlation, ie. splitting on one dimension also
+ // significantly changes the range of values in another dimension.
+ Relation r = visitor.compare(minPackedValue, maxPackedValue);
+ if (r == Relation.CELL_OUTSIDE_QUERY) {
+ return;
+ }
+ visitor.grow(count);
+
+ if (r == Relation.CELL_INSIDE_QUERY) {
+ for (int i = 0; i < count; ++i) {
+ visitor.visit(docIDs[i]);
+ }
+ return;
+ }
+ } else {
+ visitor.grow(count);
+ }
+ if (compressedDim == -2) {
+ // low cardinality values
+ visitSparseRawDocValues(commonPrefixLengths, scratchDataPackedValue, in, docIDs, count, visitor);
+ } else {
+ // high cardinality
+ visitCompressedDocValues(commonPrefixLengths, scratchDataPackedValue, in, docIDs, count, visitor, compressedDim);
+ }
+ }
+ }
+
private void readMinMax(int[] commonPrefixLengths, byte[] minPackedValue, byte[] maxPackedValue, IndexInput in) throws IOException {
for (int dim = 0; dim < numIndexDims; dim++) {
int prefix = commonPrefixLengths[dim];
@@ -494,13 +549,28 @@ public final class BKDReader extends PointValues implements Accountable {
}
}
- // Just read suffixes for every dimension
- private void visitRawDocValues(int[] commonPrefixLengths, byte[] scratchPackedValue, IndexInput in, int[] docIDs, int count, IntersectVisitor visitor) throws IOException {
- for (int i = 0; i < count; ++i) {
- for(int dim=0;dim<numDataDims;dim++) {
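+ // low cardinality leaf: for each distinct point, read its cardinality (a vInt) followed by its per-dimension suffixes, then visit that many docs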
+ private void visitSparseRawDocValues(int[] commonPrefixLengths, byte[] scratchPackedValue, IndexInput in, int[] docIDs, int count, IntersectVisitor visitor) throws IOException {
+ int i;
+ for (i = 0; i < count;) {
+ int length = in.readVInt();
+ for(int dim = 0; dim < numDataDims; dim++) {
int prefix = commonPrefixLengths[dim];
in.readBytes(scratchPackedValue, dim*bytesPerDim + prefix, bytesPerDim - prefix);
}
+ for (int j = i; j < i + length; j++) {
+ visitor.visit(docIDs[j], scratchPackedValue);
+ }
+ i += length;
+ }
+ if (i != count) {
+ throw new CorruptIndexException("Sub blocks do not add up to the expected count: " + count + " != " + i, in);
+ }
+ }
+
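+ // all points in the leaf are equal: the value is already fully held in scratchPackedValue (the common prefix), so nothing more is read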
+ private void visitUniqueRawDocValues(byte[] scratchPackedValue, int[] docIDs, int count, IntersectVisitor visitor) throws IOException {
+ for (int i = 0; i < count; i++) {
visitor.visit(docIDs[i], scratchPackedValue);
}
}
@@ -515,7 +585,7 @@ public final class BKDReader extends PointValues implements Accountable {
scratchPackedValue[compressedByteOffset] = in.readByte();
final int runLen = Byte.toUnsignedInt(in.readByte());
for (int j = 0; j < runLen; ++j) {
- for(int dim=0;dim<numDataDims;dim++) {
+ for(int dim = 0; dim < numDataDims; dim++) {
int prefix = commonPrefixLengths[dim];
in.readBytes(scratchPackedValue, dim*bytesPerDim + prefix, bytesPerDim - prefix);
}
@@ -530,7 +600,7 @@ public final class BKDReader extends PointValues implements Accountable {
private int readCompressedDim(IndexInput in) throws IOException {
int compressedDim = in.readByte();
- if (compressedDim < -1 || compressedDim >= numDataDims) {
+ if (compressedDim < -2 || compressedDim >= numDataDims || (version < BKDWriter.VERSION_LOW_CARDINALITY_LEAVES && compressedDim == -2)) {
throw new CorruptIndexException("Got compressedDim="+compressedDim, in);
}
return compressedDim;
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
index 659935c..c65d56a 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
@@ -80,7 +80,8 @@ public class BKDWriter implements Closeable {
//public static final int VERSION_CURRENT = VERSION_START;
public static final int VERSION_LEAF_STORES_BOUNDS = 5;
public static final int VERSION_SELECTIVE_INDEXING = 6;
- public static final int VERSION_CURRENT = VERSION_SELECTIVE_INDEXING;
+ public static final int VERSION_LOW_CARDINALITY_LEAVES = 7;
+ public static final int VERSION_CURRENT = VERSION_LOW_CARDINALITY_LEAVES;
/** How many bytes each docs takes in the fixed-width offline format */
private final int bytesPerDoc;
@@ -516,6 +517,7 @@ public class BKDWriter implements Closeable {
final int[] leafDocs = new int[maxPointsInLeafNode];
private long valueCount;
private int leafCount;
+ private int leafCardinality;
OneDimensionBKDWriter(IndexOutput out) {
if (numIndexDims != 1) {
@@ -546,6 +548,9 @@ public class BKDWriter implements Closeable {
assert valueInOrder(valueCount + leafCount,
0, lastPackedValue, packedValue, 0, docID, lastDocID);
+ if (leafCount == 0 || Arrays.mismatch(leafValues, (leafCount - 1) * bytesPerDim, leafCount * bytesPerDim, packedValue, 0, bytesPerDim) != -1) {
+ leafCardinality++;
+ }
System.arraycopy(packedValue, 0, leafValues, leafCount * packedBytesLength, packedBytesLength);
leafDocs[leafCount] = docID;
docsSeen.set(docID);
@@ -558,7 +563,8 @@ public class BKDWriter implements Closeable {
if (leafCount == maxPointsInLeafNode) {
// We write a block once we hit exactly the max count ... this is different from
// when we write N > 1 dimensional points where we write between max/2 and max per leaf block
- writeLeafBlock();
+ writeLeafBlock(leafCardinality);
+ leafCardinality = 0;
leafCount = 0;
}
@@ -567,7 +573,8 @@ public class BKDWriter implements Closeable {
public long finish() throws IOException {
if (leafCount > 0) {
- writeLeafBlock();
+ writeLeafBlock(leafCardinality);
+ leafCardinality = 0;
leafCount = 0;
}
@@ -593,7 +600,7 @@ public class BKDWriter implements Closeable {
return indexFP;
}
- private void writeLeafBlock() throws IOException {
+ private void writeLeafBlock(int leafCardinality) throws IOException {
assert leafCount != 0;
if (valueCount == 0) {
System.arraycopy(leafValues, 0, minPackedValue, 0, packedIndexBytesLength);
@@ -613,7 +620,7 @@ public class BKDWriter implements Closeable {
int offset = (leafCount - 1) * packedBytesLength;
int prefix = Arrays.mismatch(leafValues, 0, bytesPerDim, leafValues, offset, offset + bytesPerDim);
if (prefix == -1) {
- prefix = bytesPerDim;
+ prefix = bytesPerDim;
}
commonPrefixLengths[0] = prefix;
@@ -635,7 +642,7 @@ public class BKDWriter implements Closeable {
assert valuesInOrderAndBounds(leafCount, 0, ArrayUtil.copyOfSubArray(leafValues, 0, packedBytesLength),
ArrayUtil.copyOfSubArray(leafValues, (leafCount - 1) * packedBytesLength, leafCount * packedBytesLength),
packedValues, leafDocs, 0);
- writeLeafBlockPackedValues(scratchOut, commonPrefixLengths, leafCount, 0, packedValues);
+ writeLeafBlockPackedValues(scratchOut, commonPrefixLengths, leafCount, 0, packedValues, leafCardinality);
scratchOut.copyTo(out);
scratchOut.reset();
}
@@ -1028,34 +1035,96 @@ public class BKDWriter implements Closeable {
DocIdsWriter.writeDocIds(docIDs, start, count, out);
}
- private void writeLeafBlockPackedValues(DataOutput out, int[] commonPrefixLengths, int count, int sortedDim, IntFunction<BytesRef> packedValues) throws IOException {
+ private void writeLeafBlockPackedValues(DataOutput out, int[] commonPrefixLengths, int count, int sortedDim, IntFunction<BytesRef> packedValues, int leafCardinality) throws IOException {
int prefixLenSum = Arrays.stream(commonPrefixLengths).sum();
if (prefixLenSum == packedBytesLength) {
// all values in this block are equal
out.writeByte((byte) -1);
} else {
- if (numIndexDims != 1) {
- writeActualBounds(out, commonPrefixLengths, count, packedValues);
- }
assert commonPrefixLengths[sortedDim] < bytesPerDim;
- out.writeByte((byte) sortedDim);
+ // estimate if storing the values with cardinality is cheaper than storing all values.
int compressedByteOffset = sortedDim * bytesPerDim + commonPrefixLengths[sortedDim];
- commonPrefixLengths[sortedDim]++;
- for (int i = 0; i < count; ) {
- // do run-length compression on the byte at compressedByteOffset
- int runLen = runLen(packedValues, i, Math.min(i + 0xff, count), compressedByteOffset);
- assert runLen <= 0xff;
- BytesRef first = packedValues.apply(i);
- byte prefixByte = first.bytes[first.offset + compressedByteOffset];
- out.writeByte(prefixByte);
- out.writeByte((byte) runLen);
- writeLeafBlockPackedValuesRange(out, commonPrefixLengths, i, i + runLen, packedValues);
- i += runLen;
- assert i <= count;
+ int highCardinalityCost;
+ int lowCardinalityCost;
+ if (count == leafCardinality) {
+ // all values in this block are different
+ highCardinalityCost = 0;
+ lowCardinalityCost = 1;
+ } else {
+ // compute cost of runLen compression
+ int numRunLens = 0;
+ for (int i = 0; i < count; ) {
+ // do run-length compression on the byte at compressedByteOffset
+ int runLen = runLen(packedValues, i, Math.min(i + 0xff, count), compressedByteOffset);
+ assert runLen <= 0xff;
+ numRunLens++;
+ i += runLen;
+ }
+ // Add cost of runLen compression
+ highCardinalityCost = count * (packedBytesLength - prefixLenSum - 1) + 2 * numRunLens;
+ // +1 is the byte needed for storing the cardinality
+ lowCardinalityCost = leafCardinality * (packedBytesLength - prefixLenSum + 1);
+ }
+ if (lowCardinalityCost <= highCardinalityCost) {
+ out.writeByte((byte) -2);
+ writeLowCardinalityLeafBlockPackedValues(out, commonPrefixLengths, count, packedValues);
+ } else {
+ out.writeByte((byte) sortedDim);
+ writeHighCardinalityLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues, compressedByteOffset);
}
}
}
+ private void writeLowCardinalityLeafBlockPackedValues(DataOutput out, int[] commonPrefixLengths, int count, IntFunction<BytesRef> packedValues) throws IOException {
+ if (numIndexDims != 1) {
+ writeActualBounds(out, commonPrefixLengths, count, packedValues);
+ }
+ BytesRef value = packedValues.apply(0);
+ System.arraycopy(value.bytes, value.offset, scratch1, 0, packedBytesLength);
+ int cardinality = 1;
+ for (int i = 1; i < count; i++) {
+ value = packedValues.apply(i);
+ for(int dim = 0; dim < numDataDims; dim++) {
+ final int start = dim * bytesPerDim + commonPrefixLengths[dim];
+ final int end = dim * bytesPerDim + bytesPerDim;
+ if (Arrays.mismatch(value.bytes, value.offset + start, value.offset + end, scratch1, start, end) != -1) {
+ out.writeVInt(cardinality);
+ for (int j = 0; j < numDataDims; j++) {
+ out.writeBytes(scratch1, j * bytesPerDim + commonPrefixLengths[j], bytesPerDim - commonPrefixLengths[j]);
+ }
+ System.arraycopy(value.bytes, value.offset, scratch1, 0, packedBytesLength);
+ cardinality = 1;
+ break;
+ } else if (dim == numDataDims - 1){
+ cardinality++;
+ }
+ }
+ }
+ out.writeVInt(cardinality);
+ for (int i = 0; i < numDataDims; i++) {
+ out.writeBytes(scratch1, i * bytesPerDim + commonPrefixLengths[i], bytesPerDim - commonPrefixLengths[i]);
+ }
+ }
+
+ private void writeHighCardinalityLeafBlockPackedValues(DataOutput out, int[] commonPrefixLengths, int count, int sortedDim, IntFunction<BytesRef> packedValues, int compressedByteOffset) throws IOException {
+ if (numIndexDims != 1) {
+ writeActualBounds(out, commonPrefixLengths, count, packedValues);
+ }
+ commonPrefixLengths[sortedDim]++;
+ for (int i = 0; i < count; ) {
+ // do run-length compression on the byte at compressedByteOffset
+ int runLen = runLen(packedValues, i, Math.min(i + 0xff, count), compressedByteOffset);
+ assert runLen <= 0xff;
+ BytesRef first = packedValues.apply(i);
+ byte prefixByte = first.bytes[first.offset + compressedByteOffset];
+ out.writeByte(prefixByte);
+ out.writeByte((byte) runLen);
+ writeLeafBlockPackedValuesRange(out, commonPrefixLengths, i, i + runLen, packedValues);
+ i += runLen;
+ assert i <= count;
+ }
+ }
+
private void writeActualBounds(DataOutput out, int[] commonPrefixLengths, int count, IntFunction<BytesRef> packedValues) throws IOException {
for (int dim = 0; dim < numIndexDims; ++dim) {
int commonPrefixLength = commonPrefixLengths[dim];
@@ -1295,6 +1364,25 @@ public class BKDWriter implements Closeable {
MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths,
reader, from, to, scratchBytesRef1, scratchBytesRef2);
+ BytesRef comparator = scratchBytesRef1;
+ BytesRef collector = scratchBytesRef2;
+ reader.getValue(from, comparator);
+ int leafCardinality = 1;
+ for (int i = from + 1; i < to; ++i) {
+ reader.getValue(i, collector);
+ for (int dim =0; dim < numDataDims; dim++) {
+ final int start = dim * bytesPerDim + commonPrefixLengths[dim];
+ final int end = dim * bytesPerDim + bytesPerDim;
+ if (Arrays.mismatch(collector.bytes, collector.offset + start, collector.offset + end,
+ comparator.bytes, comparator.offset + start, comparator.offset + end) != -1) {
+ leafCardinality++;
+ BytesRef scratch = collector;
+ collector = comparator;
+ comparator = scratch;
+ break;
+ }
+ }
+ }
// Save the block file pointer:
leafBlockFPs[nodeID - leafNodeOffset] = out.getFilePointer();
@@ -1323,7 +1411,7 @@ public class BKDWriter implements Closeable {
};
assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues,
docIDs, 0);
- writeLeafBlockPackedValues(scratchOut, commonPrefixLengths, count, sortedDim, packedValues);
+ writeLeafBlockPackedValues(scratchOut, commonPrefixLengths, count, sortedDim, packedValues, leafCardinality);
scratchOut.copyTo(out);
scratchOut.reset();
} else {
@@ -1426,6 +1514,8 @@ public class BKDWriter implements Closeable {
// sort the chosen dimension
radixSelector.heapRadixSort(heapSource, from, to, sortedDim, commonPrefixLengths[sortedDim]);
+ // compute cardinality
+ int leafCardinality = heapSource.computeCardinality(from ,to, numDataDims, bytesPerDim, commonPrefixLengths);
// Save the block file pointer:
leafBlockFPs[nodeID - leafNodeOffset] = out.getFilePointer();
@@ -1459,7 +1549,7 @@ public class BKDWriter implements Closeable {
};
assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues,
heapSource.docIDs, from);
- writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues);
+ writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues, leafCardinality);
} else {
// Inner node: partition/recurse
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java
index db30548..306ec5c 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java
@@ -16,6 +16,8 @@
*/
package org.apache.lucene.util.bkd;
+import java.util.Arrays;
+
import org.apache.lucene.util.BytesRef;
/**
@@ -93,6 +95,23 @@ public final class HeapPointWriter implements PointWriter {
System.arraycopy(scratch, 0, block, indexJ, packedBytesLength);
}
+ public int computeCardinality(int from, int to, int numDataDims, int bytesPerDim, int[] commonPrefixLengths) {
+ assert packedBytesLength == numDataDims * bytesPerDim;
+ int leafCardinality = 1;
+ for (int i = from + 1; i < to; i++) {
+ for (int dim = 0; dim < numDataDims; dim++) {
+ final int start = dim * bytesPerDim + commonPrefixLengths[dim];
+ final int end = dim * bytesPerDim + bytesPerDim;
+ if (Arrays.mismatch(block, i * packedBytesLength + start, i * packedBytesLength + end,
+ block, (i - 1) * packedBytesLength + start, (i - 1) * packedBytesLength + end) != -1) {
+ leafCardinality++;
+ break;
+ }
+ }
+ }
+ return leafCardinality;
+ }
+
@Override
public long count() {
return nextWrite;
diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
index ce76ca4..15ba4f9 100644
--- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
+++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
@@ -614,6 +614,29 @@ public class TestBKD extends LuceneTestCase {
verify(docValues, null, numDataDims, numIndexDims, numBytesPerDim);
}
+ // this should trigger low cardinality leaves
+ public void testRandomFewDifferentValues() throws Exception {
+ int numBytesPerDim = TestUtil.nextInt(random(), 2, 30);
+ int numIndexDims = TestUtil.nextInt(random(), 1, 8);
+ int numDataDims = TestUtil.nextInt(random(), numIndexDims, 8);
+
+ int numDocs = atLeast(10000);
+ int cardinality = TestUtil.nextInt(random(), 2, 100);
+ byte[][][] values = new byte[cardinality][numDataDims][numBytesPerDim];
+ for (int i = 0; i < cardinality; i++) {
+ for (int j = 0; j < numDataDims; j++) {
+ random().nextBytes(values[i][j]);
+ }
+ }
+
+ byte[][][] docValues = new byte[numDocs][][];
+ for(int docID = 0; docID < numDocs; docID++) {
+ docValues[docID] = values[random().nextInt(cardinality)];
+ }
+
+ verify(docValues, null, numDataDims, numIndexDims, numBytesPerDim);
+ }
+
public void testMultiValued() throws Exception {
int numBytesPerDim = TestUtil.nextInt(random(), 2, 30);
int numDataDims = TestUtil.nextInt(random(), 1, 5);