You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2016/10/18 13:17:26 UTC
lucene-solr:branch_6x: LUCENE-7501: BKDReader should not store the
split dimension explicitly in the 1D case.
Repository: lucene-solr
Updated Branches:
refs/heads/branch_6x df09867cc -> e6d225603
LUCENE-7501: BKDReader should not store the split dimension explicitly in the 1D case.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e6d22560
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e6d22560
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e6d22560
Branch: refs/heads/branch_6x
Commit: e6d225603aad24a7420f395ecd3dffc17fddf62c
Parents: df09867
Author: Adrien Grand <jp...@gmail.com>
Authored: Tue Oct 18 09:33:36 2016 +0200
Committer: Adrien Grand <jp...@gmail.com>
Committed: Tue Oct 18 15:06:07 2016 +0200
----------------------------------------------------------------------
lucene/CHANGES.txt | 3 ++
.../simpletext/SimpleTextPointsReader.java | 17 +++++++++--
.../org/apache/lucene/util/bkd/BKDReader.java | 32 +++++++++++---------
.../org/apache/lucene/util/bkd/BKDWriter.java | 14 ++++++---
4 files changed, 45 insertions(+), 21 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e6d22560/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index e277ddc..33adbb7 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -53,6 +53,9 @@ Improvements
Optimizations
+* LUCENE-7501: BKDReader should not store the split dimension explicitly in the
+ 1D case. (Adrien Grand)
+
Other
* LUCENE-7452: Block join query exception suggests how to find a doc, which
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e6d22560/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsReader.java
----------------------------------------------------------------------
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsReader.java
index 1477f17..2a01b52 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsReader.java
@@ -138,15 +138,26 @@ class SimpleTextPointsReader extends PointsReader {
readLine(dataIn);
count = parseInt(SPLIT_COUNT);
- byte[] splitPackedValues = new byte[count * (1 + bytesPerDim)];
+ byte[] splitPackedValues;
+ int bytesPerIndexEntry;
+ if (numDims == 1) {
+ bytesPerIndexEntry = bytesPerDim;
+ } else {
+ bytesPerIndexEntry = 1 + bytesPerDim;
+ }
+ splitPackedValues = new byte[count * bytesPerIndexEntry];
for(int i=0;i<count;i++) {
readLine(dataIn);
- splitPackedValues[(1 + bytesPerDim) * i] = (byte) parseInt(SPLIT_DIM);
+ int address = bytesPerIndexEntry * i;
+ int splitDim = parseInt(SPLIT_DIM);
+ if (numDims != 1) {
+ splitPackedValues[address++] = (byte) splitDim;
+ }
readLine(dataIn);
assert startsWith(SPLIT_VALUE);
BytesRef br = SimpleTextUtil.fromBytesRefString(stripPrefix(SPLIT_VALUE));
assert br.length == bytesPerDim;
- System.arraycopy(br.bytes, br.offset, splitPackedValues, (1 + bytesPerDim) * i + 1, bytesPerDim);
+ System.arraycopy(br.bytes, br.offset, splitPackedValues, address, bytesPerDim);
}
return new SimpleTextBKDReader(dataIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues, minValue.bytes, maxValue.bytes, pointCount, docCount);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e6d22560/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java
index 9ca0bb4..efb8ea6 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java
@@ -26,6 +26,7 @@ import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.StringHelper;
/** Handles intersection of an multi-dimensional shape in byte[] space with a block KD-tree previously written with {@link BKDWriter}.
@@ -39,6 +40,7 @@ public class BKDReader implements Accountable {
final private int leafNodeOffset;
final int numDims;
final int bytesPerDim;
+ final int bytesPerIndexEntry;
final IndexInput in;
final int maxPointsInLeafNode;
final byte[] minPackedValue;
@@ -54,6 +56,7 @@ public class BKDReader implements Accountable {
numDims = in.readVInt();
maxPointsInLeafNode = in.readVInt();
bytesPerDim = in.readVInt();
+ bytesPerIndexEntry = numDims == 1 && version >= BKDWriter.VERSION_IMPLICIT_SPLIT_DIM_1D ? bytesPerDim : bytesPerDim + 1;
packedBytesLength = numDims * bytesPerDim;
// Read index:
@@ -70,7 +73,7 @@ public class BKDReader implements Accountable {
pointCount = in.readVLong();
docCount = in.readVInt();
- splitPackedValues = new byte[(1+bytesPerDim)*numLeaves];
+ splitPackedValues = new byte[bytesPerIndexEntry*numLeaves];
// TODO: don't write split packed values[0]!
in.readBytes(splitPackedValues, 0, splitPackedValues.length);
@@ -135,6 +138,7 @@ public class BKDReader implements Accountable {
this.numDims = numDims;
this.maxPointsInLeafNode = maxPointsInLeafNode;
this.bytesPerDim = bytesPerDim;
+ bytesPerIndexEntry = numDims == 1 ? bytesPerDim : bytesPerDim + 1;
packedBytesLength = numDims * bytesPerDim;
this.leafNodeOffset = leafBlockFPs.length;
this.leafBlockFPs = leafBlockFPs;
@@ -234,22 +238,22 @@ public class BKDReader implements Accountable {
} else {
// Non-leaf node:
- int address = nodeID * (bytesPerDim+1);
- int splitDim = splitPackedValues[address] & 0xff;
+ int address = nodeID * bytesPerIndexEntry;
+ int splitDim = numDims == 1 ? 0 : splitPackedValues[address++] & 0xff;
assert splitDim < numDims;
byte[] splitPackedValue = new byte[packedBytesLength];
// Recurse on left sub-tree:
System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, packedBytesLength);
- System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
+ System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
verify(state,
2*nodeID,
cellMinPacked, splitPackedValue);
// Recurse on right sub-tree:
System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, packedBytesLength);
- System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
+ System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
verify(state,
2*nodeID+1,
splitPackedValue, cellMaxPacked);
@@ -457,8 +461,8 @@ public class BKDReader implements Accountable {
// Non-leaf node: recurse on the split left and right nodes
// TODO: save the unused 1 byte prefix (it's always 0) in the 1d case here:
- int address = nodeID * (bytesPerDim+1);
- int splitDim = splitPackedValues[address] & 0xff;
+ int address = nodeID * bytesPerIndexEntry;
+ int splitDim = numDims == 1 ? 0 : splitPackedValues[address++] & 0xff;
assert splitDim < numDims;
// TODO: can we alloc & reuse this up front?
@@ -468,14 +472,14 @@ public class BKDReader implements Accountable {
// Recurse on left sub-tree:
System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, packedBytesLength);
- System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
+ System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
intersect(state,
2*nodeID,
cellMinPacked, splitPackedValue);
// Recurse on right sub-tree:
System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, packedBytesLength);
- System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
+ System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
intersect(state,
2*nodeID+1,
splitPackedValue, cellMaxPacked);
@@ -484,16 +488,16 @@ public class BKDReader implements Accountable {
/** Copies the split value for this node into the provided byte array */
public void copySplitValue(int nodeID, byte[] splitPackedValue) {
- int address = nodeID * (bytesPerDim+1);
- int splitDim = splitPackedValues[address] & 0xff;
+ int address = nodeID * bytesPerIndexEntry;
+ int splitDim = numDims == 1 ? 0 : splitPackedValues[address++] & 0xff;
assert splitDim < numDims;
- System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
+ System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
}
@Override
public long ramBytesUsed() {
- return splitPackedValues.length +
- leafBlockFPs.length * Long.BYTES;
+ return RamUsageEstimator.sizeOf(splitPackedValues) +
+ RamUsageEstimator.sizeOf(leafBlockFPs);
}
public byte[] getMinPackedValue() {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e6d22560/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
index 88a84e9..da73286 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
@@ -82,7 +82,8 @@ public class BKDWriter implements Closeable {
public static final int VERSION_START = 0;
public static final int VERSION_COMPRESSED_DOC_IDS = 1;
public static final int VERSION_COMPRESSED_VALUES = 2;
- public static final int VERSION_CURRENT = VERSION_COMPRESSED_VALUES;
+ public static final int VERSION_IMPLICIT_SPLIT_DIM_1D = 3;
+ public static final int VERSION_CURRENT = VERSION_IMPLICIT_SPLIT_DIM_1D;
/** How many bytes each docs takes in the fixed-width offline format */
private final int bytesPerDoc;
@@ -1033,10 +1034,15 @@ public class BKDWriter implements Closeable {
out.writeVLong(pointCount);
out.writeVInt(docsSeen.cardinality());
- // TODO: for 1D case, don't waste the first byte of each split value (it's always 0)
-
// NOTE: splitPackedValues[0] is unused, because nodeID is 1-based:
- out.writeBytes(splitPackedValues, 0, splitPackedValues.length);
+ if (numDims == 1) {
+ // write the index, skipping the byte used to store the split dim since it is always 0
+ for (int i = 1; i < splitPackedValues.length; i += 1 + bytesPerDim) {
+ out.writeBytes(splitPackedValues, i, bytesPerDim);
+ }
+ } else {
+ out.writeBytes(splitPackedValues, 0, splitPackedValues.length);
+ }
long lastFP = 0;
for (int i=0;i<leafBlockFPs.length;i++) {