You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2016/03/21 01:43:34 UTC
[12/50] lucene-solr:jira/SOLR-445: optimize offline -> offline
partition
optimize offline -> offline partition
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/983908c8
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/983908c8
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/983908c8
Branch: refs/heads/jira/SOLR-445
Commit: 983908c80989d2af6868c8e1d99925a52d79a65e
Parents: d8eac8e
Author: Mike McCandless <mi...@apache.org>
Authored: Sun Mar 13 08:55:31 2016 -0400
Committer: Mike McCandless <mi...@apache.org>
Committed: Sun Mar 13 08:55:31 2016 -0400
----------------------------------------------------------------------
.../org/apache/lucene/util/bkd/BKDWriter.java | 19 +----
.../apache/lucene/util/bkd/HeapPointReader.java | 4 +-
.../lucene/util/bkd/OfflinePointReader.java | 77 +++++++++++++++++++-
.../lucene/util/bkd/OfflinePointWriter.java | 2 +-
.../org/apache/lucene/util/bkd/PointReader.java | 37 ++++++++--
5 files changed, 111 insertions(+), 28 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/983908c8/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
index d4e30b7..c5cdc30 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
@@ -1176,24 +1176,7 @@ public class BKDWriter implements Closeable {
PointWriter rightPointWriter = getPointWriter(source.count - leftCount, "right" + dim);
PointReader reader = slices[dim].writer.getReader(slices[dim].start);) {
- // Partition this source according to how the splitDim split the values:
- long nextRightCount = 0;
- for (long i=0;i<source.count;i++) {
- boolean result = reader.next();
- assert result;
- byte[] packedValue = reader.packedValue();
- long ord = reader.ord();
- int docID = reader.docID();
- if (ordBitSet.get(ord)) {
- rightPointWriter.append(packedValue, ord, docID);
- nextRightCount++;
- if (dim == dimToClear) {
- ordBitSet.clear(ord);
- }
- } else {
- leftPointWriter.append(packedValue, ord, docID);
- }
- }
+ long nextRightCount = reader.split(source.count, ordBitSet, leftPointWriter, rightPointWriter, dim == dimToClear);
leftSlices[dim] = new PathSlice(leftPointWriter, 0, leftCount);
rightSlices[dim] = new PathSlice(rightPointWriter, 0, rightCount);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/983908c8/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java
index 63c7869..cd9152e 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java
@@ -18,9 +18,7 @@ package org.apache.lucene.util.bkd;
import java.util.List;
-import org.apache.lucene.util.PagedBytes;
-
-final class HeapPointReader implements PointReader {
+final class HeapPointReader extends PointReader {
private int curRead;
final List<byte[]> blocks;
final int valuesPerBlock;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/983908c8/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
index 3c4b8b5..c8ab47e 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
@@ -22,9 +22,11 @@ import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.LongBitSet;
/** Reads points from disk in a fixed-width format, previously written with {@link OfflinePointWriter}. */
-final class OfflinePointReader implements PointReader {
+final class OfflinePointReader extends PointReader {
long countLeft;
private final IndexInput in;
private final byte[] packedValue;
@@ -90,5 +92,78 @@ final class OfflinePointReader implements PointReader {
public void close() throws IOException {
in.close();
}
+
+  /**
+   * Splits the next {@code count} records of this reader between {@code left} and
+   * {@code right}: a record goes to {@code right} when its ordinal is set in
+   * {@code rightTree}, otherwise to {@code left}.
+   *
+   * <p>When both targets are {@link OfflinePointWriter}s, the raw fixed-width record
+   * bytes are copied straight to the writers' outputs and only the ordinal is decoded.
+   * Any other combination is delegated to the generic implementation in the superclass.
+   *
+   * @param count number of records to consume from this reader
+   * @param rightTree bit set, indexed by point ordinal, marking records that belong on the right
+   * @param left writer receiving records whose ordinal bit is not set
+   * @param right writer receiving records whose ordinal bit is set
+   * @param doClearBits if true, clear each ordinal's bit after its record is routed right
+   * @return the number of records written to {@code right}
+   */
+  @Override
+  public long split(long count, LongBitSet rightTree, PointWriter left, PointWriter right, boolean doClearBits) throws IOException {
+
+    if (left instanceof OfflinePointWriter == false ||
+        right instanceof OfflinePointWriter == false) {
+      return super.split(count, rightTree, left, right, doClearBits);
+    }
+
+    // We specialize the offline -> offline split since the default impl
+    // is somewhat wasteful otherwise (e.g. decoding docID when we don't
+    // need to)
+
+    int packedBytesLength = packedValue.length;
+
+    // One record = packed value + ord (8 or 4 bytes) + 4 more bytes
+    // (presumably the docID, which is copied through without decoding):
+    int bytesPerDoc = packedBytesLength + Integer.BYTES;
+    if (longOrds) {
+      bytesPerDoc += Long.BYTES;
+    } else {
+      bytesPerDoc += Integer.BYTES;
+    }
+
+    long rightCount = 0;
+
+    IndexOutput rightOut = ((OfflinePointWriter) right).out;
+    IndexOutput leftOut = ((OfflinePointWriter) left).out;
+
+    assert count <= countLeft: "count=" + count + " countLeft=" + countLeft;
+
+    countLeft -= count;
+
+    byte[] buffer = new byte[bytesPerDoc];
+    for (long remaining = count; remaining > 0; remaining--) {
+      in.readBytes(buffer, 0, buffer.length);
+      // The ord is stored immediately after the packed value:
+      long ord;
+      if (longOrds) {
+        ord = readLong(buffer, packedBytesLength);
+      } else {
+        ord = readInt(buffer, packedBytesLength);
+      }
+      if (rightTree.get(ord)) {
+        rightOut.writeBytes(buffer, 0, bytesPerDoc);
+        if (doClearBits) {
+          rightTree.clear(ord);
+        }
+        rightCount++;
+      } else {
+        leftOut.writeBytes(buffer, 0, bytesPerDoc);
+      }
+    }
+
+    // We wrote to the outputs directly, bypassing append, so record how many
+    // points each writer actually received (not the total for both sides):
+    ((OfflinePointWriter) right).count = rightCount;
+    ((OfflinePointWriter) left).count = count - rightCount;
+
+    return rightCount;
+  }
+
+  // Decodes a big-endian long from the 8 bytes starting at pos
+  // (same byte order as ByteArrayDataInput, where this was poached from):
+  private static long readLong(byte[] bytes, int pos) {
+    long value = 0;
+    for (int i = 0; i < Long.BYTES; i++) {
+      // Shift in one byte at a time, most significant byte first:
+      value = (value << 8) | (bytes[pos + i] & 0xFFL);
+    }
+    return value;
+  }
+
+  // Decodes a big-endian int from the 4 bytes starting at pos
+  // (same byte order as ByteArrayDataInput, where this was poached from):
+  private static int readInt(byte[] bytes, int pos) {
+    int value = 0;
+    for (int i = 0; i < Integer.BYTES; i++) {
+      // Shift in one byte at a time, most significant byte first:
+      value = (value << 8) | (bytes[pos + i] & 0xFF);
+    }
+    return value;
+  }
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/983908c8/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
index 5aa11de..f958050 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
@@ -28,7 +28,7 @@ final class OfflinePointWriter implements PointWriter {
final Directory tempDir;
final IndexOutput out;
final int packedBytesLength;
- private long count;
+ long count;
private boolean closed;
// true if ords are written as long (8 bytes), else 4 bytes
private boolean longOrds;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/983908c8/lucene/core/src/java/org/apache/lucene/util/bkd/PointReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/PointReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/PointReader.java
index fe7a961..1919f58 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/PointReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/PointReader.java
@@ -20,21 +20,48 @@ package org.apache.lucene.util.bkd;
import java.io.Closeable;
import java.io.IOException;
+import org.apache.lucene.util.LongBitSet;
+
/** One pass iterator through all points previously written with a
* {@link PointWriter}, abstracting away whether points are read
* from (offline) disk or simple arrays in heap. */
-interface PointReader extends Closeable {
+abstract class PointReader implements Closeable {
/** Returns false once iteration is done, else true. */
- boolean next() throws IOException;
+ abstract boolean next() throws IOException;
/** Returns the packed byte[] value */
- byte[] packedValue();
+ abstract byte[] packedValue();
/** Point ordinal */
- long ord();
+ abstract long ord();
/** DocID for this point */
- int docID();
+ abstract int docID();
+
+  /**
+   * Consumes the next {@code count} points from this reader and partitions them:
+   * a point is appended to {@code right} when its ordinal is set in {@code rightTree},
+   * otherwise to {@code left}. If {@code doClearBits} is true, the ordinal's bit is
+   * cleared once its point has been appended to {@code right}.
+   *
+   * @return the number of points appended to {@code right}
+   */
+  public long split(long count, LongBitSet rightTree, PointWriter left, PointWriter right, boolean doClearBits) throws IOException {
+
+    // Partition this source according to how the splitDim split the values:
+    long numRight = 0;
+    for (long remaining = count; remaining > 0; remaining--) {
+      // Keep the call outside the assert so it still runs with assertions disabled:
+      boolean hasNext = next();
+      assert hasNext;
+      byte[] value = packedValue();
+      long pointOrd = ord();
+      int doc = docID();
+
+      if (rightTree.get(pointOrd) == false) {
+        left.append(value, pointOrd, doc);
+        continue;
+      }
+
+      right.append(value, pointOrd, doc);
+      numRight++;
+      if (doClearBits) {
+        rightTree.clear(pointOrd);
+      }
+    }
+
+    return numRight;
+  }
}