You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2016/03/21 01:43:34 UTC

[12/50] lucene-solr:jira/SOLR-445: optimize offline -> offline partition

optimize offline -> offline partition


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/983908c8
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/983908c8
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/983908c8

Branch: refs/heads/jira/SOLR-445
Commit: 983908c80989d2af6868c8e1d99925a52d79a65e
Parents: d8eac8e
Author: Mike McCandless <mi...@apache.org>
Authored: Sun Mar 13 08:55:31 2016 -0400
Committer: Mike McCandless <mi...@apache.org>
Committed: Sun Mar 13 08:55:31 2016 -0400

----------------------------------------------------------------------
 .../org/apache/lucene/util/bkd/BKDWriter.java   | 19 +----
 .../apache/lucene/util/bkd/HeapPointReader.java |  4 +-
 .../lucene/util/bkd/OfflinePointReader.java     | 77 +++++++++++++++++++-
 .../lucene/util/bkd/OfflinePointWriter.java     |  2 +-
 .../org/apache/lucene/util/bkd/PointReader.java | 37 ++++++++--
 5 files changed, 111 insertions(+), 28 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/983908c8/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
index d4e30b7..c5cdc30 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
@@ -1176,24 +1176,7 @@ public class BKDWriter implements Closeable {
              PointWriter rightPointWriter = getPointWriter(source.count - leftCount, "right" + dim);
              PointReader reader = slices[dim].writer.getReader(slices[dim].start);) {
 
-          // Partition this source according to how the splitDim split the values:
-          long nextRightCount = 0;
-          for (long i=0;i<source.count;i++) {
-            boolean result = reader.next();
-            assert result;
-            byte[] packedValue = reader.packedValue();
-            long ord = reader.ord();
-            int docID = reader.docID();
-            if (ordBitSet.get(ord)) {
-              rightPointWriter.append(packedValue, ord, docID);
-              nextRightCount++;
-              if (dim == dimToClear) {
-                ordBitSet.clear(ord);
-              }
-            } else {
-              leftPointWriter.append(packedValue, ord, docID);
-            }
-          }
+          long nextRightCount = reader.split(source.count, ordBitSet, leftPointWriter, rightPointWriter, dim == dimToClear);
 
           leftSlices[dim] = new PathSlice(leftPointWriter, 0, leftCount);
           rightSlices[dim] = new PathSlice(rightPointWriter, 0, rightCount);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/983908c8/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java
index 63c7869..cd9152e 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java
@@ -18,9 +18,7 @@ package org.apache.lucene.util.bkd;
 
 import java.util.List;
 
-import org.apache.lucene.util.PagedBytes;
-
-final class HeapPointReader implements PointReader {
+final class HeapPointReader extends PointReader {
   private int curRead;
   final List<byte[]> blocks;
   final int valuesPerBlock;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/983908c8/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
index 3c4b8b5..c8ab47e 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
@@ -22,9 +22,11 @@ import java.io.IOException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.LongBitSet;
 
 /** Reads points from disk in a fixed-with format, previously written with {@link OfflinePointWriter}. */
-final class OfflinePointReader implements PointReader {
+final class OfflinePointReader extends PointReader {
   long countLeft;
   private final IndexInput in;
   private final byte[] packedValue;
@@ -90,5 +92,78 @@ final class OfflinePointReader implements PointReader {
   public void close() throws IOException {
     in.close();
   }
+
+  @Override
+  public long split(long count, LongBitSet rightTree, PointWriter left, PointWriter right, boolean doClearBits) throws IOException {
+    // Routes the next count points to right when their ord is set in rightTree, else to
+    // left, and returns how many went right; when doClearBits is true each matched bit
+    // is cleared from rightTree as it is consumed.
+
+    if (left instanceof OfflinePointWriter == false ||
+        right instanceof OfflinePointWriter == false) {
+      return super.split(count, rightTree, left, right, doClearBits);
+    }
+
+    // We specialize the offline -> offline split since the default impl
+    // is somewhat wasteful otherwise (e.g. decoding docID when we don't
+    // need to)
+
+    final OfflinePointWriter leftWriter = (OfflinePointWriter) left;
+    final OfflinePointWriter rightWriter = (OfflinePointWriter) right;
+    final IndexOutput leftOut = leftWriter.out;
+    final IndexOutput rightOut = rightWriter.out;
+
+    // Fixed-width record: packed value, then the ord (long or int), then a trailing int (the docID):
+    final int packedBytesLength = packedValue.length;
+    final int bytesPerDoc = packedBytesLength + (longOrds ? Long.BYTES : Integer.BYTES) + Integer.BYTES;
+
+    assert count <= countLeft: "count=" + count + " countLeft=" + countLeft;
+    countLeft -= count;
+
+    long rightCount = 0;
+    final byte[] buffer = new byte[bytesPerDoc];
+    for (long i = 0; i < count; i++) {
+      in.readBytes(buffer, 0, buffer.length);
+      // Only the ord is decoded; the whole record is then copied through verbatim:
+      final long ord;
+      if (longOrds) {
+        ord = readLong(buffer, packedBytesLength);
+      } else {
+        ord = readInt(buffer, packedBytesLength);
+      }
+
+      if (rightTree.get(ord)) {
+        rightOut.writeBytes(buffer, 0, bytesPerDoc);
+        if (doClearBits) {
+          rightTree.clear(ord);
+        }
+        rightCount++;
+      } else {
+        leftOut.writeBytes(buffer, 0, bytesPerDoc);
+      }
+    }
+
+    // Each writer received only its own share of the points, so record exactly
+    // that share on it rather than the total number of points that were split:
+    rightWriter.count = rightCount;
+    leftWriter.count = count - rightCount;
+
+    return rightCount;
+  }
+
+  // Big-endian decode of the 8 bytes starting at pos; poached from ByteArrayDataInput:
+  private static long readLong(byte[] bytes, int pos) {
+    long value = 0;
+    for (int end = pos + Long.BYTES; pos < end; pos++) {
+      value = (value << 8) | (bytes[pos] & 0xFFL);
+    }
+    return value;
+  }
+
+  // Big-endian decode of the 4 bytes starting at pos; poached from ByteArrayDataInput:
+  private static int readInt(byte[] bytes, int pos) {
+    final int upperHalf = ((bytes[pos] & 0xFF) << 24) | ((bytes[pos + 1] & 0xFF) << 16);
+    return upperHalf | ((bytes[pos + 2] & 0xFF) << 8) | (bytes[pos + 3] & 0xFF);
+  }
 }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/983908c8/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
index 5aa11de..f958050 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
@@ -28,7 +28,7 @@ final class OfflinePointWriter implements PointWriter {
   final Directory tempDir;
   final IndexOutput out;
   final int packedBytesLength;
-  private long count;
+  long count;
   private boolean closed;
   // true if ords are written as long (8 bytes), else 4 bytes
   private boolean longOrds;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/983908c8/lucene/core/src/java/org/apache/lucene/util/bkd/PointReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/PointReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/PointReader.java
index fe7a961..1919f58 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/PointReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/PointReader.java
@@ -20,21 +20,48 @@ package org.apache.lucene.util.bkd;
 import java.io.Closeable;
 import java.io.IOException;
 
+import org.apache.lucene.util.LongBitSet;
+
 /** One pass iterator through all points previously written with a
  *  {@link PointWriter}, abstracting away whether points a read
  *  from (offline) disk or simple arrays in heap. */
-interface PointReader extends Closeable {
+abstract class PointReader implements Closeable {
 
   /** Returns false once iteration is done, else true. */
-  boolean next() throws IOException;
+  abstract boolean next() throws IOException;
 
   /** Returns the packed byte[] value */
-  byte[] packedValue();
+  abstract byte[] packedValue();
 
   /** Point ordinal */
-  long ord();
+  abstract long ord();
 
   /** DocID for this point */
-  int docID();
+  abstract int docID();
+
+  /** Reads the next {@code count} points, appending each to {@code right} when its ord is set
+   *  in {@code rightTree} and to {@code left} otherwise.  When {@code doClearBits} is true each
+   *  matched bit is cleared after the append.  Returns how many points went to {@code right}. */
+  public long split(long count, LongBitSet rightTree, PointWriter left, PointWriter right, boolean doClearBits) throws IOException {
+    long numRight = 0;
+    for (long remaining = count; remaining > 0; remaining--) {
+      final boolean hasNext = next();
+      assert hasNext;
+      final byte[] value = packedValue();
+      final long pointOrd = ord();
+      final int doc = docID();
+      if (rightTree.get(pointOrd) == false) {
+        // Bit not set: this point stays on the left side
+        left.append(value, pointOrd, doc);
+        continue;
+      }
+      right.append(value, pointOrd, doc);
+      numRight++;
+      if (doClearBits) {
+        rightTree.clear(pointOrd);
+      }
+    }
+    return numRight;
+  }
 }