You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2016/03/25 12:43:15 UTC

lucene-solr:master: BKDWriter optimization: avoid copying unnecessary bytes when marking the right tree ords

Repository: lucene-solr
Updated Branches:
  refs/heads/master fc7f55913 -> 5d16e7581


BKDWriter optimization: avoid copying unnecessary bytes when marking the right tree ords


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/5d16e758
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/5d16e758
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/5d16e758

Branch: refs/heads/master
Commit: 5d16e758193feec58b898c437dc05d81198d69f5
Parents: fc7f559
Author: Mike McCandless <mi...@apache.org>
Authored: Fri Mar 25 07:44:40 2016 -0400
Committer: Mike McCandless <mi...@apache.org>
Committed: Fri Mar 25 07:44:40 2016 -0400

----------------------------------------------------------------------
 .../org/apache/lucene/util/bkd/BKDWriter.java   | 14 ++---------
 .../lucene/util/bkd/OfflinePointReader.java     | 25 ++++++++++++++++++++
 .../org/apache/lucene/util/bkd/PointReader.java | 12 ++++++++++
 3 files changed, 39 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d16e758/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
index ffba762..dd2ec5d 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
@@ -1004,22 +1004,12 @@ public class BKDWriter implements Closeable {
     try (PointReader reader = source.writer.getReader(source.start + source.count - rightCount, rightCount)) {
       boolean result = reader.next();
       assert result;
-
       System.arraycopy(reader.packedValue(), splitDim*bytesPerDim, scratch1, 0, bytesPerDim);
       if (numDims > 1) {
-
         assert ordBitSet.get(reader.ord()) == false;
         ordBitSet.set(reader.ord());
-
-        // Start at 1 because we already did the first value above (so we could keep the split value):
-        for(int i=1;i<rightCount;i++) {
-          result = reader.next();
-          if (result == false) {
-            throw new IllegalStateException("did not see enough points from reader=" + reader);
-          }
-          assert ordBitSet.get(reader.ord()) == false: "ord=" + reader.ord() + " was seen twice from " + source.writer;
-          ordBitSet.set(reader.ord());
-        }
+        // Subtract 1 from rightCount because we already did the first value above (so we could record the split value):
+        reader.markOrds(rightCount-1, ordBitSet);
       }
     } catch (Throwable t) {
       verifyChecksum(t, source.writer);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d16e758/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
index 830e37b..20faa9e 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
@@ -33,6 +33,7 @@ final class OfflinePointReader extends PointReader {
   final IndexInput in;
   private final byte[] packedValue;
   final boolean singleValuePerDoc;
+  final int bytesPerDoc;
   private long ord;
   private int docID;
   // true if ords are written as long (8 bytes), else 4 bytes
@@ -53,6 +54,7 @@ final class OfflinePointReader extends PointReader {
         bytesPerDoc += Integer.BYTES;
       }
     }
+    this.bytesPerDoc = bytesPerDoc;
 
     if ((start + length) * bytesPerDoc + CodecUtil.footerLength() > tempDir.fileLength(tempFileName)) {
       throw new IllegalArgumentException("requested slice is beyond the length of this file: start=" + start + " length=" + length + " bytesPerDoc=" + bytesPerDoc + " fileLength=" + tempDir.fileLength(tempFileName) + " tempFileName=" + tempFileName);
@@ -135,6 +137,29 @@ final class OfflinePointReader extends PointReader {
   }
 
   @Override
+  public void markOrds(long count, LongBitSet ordBitSet) throws IOException { // sets the next count ords in ordBitSet by seeking directly to each ord instead of reading the packed values
+    if (countLeft < count) {
+      throw new IllegalStateException("only " + countLeft + " points remain, but " + count + " were requested");
+    }
+    long fp = in.getFilePointer() + packedValue.length; // step over the packed value bytes of the next entry
+    if (singleValuePerDoc == false) {
+      fp += Integer.BYTES; // presumably steps over an int docID stored ahead of the ord -- TODO confirm against the writer's entry layout
+    }
+    for(long i=0;i<count;i++) {
+      in.seek(fp);
+      long ord;
+      if (longOrds) { // ords are stored as 8 bytes when longOrds is set, else 4 bytes
+        ord = in.readLong();
+      } else {
+        ord = in.readInt();
+      }
+      assert ordBitSet.get(ord) == false: "ord=" + ord + " i=" + i + " was seen twice from " + this;
+      ordBitSet.set(ord);
+      fp += bytesPerDoc; // advance by one whole entry so fp lands on the next entry's ord
+    }
+  }
+
+  @Override
   public long split(long count, LongBitSet rightTree, PointWriter left, PointWriter right, boolean doClearBits) throws IOException {
 
     if (left instanceof OfflinePointWriter == false ||

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d16e758/lucene/core/src/java/org/apache/lucene/util/bkd/PointReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/PointReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/PointReader.java
index 1919f58..90de0d1 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/PointReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/PointReader.java
@@ -39,6 +39,18 @@ abstract class PointReader implements Closeable {
   /** DocID for this point */
   abstract int docID();
 
+  /** Advances through the next {@code count} points via {@code next()}, setting each point's ord in the provided {@code ordBitSet}; throws {@code IllegalStateException} if the reader is exhausted first. */
+  public void markOrds(long count, LongBitSet ordBitSet) throws IOException {
+    for(long i=0;i<count;i++) { // long index: count is a long, and an int index would wrap before reaching a count above Integer.MAX_VALUE
+      boolean result = next();
+      if (result == false) {
+        throw new IllegalStateException("did not see enough points from reader=" + this);
+      }
+      assert ordBitSet.get(ord()) == false: "ord=" + ord() + " was seen twice from " + this;
+      ordBitSet.set(ord());
+    }
+  }
+
   /** Splits this reader into left and right partitions */
   public long split(long count, LongBitSet rightTree, PointWriter left, PointWriter right, boolean doClearBits) throws IOException {