You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2016/03/01 22:07:44 UTC

lucene-solr git commit: optimize BKDWriter's offline comparator a bit

Repository: lucene-solr
Updated Branches:
  refs/heads/master 0f4f53a8f -> 251cdbcee


optimize BKDWriter's offline comparator a bit


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/251cdbce
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/251cdbce
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/251cdbce

Branch: refs/heads/master
Commit: 251cdbcee367a938459ad4e95d5e7d444b03e1be
Parents: 0f4f53a
Author: Mike McCandless <mi...@apache.org>
Authored: Tue Mar 1 16:08:01 2016 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Tue Mar 1 16:08:01 2016 -0500

----------------------------------------------------------------------
 .../org/apache/lucene/util/StringHelper.java    |  1 +
 .../org/apache/lucene/util/bkd/BKDWriter.java   | 27 ++++++++++----------
 2 files changed, 15 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/251cdbce/lucene/core/src/java/org/apache/lucene/util/StringHelper.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/StringHelper.java b/lucene/core/src/java/org/apache/lucene/util/StringHelper.java
index c733e15..6d958ba 100644
--- a/lucene/core/src/java/org/apache/lucene/util/StringHelper.java
+++ b/lucene/core/src/java/org/apache/lucene/util/StringHelper.java
@@ -377,6 +377,7 @@ public abstract class StringHelper {
    *  big-endian unsigned values.  Returns positive int if a &gt; b,
    *  negative int if a &lt; b and 0 if a == b */
   public static int compare(int count, byte[] a, int aOffset, byte[] b, int bOffset) {
+    // TODO: dedup this w/ BytesRef.compareTo?
     for(int i=0;i<count;i++) {
       int cmp = (a[aOffset+i]&0xff) - (b[bOffset+i]&0xff);
       if (cmp != 0) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/251cdbce/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
index d7e1954..4397f2e 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
@@ -665,34 +665,35 @@ public class BKDWriter implements Closeable {
       // Offline sort:
       assert tempInput != null;
 
-      final ByteArrayDataInput reader = new ByteArrayDataInput();
       Comparator<BytesRef> cmp = new Comparator<BytesRef>() {
-        private final ByteArrayDataInput readerB = new ByteArrayDataInput();
+ 
+        final ByteArrayDataInput reader = new ByteArrayDataInput();
 
         @Override
         public int compare(BytesRef a, BytesRef b) {
-          reader.reset(a.bytes, a.offset, a.length);
-          reader.readBytes(scratch1, 0, scratch1.length);
-          final int docIDA = reader.readVInt();
-          final long ordA = reader.readVLong();
-
-          reader.reset(b.bytes, b.offset, b.length);
-          reader.readBytes(scratch2, 0, scratch2.length);
-          final int docIDB = reader.readVInt();
-          final long ordB = reader.readVLong();
 
-          int cmp = StringHelper.compare(bytesPerDim, scratch1, bytesPerDim*dim, scratch2, bytesPerDim*dim);
+          // First compare the bytes on the dimension we are sorting on:
+          int cmp = StringHelper.compare(bytesPerDim, a.bytes, a.offset + bytesPerDim*dim, b.bytes, b.offset + bytesPerDim*dim);
 
           if (cmp != 0) {
             return cmp;
           }
 
-          // Tie-break
+          // Tie-break by docID and then ord:
+          reader.reset(a.bytes, a.offset + packedBytesLength, a.length);
+          final int docIDA = reader.readVInt();
+          final long ordA = reader.readVLong();
+
+          reader.reset(b.bytes, b.offset + packedBytesLength, b.length);
+          final int docIDB = reader.readVInt();
+          final long ordB = reader.readVLong();
+
           cmp = Integer.compare(docIDA, docIDB);
           if (cmp != 0) {
             return cmp;
           }
 
+          // TODO: is this really necessary?  If OfflineSorter is stable, we can safely return 0 here, and avoid writing ords?
           return Long.compare(ordA, ordB);
         }
       };