You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2016/03/24 15:02:06 UTC

lucene-solr:branch_6x: simplify BKDWriter's comparator tie break for its temp files; add test case verifying tie break by docID

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_6x d9de15867 -> 2ca08aa6a


simplify BKDWriter's comparator tie break for its temp files; add test case verifying tie break by docID


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/2ca08aa6
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/2ca08aa6
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/2ca08aa6

Branch: refs/heads/branch_6x
Commit: 2ca08aa6ad09dd652440c9aab1269e8652f75ae7
Parents: d9de158
Author: Mike McCandless <mi...@apache.org>
Authored: Thu Mar 24 10:02:48 2016 -0400
Committer: Mike McCandless <mi...@apache.org>
Committed: Thu Mar 24 10:03:27 2016 -0400

----------------------------------------------------------------------
 .../org/apache/lucene/util/bkd/BKDWriter.java   | 26 ++++---------
 .../lucene/util/bkd/OfflinePointReader.java     |  4 +-
 .../lucene/util/bkd/OfflinePointWriter.java     |  2 +-
 .../org/apache/lucene/util/bkd/TestBKD.java     | 40 +++++++++++++++++++-
 4 files changed, 49 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2ca08aa6/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
index 6dc413a..ffba762 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
@@ -716,32 +716,20 @@ public class BKDWriter implements Closeable {
 
         @Override
         public int compare(BytesRef a, BytesRef b) {
-
-          // First compare the bytes on the dimension we are sorting on:
+          // First compare by the requested dimension we are sorting by:
           int cmp = StringHelper.compare(bytesPerDim, a.bytes, a.offset + bytesPerDim*dim, b.bytes, b.offset + bytesPerDim*dim);
 
           if (cmp != 0) {
             return cmp;
           }
 
-          // Tie-break by docID:
-          int offset;
-          if (singleValuePerDoc) {
-            offset = 0;
-          } else if (longOrds) {
-            offset = Long.BYTES;
-          } else {
-            offset = Integer.BYTES;
-          }
-          reader.reset(a.bytes, a.offset + packedBytesLength + offset, a.length);
-          final int docIDA = reader.readInt();
-
-          reader.reset(b.bytes, b.offset + packedBytesLength + offset, b.length);
-          final int docIDB = reader.readInt();
+          // Tie-break by docID ... no need to tie break on ord, for the case where the same doc has
+          // the same value in a given dimension indexed more than once: it can't matter at search
+          // time since we don't write ords into the index:
 
-          // No need to tie break on ord, for the case where the same doc has the same value in a given dimension indexed more than once: it
-          // can't matter at search time since we don't write ords into the index:
-          return Integer.compare(docIDA, docIDB);
+          return StringHelper.compare(Integer.BYTES,
+                                      a.bytes, a.offset + packedBytesLength,
+                                      b.bytes, b.offset + packedBytesLength);
         }
       };
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2ca08aa6/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
index ebb2fd1..f3bf2bc 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
@@ -93,15 +93,15 @@ final class OfflinePointReader extends PointReader {
       assert countLeft == -1;
       return false;
     }
+    docID = in.readInt();
     if (singleValuePerDoc == false) {
       if (longOrds) {
         ord = in.readLong();
       } else {
         ord = in.readInt();
       }
-      docID = in.readInt();
     } else {
-      ord = docID = in.readInt();
+      ord = docID;
     }
     return true;
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2ca08aa6/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
index 2261b81..2a5952e 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
@@ -68,6 +68,7 @@ final class OfflinePointWriter implements PointWriter {
   public void append(byte[] packedValue, long ord, int docID) throws IOException {
     assert packedValue.length == packedBytesLength;
     out.writeBytes(packedValue, 0, packedValue.length);
+    out.writeInt(docID);
     if (singleValuePerDoc == false) {
       if (longOrds) {
         out.writeLong(ord);
@@ -76,7 +77,6 @@ final class OfflinePointWriter implements PointWriter {
         out.writeInt((int) ord);
       }
     }
-    out.writeInt(docID);
     count++;
     assert expectedCount == 0 || count <= expectedCount;
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2ca08aa6/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
index 1a6e653..1aaa689 100644
--- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
+++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
@@ -45,7 +45,7 @@ public class TestBKD extends LuceneTestCase {
 
   public void testBasicInts1D() throws Exception {
     try (Directory dir = getDirectory(100)) {
-        BKDWriter w = new BKDWriter(100, dir, "tmp", 1, 4, 2, 1.0f, 100, true);
+      BKDWriter w = new BKDWriter(100, dir, "tmp", 1, 4, 2, 1.0f, 100, true);
       byte[] scratch = new byte[4];
       for(int docID=0;docID<100;docID++) {
         NumericUtils.intToSortableBytes(docID, scratch, 0);
@@ -889,4 +889,42 @@ public class TestBKD extends LuceneTestCase {
     }
     fail("did not see a supporessed CorruptIndexException");
   }
+
+  public void testTieBreakOrder() throws Exception {
+    try (Directory dir = newDirectory()) {
+      int numDocs = 10000;
+      BKDWriter w = new BKDWriter(numDocs+1, dir, "tmp", 1, 4, 2, 0.01f, numDocs, true);
+      for(int i=0;i<numDocs;i++) {
+        w.add(new byte[Integer.BYTES], i);
+      }
+
+      IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
+      long fp = w.finish(out);
+      out.close();
+
+      IndexInput in = dir.openInput("bkd", IOContext.DEFAULT);
+      in.seek(fp);
+      BKDReader r = new BKDReader(in);
+      r.intersect(new IntersectVisitor() {
+          int lastDocID = -1;
+
+          @Override
+          public void visit(int docID) {
+            assertTrue("lastDocID=" + lastDocID + " docID=" + docID, docID > lastDocID);
+            lastDocID = docID;
+          }
+
+          @Override
+          public void visit(int docID, byte[] packedValue) {
+            visit(docID);
+          }
+
+          @Override
+          public Relation compare(byte[] minPacked, byte[] maxPacked) {
+            return Relation.CELL_CROSSES_QUERY;
+          }
+      });
+      in.close();
+    }
+  }
 }