You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2016/03/13 12:25:20 UTC

[1/5] lucene-solr git commit: fix int overflow

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_6x d2aaae001 -> 42d1eb536


fix int overflow


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/85739d86
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/85739d86
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/85739d86

Branch: refs/heads/branch_6x
Commit: 85739d86299907e4d9f484d017c68a5b76fe075d
Parents: d2aaae0
Author: Mike McCandless <mi...@apache.org>
Authored: Sun Mar 13 06:28:18 2016 -0400
Committer: Mike McCandless <mi...@apache.org>
Committed: Sun Mar 13 07:01:06 2016 -0400

----------------------------------------------------------------------
 lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/85739d86/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
index 765b01c..33d7bc4 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
@@ -1177,7 +1177,7 @@ public class BKDWriter implements Closeable {
              PointReader reader = slices[dim].writer.getReader(slices[dim].start);) {
 
           // Partition this source according to how the splitDim split the values:
-          int nextRightCount = 0;
+          long nextRightCount = 0;
           for (long i=0;i<source.count;i++) {
             boolean result = reader.next();
             assert result;


[3/5] lucene-solr git commit: improve 2B points test; add new 2B test against BKD directly

Posted by mi...@apache.org.
improve 2B points test; add new 2B test against BKD directly


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/114f8507
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/114f8507
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/114f8507

Branch: refs/heads/branch_6x
Commit: 114f85076ec7516fc3590376d3622e05fb648e53
Parents: 641c6d3
Author: Mike McCandless <mi...@apache.org>
Authored: Sun Mar 13 06:41:19 2016 -0400
Committer: Mike McCandless <mi...@apache.org>
Committed: Sun Mar 13 07:01:25 2016 -0400

----------------------------------------------------------------------
 .../org/apache/lucene/index/Test2BPoints.java   |   9 +-
 .../apache/lucene/util/bkd/Test2BBKDPoints.java | 121 +++++++++++++++++++
 2 files changed, 127 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/114f8507/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java b/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java
index 43207b8..75f2bbe 100644
--- a/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java
+++ b/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java
@@ -70,11 +70,12 @@ public class Test2BPoints extends LuceneTestCase {
     }
 
     final int numDocs = (Integer.MAX_VALUE / 26) + 1;
-    long counter = 0;
+    int counter = 0;
     for (int i = 0; i < numDocs; i++) {
       Document doc = new Document();
       for (int j=0;j<26;j++) {
-        doc.add(new LongPoint("long", counter));
+        long x = (((long) random().nextInt() << 32)) | (long) counter;
+        doc.add(new LongPoint("long", x));
         counter++;
       }
       w.addDocument(doc);
@@ -120,7 +121,9 @@ public class Test2BPoints extends LuceneTestCase {
     for (int i = 0; i < numDocs; i++) {
       Document doc = new Document();
       for (int j=0;j<26;j++) {
-        doc.add(new LongPoint("long", counter, 2*counter+1));
+        long x = (((long) random().nextInt() << 32)) | (long) counter;
+        long y = (((long) random().nextInt() << 32)) | (long) random().nextInt();
+        doc.add(new LongPoint("long", x, y));
         counter++;
       }
       w.addDocument(doc);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/114f8507/lucene/core/src/test/org/apache/lucene/util/bkd/Test2BBKDPoints.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/Test2BBKDPoints.java b/lucene/core/src/test/org/apache/lucene/util/bkd/Test2BBKDPoints.java
new file mode 100644
index 0000000..cf18409
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/util/bkd/Test2BBKDPoints.java
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.util.bkd;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.FilterCodec;
+import org.apache.lucene.codecs.PointsFormat;
+import org.apache.lucene.codecs.PointsReader;
+import org.apache.lucene.codecs.PointsWriter;
+import org.apache.lucene.codecs.lucene60.Lucene60PointsReader;
+import org.apache.lucene.codecs.lucene60.Lucene60PointsWriter;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.LuceneTestCase.Monster;
+import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.NumericUtils;
+import org.apache.lucene.util.TestUtil;
+import org.apache.lucene.util.TimeUnits;
+
+import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
+
+// e.g. run like this: ant test -Dtestcase=Test2BBKDPoints -Dtests.nightly=true -Dtests.verbose=true -Dtests.monster=true
+// 
+//   or: python -u /l/util/src/python/repeatLuceneTest.py -heap 4g -once -nolog -tmpDir /b/tmp -logDir /l/logs Test2BBKDPoints.test2D -verbose
+
+@TimeoutSuite(millis = 365 * 24 * TimeUnits.HOUR) // hopefully ~1 year is long enough ;)
+@Monster("takes at least 4 hours and consumes many GB of temp disk space")
+public class Test2BBKDPoints extends LuceneTestCase {
+  public void test1D() throws Exception {
+    Directory dir = FSDirectory.open(createTempDir("2BBKDPoints1D"));
+
+    final int numDocs = (Integer.MAX_VALUE / 26) + 100;
+
+    BKDWriter w = new BKDWriter(numDocs, dir, "_0", 1, 1024, 128, Long.BYTES, 26L * numDocs);
+    int counter = 0;
+    byte[] packedBytes = new byte[Long.BYTES];
+    for (int docID = 0; docID < numDocs; docID++) {
+      for (int j=0;j<26;j++) {
+        // first a random int:
+        NumericUtils.intToSortableBytes(random().nextInt(), packedBytes, 0);
+        // then our counter, which will overflow a bit in the end:
+        NumericUtils.intToSortableBytes(counter, packedBytes, Integer.BYTES);
+        w.add(packedBytes, docID);
+        counter++;
+      }
+      if (VERBOSE && docID % 100000 == 0) {
+        System.out.println(docID + " of " + numDocs + "...");
+      }
+    }
+    IndexOutput out = dir.createOutput("1d.bkd", IOContext.DEFAULT);
+    long indexFP = w.finish(out);
+    out.close();
+
+    IndexInput in = dir.openInput("1d.bkd", IOContext.DEFAULT);
+    in.seek(indexFP);
+    BKDReader r = new BKDReader(in);
+    r.verify(numDocs);
+    in.close();
+    dir.close();
+  }
+
+  public void test2D() throws Exception {
+    Directory dir = FSDirectory.open(createTempDir("2BBKDPoints2D"));
+
+    final int numDocs = (Integer.MAX_VALUE / 26) + 100;
+
+    BKDWriter w = new BKDWriter(numDocs, dir, "_0", 2, Long.BYTES, 26L * numDocs);
+    long counter = 0;
+    byte[] packedBytes = new byte[2*Long.BYTES];
+    for (int docID = 0; docID < numDocs; docID++) {
+      for (int j=0;j<26;j++) {
+        // first a random int:
+        NumericUtils.intToSortableBytes(random().nextInt(), packedBytes, 0);
+        // then our counter, which will overflow a bit in the end:
+        NumericUtils.intToSortableBytes(counter, packedBytes, Integer.BYTES);
+        // then two random ints for the 2nd dimension:
+        NumericUtils.intoSortableBytes(random().nextInt(), packedBytes, Long.BYTES);
+        NumericUtils.intoSortableBytes(random().nextInt(), packedBytes, Long.BYTES + Integer.BYTES);
+        w.add(packedBytes, docID);
+        counter++;
+      }
+      if (VERBOSE && docID % 100000 == 0) {
+        System.out.println(docID + " of " + numDocs + "...");
+      }
+    }
+    IndexOutput out = dir.createOutput("2d.bkd", IOContext.DEFAULT);
+    long indexFP = w.finish(out);
+    out.close();
+
+    IndexInput in = dir.openInput("2d.bkd", IOContext.DEFAULT);
+    in.seek(indexFP);
+    BKDReader r = new BKDReader(in);
+    r.verify(numDocs);
+    in.close();
+    dir.close();
+  }
+}


[2/5] lucene-solr git commit: make BKD's temp file names a bit more descriptive

Posted by mi...@apache.org.
make BKD's temp file names a bit more descriptive


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/641c6d30
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/641c6d30
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/641c6d30

Branch: refs/heads/branch_6x
Commit: 641c6d30e842b221556967345eac383978b94c69
Parents: 85739d8
Author: Mike McCandless <mi...@apache.org>
Authored: Sun Mar 13 06:28:49 2016 -0400
Committer: Mike McCandless <mi...@apache.org>
Committed: Sun Mar 13 07:01:15 2016 -0400

----------------------------------------------------------------------
 .../src/java/org/apache/lucene/util/bkd/BKDWriter.java    | 10 +++++-----
 .../org/apache/lucene/util/bkd/OfflinePointWriter.java    |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/641c6d30/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
index 33d7bc4..d4e30b7 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
@@ -216,7 +216,7 @@ public class BKDWriter implements Closeable {
   private void switchToOffline() throws IOException {
 
     // For each .add we just append to this input file, then in .finish we sort this input and resursively build the tree:
-    offlinePointWriter = new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, longOrds);
+    offlinePointWriter = new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, longOrds, "switch");
     tempInput = offlinePointWriter.out;
     PointReader reader = heapPointWriter.getReader(0);
     for(int i=0;i<pointCount;i++) {
@@ -1172,8 +1172,8 @@ public class BKDWriter implements Closeable {
           continue;
         }
 
-        try (PointWriter leftPointWriter = getPointWriter(leftCount);
-             PointWriter rightPointWriter = getPointWriter(source.count - leftCount);
+        try (PointWriter leftPointWriter = getPointWriter(leftCount, "left" + dim);
+             PointWriter rightPointWriter = getPointWriter(source.count - leftCount, "right" + dim);
              PointReader reader = slices[dim].writer.getReader(slices[dim].start);) {
 
           // Partition this source according to how the splitDim split the values:
@@ -1238,12 +1238,12 @@ public class BKDWriter implements Closeable {
     return true;
   }
 
-  PointWriter getPointWriter(long count) throws IOException {
+  PointWriter getPointWriter(long count, String desc) throws IOException {
     if (count <= maxPointsSortInHeap) {
       int size = Math.toIntExact(count);
       return new HeapPointWriter(size, size, packedBytesLength, longOrds);
     } else {
-      return new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, longOrds);
+      return new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, longOrds, desc);
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/641c6d30/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
index dcf6781..5aa11de 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
@@ -33,8 +33,8 @@ final class OfflinePointWriter implements PointWriter {
   // true if ords are written as long (8 bytes), else 4 bytes
   private boolean longOrds;
 
-  public OfflinePointWriter(Directory tempDir, String tempFileNamePrefix, int packedBytesLength, boolean longOrds) throws IOException {
-    this.out = tempDir.createTempOutput(tempFileNamePrefix, "bkd", IOContext.DEFAULT);
+  public OfflinePointWriter(Directory tempDir, String tempFileNamePrefix, int packedBytesLength, boolean longOrds, String desc) throws IOException {
+    this.out = tempDir.createTempOutput(tempFileNamePrefix, "bkd_" + desc, IOContext.DEFAULT);
     this.tempDir = tempDir;
     this.packedBytesLength = packedBytesLength;
     this.longOrds = longOrds;


[5/5] lucene-solr git commit: fix compiler warning

Posted by mi...@apache.org.
fix compiler warning


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/42d1eb53
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/42d1eb53
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/42d1eb53

Branch: refs/heads/branch_6x
Commit: 42d1eb536461a5871f44a6d9f0b38ec839ce7283
Parents: 44bd24b
Author: Mike McCandless <mi...@apache.org>
Authored: Sun Mar 13 07:02:28 2016 -0400
Committer: Mike McCandless <mi...@apache.org>
Committed: Sun Mar 13 07:02:28 2016 -0400

----------------------------------------------------------------------
 lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/42d1eb53/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java b/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java
index 75f2bbe..5e69979 100644
--- a/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java
+++ b/lucene/core/src/test/org/apache/lucene/index/Test2BPoints.java
@@ -117,7 +117,7 @@ public class Test2BPoints extends LuceneTestCase {
     }
 
     final int numDocs = (Integer.MAX_VALUE / 26) + 1;
-    long counter = 0;
+    int counter = 0;
     for (int i = 0; i < numDocs; i++) {
       Document doc = new Document();
       for (int j=0;j<26;j++) {


[4/5] lucene-solr git commit: let BKD use 256 MB heap in 2B tests

Posted by mi...@apache.org.
let BKD use 256 MB heap in 2B tests


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/44bd24b1
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/44bd24b1
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/44bd24b1

Branch: refs/heads/branch_6x
Commit: 44bd24b1d4d6abd20c24fed09f73acd0874c78e6
Parents: 114f850
Author: Mike McCandless <mi...@apache.org>
Authored: Sun Mar 13 06:53:24 2016 -0400
Committer: Mike McCandless <mi...@apache.org>
Committed: Sun Mar 13 07:01:30 2016 -0400

----------------------------------------------------------------------
 .../test/org/apache/lucene/util/bkd/Test2BBKDPoints.java  | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/44bd24b1/lucene/core/src/test/org/apache/lucene/util/bkd/Test2BBKDPoints.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/Test2BBKDPoints.java b/lucene/core/src/test/org/apache/lucene/util/bkd/Test2BBKDPoints.java
index cf18409..eb3aa47 100644
--- a/lucene/core/src/test/org/apache/lucene/util/bkd/Test2BBKDPoints.java
+++ b/lucene/core/src/test/org/apache/lucene/util/bkd/Test2BBKDPoints.java
@@ -55,7 +55,7 @@ public class Test2BBKDPoints extends LuceneTestCase {
 
     final int numDocs = (Integer.MAX_VALUE / 26) + 100;
 
-    BKDWriter w = new BKDWriter(numDocs, dir, "_0", 1, 1024, 128, Long.BYTES, 26L * numDocs);
+    BKDWriter w = new BKDWriter(numDocs, dir, "_0", 1, 1024, 256, Long.BYTES, 26L * numDocs);
     int counter = 0;
     byte[] packedBytes = new byte[Long.BYTES];
     for (int docID = 0; docID < numDocs; docID++) {
@@ -88,8 +88,8 @@ public class Test2BBKDPoints extends LuceneTestCase {
 
     final int numDocs = (Integer.MAX_VALUE / 26) + 100;
 
-    BKDWriter w = new BKDWriter(numDocs, dir, "_0", 2, Long.BYTES, 26L * numDocs);
-    long counter = 0;
+    BKDWriter w = new BKDWriter(numDocs, dir, "_0", 2, 1024, 256, Long.BYTES, 26L * numDocs);
+    int counter = 0;
     byte[] packedBytes = new byte[2*Long.BYTES];
     for (int docID = 0; docID < numDocs; docID++) {
       for (int j=0;j<26;j++) {
@@ -98,8 +98,8 @@ public class Test2BBKDPoints extends LuceneTestCase {
         // then our counter, which will overflow a bit in the end:
         NumericUtils.intToSortableBytes(counter, packedBytes, Integer.BYTES);
         // then two random ints for the 2nd dimension:
-        NumericUtils.intoSortableBytes(random().nextInt(), packedBytes, Long.BYTES);
-        NumericUtils.intoSortableBytes(random().nextInt(), packedBytes, Long.BYTES + Integer.BYTES);
+        NumericUtils.intToSortableBytes(random().nextInt(), packedBytes, Long.BYTES);
+        NumericUtils.intToSortableBytes(random().nextInt(), packedBytes, Long.BYTES + Integer.BYTES);
         w.add(packedBytes, docID);
         counter++;
       }