You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by iv...@apache.org on 2019/06/26 07:50:43 UTC

[lucene-solr] branch branch_8x updated (9fd51ba -> e296c34)

This is an automated email from the ASF dual-hosted git repository.

ivera pushed a change to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git.


    from 9fd51ba  LUCENE-7714: Add a range query in sandbox that takes advantage of index sorting.
     new afb4b92  LUCENE-8879: Improve BKDRadixSelector tests
     new e296c34  Change Arrays for FutureArrays

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 lucene/CHANGES.txt                                 |   4 +-
 .../lucene/util/bkd/TestBKDRadixSelector.java      |  65 +++++---
 .../apache/lucene/util/bkd/TestBKDRadixSort.java   | 164 +++++++++++++++++++++
 3 files changed, 212 insertions(+), 21 deletions(-)
 create mode 100644 lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDRadixSort.java


[lucene-solr] 02/02: Change Arrays for FutureArrays

Posted by iv...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

ivera pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit e296c34ff290ba503a8c49a18b121a2c4167f734
Author: iverase <iv...@apache.org>
AuthorDate: Wed Jun 26 09:50:21 2019 +0200

    Change Arrays for FutureArrays
---
 .../core/src/test/org/apache/lucene/util/bkd/TestBKDRadixSort.java   | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDRadixSort.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDRadixSort.java
index a7cebd0..045f223 100644
--- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDRadixSort.java
+++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDRadixSort.java
@@ -22,6 +22,7 @@ import java.util.Arrays;
 
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FutureArrays;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
 
@@ -127,7 +128,7 @@ public class TestBKDRadixSort extends LuceneTestCase {
       for (int j = start; j < end; j++) {
         PointValue pointValue = points.getPackedValueSlice(j);
         BytesRef value = pointValue.packedValue();
-        int cmp = Arrays.compareUnsigned(value.bytes, value.offset + dimOffset, value.offset + dimOffset + bytesPerDim, previous, dimOffset, dimOffset + bytesPerDim);
+        int cmp = FutureArrays.compareUnsigned(value.bytes, value.offset + dimOffset, value.offset + dimOffset + bytesPerDim, previous, dimOffset, dimOffset + bytesPerDim);
         assertTrue(cmp >= 0);
         if (cmp == 0) {
           assertTrue(pointValue.docID() >= previousDocId);
@@ -150,7 +151,7 @@ public class TestBKDRadixSort extends LuceneTestCase {
     for (int i = start + 1; i < end; i++) {
       value = points.getPackedValueSlice(i);
       bytesRef = value.packedValue();
-      int diff = Arrays.mismatch(bytesRef.bytes, bytesRef.offset + offset, bytesRef.offset + offset + bytesPerDimension, firstValue, 0, bytesPerDimension);
+      int diff = FutureArrays.mismatch(bytesRef.bytes, bytesRef.offset + offset, bytesRef.offset + offset + bytesPerDimension, firstValue, 0, bytesPerDimension);
       if (diff != -1 && commonPrefixLength > diff) {
         if (diff == 0) {
           return diff;


[lucene-solr] 01/02: LUCENE-8879: Improve BKDRadixSelector tests

Posted by iv...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

ivera pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit afb4b928bc2c3d352e1002217787323d54f51535
Author: Ignacio Vera <iv...@gmail.com>
AuthorDate: Wed Jun 26 09:45:44 2019 +0200

    LUCENE-8879: Improve BKDRadixSelector tests
    
    This change adds explicit test for the sorting capabilities.
---
 lucene/CHANGES.txt                                 |   4 +-
 .../lucene/util/bkd/TestBKDRadixSelector.java      |  65 +++++---
 .../apache/lucene/util/bkd/TestBKDRadixSort.java   | 163 +++++++++++++++++++++
 3 files changed, 211 insertions(+), 21 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index bbe2ccc..6652c7c 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -121,7 +121,9 @@ Other
 * LUCENE-8778: Define analyzer SPI names as static final fields and document the names in Javadocs.
   (Tomoko Uchida, Uwe Schindler)
 
-* LUCENE-8838: Remove support for Steiner points on Tessellator.  (Ignacio Vera)
+* LUCENE-8838: Remove support for Steiner points on Tessellator. (Ignacio Vera)
+
+* LUCENE-8879: Improve BKDRadixSelector tests. (Ignacio Vera)
 
 ======================= Lucene 8.1.1 =======================
 (No Changes)
diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDRadixSelector.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDRadixSelector.java
index c290825..4566673 100644
--- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDRadixSelector.java
+++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDRadixSelector.java
@@ -49,7 +49,7 @@ public class TestBKDRadixSelector extends LuceneTestCase {
     points.append(value, 3);
     points.close();
     PointWriter copy = copyPoints(dir,points, packedLength);
-    verify(dir, copy, dimensions, 0, values, middle, packedLength, bytesPerDimensions, 0);
+    verify(dir, copy, dimensions, dimensions, 0, values, middle, packedLength, bytesPerDimensions, 0);
     dir.close();
   }
 
@@ -80,9 +80,10 @@ public class TestBKDRadixSelector extends LuceneTestCase {
     }
     int partitionPoint = TestUtil.nextInt(random(), start + 1, end - 1);
     int sortedOnHeap = random().nextInt(5000);
-    int dimensions =  TestUtil.nextInt(random(), 1, 8);
+    int indexDimensions =  TestUtil.nextInt(random(), 1, 8);
+    int dataDimensions =  TestUtil.nextInt(random(), indexDimensions, 8);
     int bytesPerDimensions = TestUtil.nextInt(random(), 2, 30);
-    int packedLength = dimensions * bytesPerDimensions;
+    int packedLength = dataDimensions * bytesPerDimensions;
     PointWriter points = getRandomPointWriter(dir, values, packedLength);
     byte[] value = new byte[packedLength];
     for (int i =0; i < values; i++) {
@@ -90,7 +91,7 @@ public class TestBKDRadixSelector extends LuceneTestCase {
       points.append(value, i);
     }
     points.close();
-    verify(dir, points, dimensions, start, end, partitionPoint, packedLength, bytesPerDimensions, sortedOnHeap);
+    verify(dir, points, dataDimensions, indexDimensions, start, end, partitionPoint, packedLength, bytesPerDimensions, sortedOnHeap);
     dir.close();
   }
 
@@ -113,7 +114,7 @@ public class TestBKDRadixSelector extends LuceneTestCase {
       }
     }
     points.close();
-    verify(dir, points, dimensions, 0, values, partitionPoint, packedLength, bytesPerDimensions, sortedOnHeap);
+    verify(dir, points, dimensions, dimensions, 0, values, partitionPoint, packedLength, bytesPerDimensions, sortedOnHeap);
     dir.close();
   }
 
@@ -122,9 +123,10 @@ public class TestBKDRadixSelector extends LuceneTestCase {
     Directory dir = getDirectory(values);
     int partitionPoint = random().nextInt(values);
     int sortedOnHeap = random().nextInt(5000);
-    int dimensions =  TestUtil.nextInt(random(), 1, 8);
+    int indexDimensions =  TestUtil.nextInt(random(), 1, 8);
+    int dataDimensions =  TestUtil.nextInt(random(), indexDimensions, 8);
     int bytesPerDimensions = TestUtil.nextInt(random(), 2, 30);
-    int packedLength = dimensions * bytesPerDimensions;
+    int packedLength = dataDimensions * bytesPerDimensions;
     PointWriter points = getRandomPointWriter(dir, values, packedLength);
     byte[] value = new byte[packedLength];
     random().nextBytes(value);
@@ -136,7 +138,7 @@ public class TestBKDRadixSelector extends LuceneTestCase {
       }
     }
     points.close();
-    verify(dir, points, dimensions, 0, values, partitionPoint, packedLength, bytesPerDimensions, sortedOnHeap);
+    verify(dir, points, dataDimensions, indexDimensions, 0, values, partitionPoint, packedLength, bytesPerDimensions, sortedOnHeap);
     dir.close();
   }
 
@@ -145,9 +147,10 @@ public class TestBKDRadixSelector extends LuceneTestCase {
     Directory dir = getDirectory(values);
     int partitionPoint = random().nextInt(values);
     int sortedOnHeap = random().nextInt(5000);
-    int dimensions =  TestUtil.nextInt(random(), 1, 8);
+    int indexDimensions =  TestUtil.nextInt(random(), 1, 8);
+    int dataDimensions =  TestUtil.nextInt(random(), indexDimensions, 8);
     int bytesPerDimensions = TestUtil.nextInt(random(), 2, 30);
-    int packedLength = dimensions * bytesPerDimensions;
+    int packedLength = dataDimensions * bytesPerDimensions;
     PointWriter points = getRandomPointWriter(dir, values, packedLength);
     byte[] value = new byte[packedLength];
     random().nextBytes(value);
@@ -155,7 +158,7 @@ public class TestBKDRadixSelector extends LuceneTestCase {
       points.append(value, 0);
     }
     points.close();
-    verify(dir, points, dimensions, 0, values, partitionPoint, packedLength, bytesPerDimensions, sortedOnHeap);
+    verify(dir, points, dataDimensions, indexDimensions, 0, values, partitionPoint, packedLength, bytesPerDimensions, sortedOnHeap);
     dir.close();
   }
 
@@ -164,9 +167,10 @@ public class TestBKDRadixSelector extends LuceneTestCase {
     Directory dir = getDirectory(values);
     int partitionPoint = random().nextInt(values);
     int sortedOnHeap = random().nextInt(5000);
-    int dimensions =  TestUtil.nextInt(random(), 1, 8);
+    int indexDimensions =  TestUtil.nextInt(random(), 1, 8);
+    int dataDimensions =  TestUtil.nextInt(random(), indexDimensions, 8);
     int bytesPerDimensions = TestUtil.nextInt(random(), 2, 30);
-    int packedLength = dimensions * bytesPerDimensions;
+    int packedLength = dataDimensions * bytesPerDimensions;
     PointWriter points = getRandomPointWriter(dir, values, packedLength);
     int numberValues = random().nextInt(8) + 2;
     byte[][] differentValues = new byte[numberValues][packedLength];
@@ -177,14 +181,37 @@ public class TestBKDRadixSelector extends LuceneTestCase {
       points.append(differentValues[random().nextInt(numberValues)], i);
     }
     points.close();
-    verify(dir, points, dimensions, 0, values, partitionPoint, packedLength, bytesPerDimensions, sortedOnHeap);
+    verify(dir, points, dataDimensions, indexDimensions, 0, values, partitionPoint, packedLength, bytesPerDimensions, sortedOnHeap);
     dir.close();
   }
 
-  private void verify(Directory dir, PointWriter points, int dimensions, long start, long end, long middle, int packedLength, int bytesPerDimensions, int sortedOnHeap) throws IOException{
-    BKDRadixSelector radixSelector = new BKDRadixSelector(dimensions, bytesPerDimensions, sortedOnHeap, dir, "test");
-    //we check for each dimension
-    for (int splitDim =0; splitDim < dimensions; splitDim++) {
+  public void testRandomDataDimDiffValues() throws IOException {
+    int values = atLeast(15000);
+    Directory dir = getDirectory(values);
+    int partitionPoint = random().nextInt(values);
+    int sortedOnHeap = random().nextInt(5000);
+    int indexDimensions =  TestUtil.nextInt(random(), 1, 8);
+    int dataDimensions =  TestUtil.nextInt(random(), indexDimensions, 8);
+    int bytesPerDimensions = TestUtil.nextInt(random(), 2, 30);
+    int packedLength = dataDimensions * bytesPerDimensions;
+    PointWriter points = getRandomPointWriter(dir, values, packedLength);
+    byte[] value = new byte[packedLength];
+    byte[] dataValue = new byte[(dataDimensions - indexDimensions) * bytesPerDimensions];
+    random().nextBytes(value);
+    for (int i =0; i < values; i++) {
+      random().nextBytes(dataValue);
+      System.arraycopy(dataValue, 0, value, indexDimensions * bytesPerDimensions, (dataDimensions - indexDimensions) * bytesPerDimensions);
+      points.append(value, i);
+    }
+    points.close();
+    verify(dir, points, dataDimensions, indexDimensions, 0, values, partitionPoint, packedLength, bytesPerDimensions, sortedOnHeap);
+    dir.close();
+  }
+
+  private void verify(Directory dir, PointWriter points, int dataDimensions, int indexDimensions, long start, long end, long middle, int packedLength, int bytesPerDimensions, int sortedOnHeap) throws IOException{
+    BKDRadixSelector radixSelector = new BKDRadixSelector(dataDimensions, bytesPerDimensions, sortedOnHeap, dir, "test");
+    //we only split by indexed dimension so we check for each only those dimension
+    for (int splitDim = 0; splitDim < indexDimensions; splitDim++) {
       //We need to make a copy of the data as it is deleted in the process
       BKDRadixSelector.PathSlice inputSlice = new BKDRadixSelector.PathSlice(copyPoints(dir, points, packedLength), 0, points.count());
       int commonPrefixLengthInput = getRandomCommonPrefix(inputSlice, bytesPerDimensions, splitDim);
@@ -209,7 +236,6 @@ public class TestBKDRadixSelector extends LuceneTestCase {
     points.destroy();
   }
 
-
   private PointWriter copyPoints(Directory dir, PointWriter points, int packedLength) throws IOException {
     try (PointWriter copy  = getRandomPointWriter(dir, points.count(), packedLength);
          PointReader reader = points.getReader(0, points.count())) {
@@ -319,5 +345,4 @@ public class TestBKDRadixSelector extends LuceneTestCase {
     }
     return docID;
   }
-
 }
diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDRadixSort.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDRadixSort.java
new file mode 100644
index 0000000..a7cebd0
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDRadixSort.java
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.util.bkd;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+
+public class TestBKDRadixSort extends LuceneTestCase {
+
+  public void testRandom() throws IOException {
+    int numPoints = TestUtil.nextInt(random(), 1, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
+    int indexDimensions = TestUtil.nextInt(random(), 1, 8);
+    int dataDimensions = TestUtil.nextInt(random(), indexDimensions, 8);
+    int bytesPerDim = TestUtil.nextInt(random(), 2, 30);
+    int packedBytesLength = dataDimensions * bytesPerDim;
+    HeapPointWriter points = new HeapPointWriter(numPoints, packedBytesLength);
+    byte[] value = new byte[packedBytesLength];
+    for (int i = 0; i < numPoints; i++) {
+      random().nextBytes(value);
+      points.append(value, i);
+    }
+    verifySort(points, dataDimensions, indexDimensions, 0, numPoints, bytesPerDim);
+  }
+
+  public void testRandomAllEquals() throws IOException {
+    int numPoints = TestUtil.nextInt(random(), 1, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
+    int indexDimensions = TestUtil.nextInt(random(), 1, 8);
+    int dataDimensions = TestUtil.nextInt(random(), indexDimensions, 8);
+    int bytesPerDim = TestUtil.nextInt(random(), 2, 30);
+    int packedBytesLength = dataDimensions * bytesPerDim;
+    HeapPointWriter points = new HeapPointWriter(numPoints, packedBytesLength);
+    byte[] value = new byte[packedBytesLength];
+    random().nextBytes(value);
+    for (int i = 0; i < numPoints; i++) {
+      points.append(value, random().nextInt(numPoints));
+    }
+    verifySort(points, dataDimensions, indexDimensions, 0, numPoints, bytesPerDim);
+  }
+
+  public void testRandomLastByteTwoValues() throws IOException {
+    int numPoints = TestUtil.nextInt(random(), 1, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
+    int indexDimensions = TestUtil.nextInt(random(), 1, 8);
+    int dataDimensions = TestUtil.nextInt(random(), indexDimensions, 8);
+    int bytesPerDim = TestUtil.nextInt(random(), 2, 30);
+    int packedBytesLength = dataDimensions * bytesPerDim;
+    HeapPointWriter points = new HeapPointWriter(numPoints, packedBytesLength);
+    byte[] value = new byte[packedBytesLength];
+    random().nextBytes(value);
+    for (int i = 0; i < numPoints; i++) {
+      if (random().nextBoolean()) {
+        points.append(value, 1);
+      } else {
+        points.append(value, 2);
+      }
+    }
+    verifySort(points, dataDimensions, indexDimensions, 0, numPoints, bytesPerDim);
+  }
+
+  public void testRandomFewDifferentValues() throws IOException {
+    int numPoints = TestUtil.nextInt(random(), 1, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
+    int indexDimensions = TestUtil.nextInt(random(), 1, 8);
+    int dataDimensions = TestUtil.nextInt(random(), indexDimensions, 8);
+    int bytesPerDim = TestUtil.nextInt(random(), 2, 30);
+    int packedBytesLength = dataDimensions * bytesPerDim;
+    HeapPointWriter points = new HeapPointWriter(numPoints, packedBytesLength);
+    int numberValues = random().nextInt(8) + 2;
+    byte[][] differentValues = new byte[numberValues][packedBytesLength];
+    for (int i = 0; i < numberValues; i++) {
+      random().nextBytes(differentValues[i]);
+    }
+    for (int i = 0; i < numPoints; i++) {
+      points.append(differentValues[random().nextInt(numberValues)], i);
+    }
+    verifySort(points, dataDimensions, indexDimensions, 0, numPoints, bytesPerDim);
+  }
+
+  public void testRandomDataDimDifferent() throws IOException {
+    int numPoints = TestUtil.nextInt(random(), 1, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
+    int indexDimensions = TestUtil.nextInt(random(), 1, 8);
+    int dataDimensions = TestUtil.nextInt(random(), indexDimensions, 8);
+    int bytesPerDim = TestUtil.nextInt(random(), 2, 30);
+    int packedBytesLength = dataDimensions * bytesPerDim;
+    HeapPointWriter points = new HeapPointWriter(numPoints, packedBytesLength);
+    byte[] value = new byte[packedBytesLength];
+    int totalDataDimension = dataDimensions - indexDimensions;
+    byte[] dataDimensionValues = new byte[totalDataDimension * bytesPerDim];
+    random().nextBytes(value);
+    for (int i = 0; i < numPoints; i++) {
+      random().nextBytes(dataDimensionValues);
+      System.arraycopy(dataDimensionValues, 0, value, indexDimensions * bytesPerDim, totalDataDimension * bytesPerDim);
+      points.append(value, random().nextInt(numPoints));
+    }
+    verifySort(points, dataDimensions, indexDimensions, 0, numPoints, bytesPerDim);
+  }
+
+  private void verifySort(HeapPointWriter points, int dataDimensions, int indexDimensions, int start, int end, int bytesPerDim) throws IOException{
+    int packedBytesLength = dataDimensions * bytesPerDim;
+    Directory dir = newDirectory();
+    BKDRadixSelector radixSelector = new BKDRadixSelector(dataDimensions, bytesPerDim, 1000, dir, "test");
+    // we check for each dimension
+    for (int splitDim = 0; splitDim < dataDimensions; splitDim++) {
+      radixSelector.heapRadixSort(points, start, end, splitDim, getRandomCommonPrefix(points, start, end, bytesPerDim, splitDim));
+      byte[] previous = new byte[bytesPerDim * dataDimensions];
+      int previousDocId = -1;
+      Arrays.fill(previous, (byte) 0);
+      int dimOffset = splitDim * bytesPerDim;
+      for (int j = start; j < end; j++) {
+        PointValue pointValue = points.getPackedValueSlice(j);
+        BytesRef value = pointValue.packedValue();
+        int cmp = Arrays.compareUnsigned(value.bytes, value.offset + dimOffset, value.offset + dimOffset + bytesPerDim, previous, dimOffset, dimOffset + bytesPerDim);
+        assertTrue(cmp >= 0);
+        if (cmp == 0) {
+          assertTrue(pointValue.docID() >= previousDocId);
+        }
+        System.arraycopy(value.bytes, value.offset, previous, 0, packedBytesLength);
+        previousDocId = pointValue.docID();
+      }
+    }
+    dir.close();
+  }
+
+  /** returns a common prefix length equal or lower than the current one */
+  private int getRandomCommonPrefix(HeapPointWriter points, int start, int end, int bytesPerDimension, int sortDim)  {
+    int commonPrefixLength = bytesPerDimension;
+    PointValue value = points.getPackedValueSlice(start);
+    BytesRef bytesRef = value.packedValue();
+    byte[] firstValue = new byte[bytesPerDimension];
+    int offset = sortDim * bytesPerDimension;
+    System.arraycopy(bytesRef.bytes, bytesRef.offset + offset, firstValue, 0, bytesPerDimension);
+    for (int i = start + 1; i < end; i++) {
+      value = points.getPackedValueSlice(i);
+      bytesRef = value.packedValue();
+      int diff = Arrays.mismatch(bytesRef.bytes, bytesRef.offset + offset, bytesRef.offset + offset + bytesPerDimension, firstValue, 0, bytesPerDimension);
+      if (diff != -1 && commonPrefixLength > diff) {
+        if (diff == 0) {
+          return diff;
+        }
+        commonPrefixLength = diff;
+      }
+    }
+    return (random().nextBoolean()) ? commonPrefixLength : random().nextInt(commonPrefixLength);
+  }
+}