You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by em...@apache.org on 2019/08/10 03:22:45 UTC

[arrow] branch master updated: ARROW-6093: [Java] reduce branches in algo for first match in VectorRangeSearcher

This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 94fe2d5  ARROW-6093: [Java] reduce branches in algo for first match in VectorRangeSearcher
94fe2d5 is described below

commit 94fe2d587035d51ee11a6eda394c2a29244e1d1c
Author: liyafan82 <fa...@foxmail.com>
AuthorDate: Fri Aug 9 20:20:07 2019 -0700

    ARROW-6093: [Java] reduce branches in algo for first match in VectorRangeSearcher
    
    This is a follow up Jira for the improvement suggested by Francois Saint-Jacques in the PR for
    https://github.com/apache/arrow/pull/4925
    
    After careful analysis and proof, I found @fsaintjacques 's proposal really works. The idea behind the algorithm is tricky. This PR is the implementation based on @fsaintjacques 's proposal.
    
    The benefit of this implementation:
    1. the code is much clearer
    2. there is fewer branches, which is beneficial for performance.
    
    The drawback of this implementation:
    1. there can be dumb iterations of the main loop. That is, the while loop may run a few more times after the "solution" is found, just to wait for the loop termination condition is satisfied.
    
    However, the time complexity of the algorithm is still O(logn), and the number of dumb iterations can not be large.
    
    Closes #5011 from liyafan82/fly_0805_range and squashes the following commits:
    
    f47a2e7d8 <liyafan82>  reduce branches in algo for first match in VectorRangeSearcher
    
    Authored-by: liyafan82 <fa...@foxmail.com>
    Signed-off-by: Micah Kornfield <em...@gmail.com>
---
 .../algorithm/search/VectorRangeSearcher.java      | 58 ++++++----------------
 .../algorithm/search/TestVectorRangeSearcher.java  | 26 ++++++++--
 2 files changed, 36 insertions(+), 48 deletions(-)

diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java
index e7ce95b..2491948 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java
@@ -44,6 +44,8 @@ public class VectorRangeSearcher {
           V targetVector, VectorValueComparator<V> comparator, V keyVector, int keyIndex) {
     comparator.attachVectors(keyVector, targetVector);
 
+    int ret = SEARCH_FAIL_RESULT;
+
     int low = 0;
     int high = targetVector.getValueCount() - 1;
 
@@ -57,30 +59,13 @@ public class VectorRangeSearcher {
         // the key is larger
         low = mid + 1;
       } else {
-        // the key equals the mid value, find the lower bound by going left-ward.
-
-        // compare with the left neighbour
-        int left = mid - 1;
-        if (left == -1) {
-          // this is the first value in the vector
-          return mid;
-        } else {
-          int leftResult = comparator.compare(keyIndex, left);
-          if (leftResult > 0) {
-            // the key is greater than the left neighbour, and equal to the current one
-            // we find it
-            return mid;
-          } else if (leftResult == 0) {
-            // the left neighbour is also equal, continue to go left
-            high = mid - 1;
-          } else {
-            // the key is larger than the left neighbour, this is not possible
-            throw new IllegalStateException("The target vector is not sorted ");
-          }
-        }
+        // an equal element is found
+        // continue to go left-ward
+        ret = mid;
+        high = mid - 1;
       }
     }
-    return SEARCH_FAIL_RESULT;
+    return ret;
   }
 
   /**
@@ -97,6 +82,8 @@ public class VectorRangeSearcher {
           V targetVector, VectorValueComparator<V> comparator, V keyVector, int keyIndex) {
     comparator.attachVectors(keyVector, targetVector);
 
+    int ret = SEARCH_FAIL_RESULT;
+
     int low = 0;
     int high = targetVector.getValueCount() - 1;
 
@@ -110,29 +97,12 @@ public class VectorRangeSearcher {
         // the key is larger
         low = mid + 1;
       } else {
-        // the key equals the mid value, find the upper bound by going right-ward.
-
-        // compare with the right neighbour
-        int right = mid + 1;
-        if (right == targetVector.getValueCount()) {
-          // this is the last value in the vector
-          return mid;
-        } else {
-          int rightResult = comparator.compare(keyIndex, right);
-          if (rightResult < 0) {
-            // the key is smaller than the right neighbour, and equal to the current one
-            // we find it
-            return mid;
-          } else if (rightResult == 0) {
-            // the right neighbour is also equal, continue to go right
-            low = mid + 1;
-          } else {
-            // the key is smaller than the right neighbour, this is not possible
-            throw new IllegalStateException("The target vector is not sorted ");
-          }
-        }
+        // an equal element is found,
+        // continue to go right-ward
+        ret = mid;
+        low = mid + 1;
       }
     }
-    return SEARCH_FAIL_RESULT;
+    return ret;
   }
 }
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java
index 88f6e23..e8f8056 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java
@@ -19,6 +19,9 @@ package org.apache.arrow.algorithm.search;
 
 import static org.junit.Assert.assertEquals;
 
+import java.util.Arrays;
+import java.util.Collection;
+
 import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
 import org.apache.arrow.algorithm.sort.VectorValueComparator;
 import org.apache.arrow.memory.BufferAllocator;
@@ -28,14 +31,23 @@ import org.apache.arrow.vector.IntVector;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
 
 /**
  * Test cases for {@link VectorRangeSearcher}.
  */
+@RunWith(Parameterized.class)
 public class TestVectorRangeSearcher {
 
   private BufferAllocator allocator;
 
+  private int repeat;
+
+  public TestVectorRangeSearcher(int repeat) {
+    this.repeat = repeat;
+  }
+
   @Before
   public void prepare() {
     allocator = new RootAllocator(1024 * 1024);
@@ -49,7 +61,6 @@ public class TestVectorRangeSearcher {
   @Test
   public void testGetLowerBounds() {
     final int maxValue = 100;
-    final int repeat = 5;
     try (IntVector intVector = new IntVector("int vec", allocator)) {
       // allocate vector
       intVector.allocateNew(maxValue * repeat);
@@ -79,7 +90,6 @@ public class TestVectorRangeSearcher {
   @Test
   public void testGetLowerBoundsNegative() {
     final int maxValue = 100;
-    final int repeat = 5;
     try (IntVector intVector = new IntVector("int vec", allocator);
     IntVector negVector = new IntVector("neg vec", allocator)) {
       // allocate vector
@@ -114,7 +124,6 @@ public class TestVectorRangeSearcher {
   @Test
   public void testGetUpperBounds() {
     final int maxValue = 100;
-    final int repeat = 5;
     try (IntVector intVector = new IntVector("int vec", allocator)) {
       // allocate vector
       intVector.allocateNew(maxValue * repeat);
@@ -144,7 +153,6 @@ public class TestVectorRangeSearcher {
   @Test
   public void testGetUpperBoundsNegative() {
     final int maxValue = 100;
-    final int repeat = 5;
     try (IntVector intVector = new IntVector("int vec", allocator);
          IntVector negVector = new IntVector("neg vec", allocator)) {
       // allocate vector
@@ -175,4 +183,14 @@ public class TestVectorRangeSearcher {
       }
     }
   }
+
+  @Parameterized.Parameters(name = "repeat = {0}")
+  public static Collection<Object[]> getRepeat() {
+    return Arrays.asList(
+      new Object[]{1},
+      new Object[]{2},
+      new Object[]{5},
+      new Object[]{10}
+    );
+  }
 }