You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2020/03/12 02:19:21 UTC

[hive] branch master updated: HIVE-22975 : Optimise TopNKeyFilter with boundary checks (Rajesh Balamohan via Ashutosh Chauhan)

This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 812a626  HIVE-22975 : Optimise TopNKeyFilter with boundary checks (Rajesh Balamohan via Ashutosh Chauhan)
812a626 is described below

commit 812a6269a3abfba6d278d2dfa86f6f7137719d69
Author: Rajesh Balamohan <rb...@apache.org>
AuthorDate: Wed Mar 11 19:18:46 2020 -0700

    HIVE-22975 : Optimise TopNKeyFilter with boundary checks (Rajesh Balamohan via Ashutosh Chauhan)
    
    Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
---
 .../apache/hadoop/hive/ql/exec/TopNKeyFilter.java  | 54 ++++++++++++++++++++++
 .../hadoop/hive/ql/exec/TestTopNKeyFilter.java     | 44 ++++++++++++++++++
 2 files changed, 98 insertions(+)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyFilter.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyFilter.java
index 361dbc3..2f296c0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyFilter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyFilter.java
@@ -17,10 +17,14 @@
  */
 package org.apache.hadoop.hive.ql.exec;
 
+import com.google.common.annotations.VisibleForTesting;
+
 import static java.util.Arrays.binarySearch;
 
 import java.util.Arrays;
 import java.util.Comparator;
+import java.util.HashSet;
+import java.util.Set;
 
 /**
  * Implementation of filtering out keys.
@@ -36,14 +40,43 @@ public final class TopNKeyFilter {
   private long added = 0;
   private long total = 0;
 
+  //to track effectiveness of boundary check
+  private long eff = 0;
+  private final Set<KeyWrapper> topNKeySet;
+
   public TopNKeyFilter(int topN, Comparator<? extends KeyWrapper> comparator) {
     this.comparator = comparator;
     this.sortedTopItems = new KeyWrapper[topN +1];
     this.topN = topN;
+    this.topNKeySet = new HashSet<>();
+  }
+
+  private int compareWithBoundary(KeyWrapper kw) {
+    return ((Comparator<? super KeyWrapper>) comparator).compare(kw, sortedTopItems[topN - 1]);
   }
 
   public final boolean canForward(KeyWrapper kw) {
     total++;
+    if (topN > 0 && (size == topN)) {
+      int comp = compareWithBoundary(kw);
+      if (comp == 0) {
+        // special case, if last element is same as kw;
+        // Avoids duplicate comparison later
+        eff++;
+        return true;
+      }
+
+      if (comp > 0) {
+        eff++;
+        return false;
+      }
+    }
+
+    if (topNKeySet.contains(kw)) {
+      repeated++;
+      return true;
+    }
+
     int pos = binarySearch(sortedTopItems, 0, size, kw, (Comparator<? super KeyWrapper>) comparator);
     if (pos >= 0) { // found
       repeated++;
@@ -53,8 +86,13 @@ public final class TopNKeyFilter {
     if (pos >= topN) { // would be inserted to the end, there are topN elements which are smaller/larger
       return false;
     }
+    KeyWrapper oldElement = sortedTopItems[pos];
     System.arraycopy(sortedTopItems, pos, sortedTopItems, pos +1, size - pos); // make space by shifting
     sortedTopItems[pos] = kw.copyKey();
+
+    topNKeySet.remove(oldElement);
+    topNKeySet.add(sortedTopItems[pos]);
+
     added++;
     if (size < topN) {
       size++;
@@ -78,6 +116,7 @@ public final class TopNKeyFilter {
     sb.append(", repeated=").append(repeated);
     sb.append(", added=").append(added);
     sb.append(", total=").append(total);
+    sb.append(", eff=").append(eff);
     sb.append(", forwardingRatio=").append(forwardingRatio());
     sb.append('}');
     return sb.toString();
@@ -99,4 +138,19 @@ public final class TopNKeyFilter {
   public long getTotal() {
     return total;
   }
+
+  @VisibleForTesting
+  long getEffectiveBoundaryChecks() {
+    return eff;
+  }
+
+  @VisibleForTesting
+  long getRepeated() {
+    return repeated;
+  }
+
+  @VisibleForTesting
+  long getKeySetSize() {
+    return topNKeySet.size();
+  }
 }
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestTopNKeyFilter.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestTopNKeyFilter.java
index 0ee5c8d..222e29e 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestTopNKeyFilter.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestTopNKeyFilter.java
@@ -21,6 +21,7 @@ import static org.apache.hadoop.hive.ql.exec.vector.VectorTopNKeyOperator.checkT
 import static org.hamcrest.Matchers.hasItem;
 import static org.hamcrest.Matchers.hasSize;
 import static org.hamcrest.Matchers.is;
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertThat;
 
 import java.util.Comparator;
@@ -48,6 +49,10 @@ public class TestTopNKeyFilter {
     TopNKeyFilter topNKeyFilter = new TopNKeyFilter(0, TEST_KEY_WRAPPER_COMPARATOR);
     assertThat(topNKeyFilter.canForward(new TestKeyWrapper(1)), is(false));
     assertThat(topNKeyFilter.canForward(new TestKeyWrapper(-1)), is(false));
+
+    assertEquals(0, topNKeyFilter.getEffectiveBoundaryChecks());
+    assertEquals(0, topNKeyFilter.getRepeated());
+    assertEquals(0, topNKeyFilter.getKeySetSize());
   }
 
   @Test
@@ -55,8 +60,47 @@ public class TestTopNKeyFilter {
     TopNKeyFilter topNKeyFilter = new TopNKeyFilter(3, TEST_KEY_WRAPPER_COMPARATOR);
     assertThat(topNKeyFilter.canForward(new TestKeyWrapper(1)), is(true));
     assertThat(topNKeyFilter.canForward(new TestKeyWrapper(5)), is(true));
+
+    assertEquals(0, topNKeyFilter.getEffectiveBoundaryChecks());
+    assertEquals(0, topNKeyFilter.getRepeated());
+
     assertThat(topNKeyFilter.canForward(new TestKeyWrapper(10)), is(true));
     assertThat(topNKeyFilter.canForward(new TestKeyWrapper(11)), is(false));
+    assertEquals(1, topNKeyFilter.getEffectiveBoundaryChecks());
+
+    assertEquals(3, topNKeyFilter.getKeySetSize());
+
+    //new key same as boundary element (in 1,5,10)
+    assertThat(topNKeyFilter.canForward(new TestKeyWrapper(10)), is(true));
+    assertEquals(2, topNKeyFilter.getEffectiveBoundaryChecks());
+  }
+
+  @Test
+  public void testFirstTopNKeysCanBeForwardedDesc() {
+    TopNKeyFilter topNKeyFilter = new TopNKeyFilter(3, TEST_KEY_WRAPPER_COMPARATOR.reversed());
+    assertThat(topNKeyFilter.canForward(new TestKeyWrapper(10)), is(true));
+    assertThat(topNKeyFilter.canForward(new TestKeyWrapper(5)), is(true));
+
+    assertEquals(0, topNKeyFilter.getEffectiveBoundaryChecks());
+    assertEquals(0, topNKeyFilter.getRepeated());
+
+    assertThat(topNKeyFilter.canForward(new TestKeyWrapper(1)), is(true));
+
+    assertThat(topNKeyFilter.canForward(new TestKeyWrapper(11)), is(true));
+    assertEquals(0, topNKeyFilter.getEffectiveBoundaryChecks());
+    assertEquals(0, topNKeyFilter.getRepeated());
+
+    assertThat(topNKeyFilter.canForward(new TestKeyWrapper(0)), is(false));
+    assertEquals(1, topNKeyFilter.getEffectiveBoundaryChecks());
+
+    assertThat(topNKeyFilter.canForward(new TestKeyWrapper(11)), is(true));
+    assertEquals(1, topNKeyFilter.getRepeated());
+
+    assertEquals(3, topNKeyFilter.getKeySetSize());
+
+    //new key same as boundary element (in 11, 10, 5)
+    assertThat(topNKeyFilter.canForward(new TestKeyWrapper(5)), is(true));
+    assertEquals(2, topNKeyFilter.getEffectiveBoundaryChecks());
   }
 
   @Test