You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2020/03/12 02:19:21 UTC
[hive] branch master updated: HIVE-22975 : Optimise TopNKeyFilter
with boundary checks (Rajesh Balamohan via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository.
hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 812a626 HIVE-22975 : Optimise TopNKeyFilter with boundary checks (Rajesh Balamohan via Ashutosh Chauhan)
812a626 is described below
commit 812a6269a3abfba6d278d2dfa86f6f7137719d69
Author: Rajesh Balamohan <rb...@apache.org>
AuthorDate: Wed Mar 11 19:18:46 2020 -0700
HIVE-22975 : Optimise TopNKeyFilter with boundary checks (Rajesh Balamohan via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
---
.../apache/hadoop/hive/ql/exec/TopNKeyFilter.java | 54 ++++++++++++++++++++++
.../hadoop/hive/ql/exec/TestTopNKeyFilter.java | 44 ++++++++++++++++++
2 files changed, 98 insertions(+)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyFilter.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyFilter.java
index 361dbc3..2f296c0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyFilter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyFilter.java
@@ -17,10 +17,14 @@
*/
package org.apache.hadoop.hive.ql.exec;
+import com.google.common.annotations.VisibleForTesting;
+
import static java.util.Arrays.binarySearch;
import java.util.Arrays;
import java.util.Comparator;
+import java.util.HashSet;
+import java.util.Set;
/**
* Implementation of filtering out keys.
@@ -36,14 +40,43 @@ public final class TopNKeyFilter {
private long added = 0;
private long total = 0;
+ //to track effectiveness of boundary check
+ private long eff = 0;
+ private final Set<KeyWrapper> topNKeySet;
+
public TopNKeyFilter(int topN, Comparator<? extends KeyWrapper> comparator) {
this.comparator = comparator;
this.sortedTopItems = new KeyWrapper[topN +1];
this.topN = topN;
+ this.topNKeySet = new HashSet<>();
+ }
+
+ private int compareWithBoundary(KeyWrapper kw) {
+ return ((Comparator<? super KeyWrapper>) comparator).compare(kw, sortedTopItems[topN - 1]);
}
public final boolean canForward(KeyWrapper kw) {
total++;
+ if (topN > 0 && (size == topN)) {
+ int comp = compareWithBoundary(kw);
+ if (comp == 0) {
+ // special case, if last element is same as kw;
+ // Avoids duplicate comparison later
+ eff++;
+ return true;
+ }
+
+ if (comp > 0) {
+ eff++;
+ return false;
+ }
+ }
+
+ if (topNKeySet.contains(kw)) {
+ repeated++;
+ return true;
+ }
+
int pos = binarySearch(sortedTopItems, 0, size, kw, (Comparator<? super KeyWrapper>) comparator);
if (pos >= 0) { // found
repeated++;
@@ -53,8 +86,13 @@ public final class TopNKeyFilter {
if (pos >= topN) { // would be inserted to the end, there are topN elements which are smaller/larger
return false;
}
+ KeyWrapper oldElement = sortedTopItems[pos];
System.arraycopy(sortedTopItems, pos, sortedTopItems, pos +1, size - pos); // make space by shifting
sortedTopItems[pos] = kw.copyKey();
+
+ topNKeySet.remove(oldElement);
+ topNKeySet.add(sortedTopItems[pos]);
+
added++;
if (size < topN) {
size++;
@@ -78,6 +116,7 @@ public final class TopNKeyFilter {
sb.append(", repeated=").append(repeated);
sb.append(", added=").append(added);
sb.append(", total=").append(total);
+ sb.append(", eff=").append(eff);
sb.append(", forwardingRatio=").append(forwardingRatio());
sb.append('}');
return sb.toString();
@@ -99,4 +138,19 @@ public final class TopNKeyFilter {
public long getTotal() {
return total;
}
+
+ @VisibleForTesting
+ long getEffectiveBoundaryChecks() {
+ return eff;
+ }
+
+ @VisibleForTesting
+ long getRepeated() {
+ return repeated;
+ }
+
+ @VisibleForTesting
+ long getKeySetSize() {
+ return topNKeySet.size();
+ }
}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestTopNKeyFilter.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestTopNKeyFilter.java
index 0ee5c8d..222e29e 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestTopNKeyFilter.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestTopNKeyFilter.java
@@ -21,6 +21,7 @@ import static org.apache.hadoop.hive.ql.exec.vector.VectorTopNKeyOperator.checkT
import static org.hamcrest.Matchers.hasItem;
import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.is;
+import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;
import java.util.Comparator;
@@ -48,6 +49,10 @@ public class TestTopNKeyFilter {
TopNKeyFilter topNKeyFilter = new TopNKeyFilter(0, TEST_KEY_WRAPPER_COMPARATOR);
assertThat(topNKeyFilter.canForward(new TestKeyWrapper(1)), is(false));
assertThat(topNKeyFilter.canForward(new TestKeyWrapper(-1)), is(false));
+
+ assertEquals(0, topNKeyFilter.getEffectiveBoundaryChecks());
+ assertEquals(0, topNKeyFilter.getRepeated());
+ assertEquals(0, topNKeyFilter.getKeySetSize());
}
@Test
@@ -55,8 +60,47 @@ public class TestTopNKeyFilter {
TopNKeyFilter topNKeyFilter = new TopNKeyFilter(3, TEST_KEY_WRAPPER_COMPARATOR);
assertThat(topNKeyFilter.canForward(new TestKeyWrapper(1)), is(true));
assertThat(topNKeyFilter.canForward(new TestKeyWrapper(5)), is(true));
+
+ assertEquals(0, topNKeyFilter.getEffectiveBoundaryChecks());
+ assertEquals(0, topNKeyFilter.getRepeated());
+
assertThat(topNKeyFilter.canForward(new TestKeyWrapper(10)), is(true));
assertThat(topNKeyFilter.canForward(new TestKeyWrapper(11)), is(false));
+ assertEquals(1, topNKeyFilter.getEffectiveBoundaryChecks());
+
+ assertEquals(3, topNKeyFilter.getKeySetSize());
+
+ //new key same as boundary element (in 1,5,10)
+ assertThat(topNKeyFilter.canForward(new TestKeyWrapper(10)), is(true));
+ assertEquals(2, topNKeyFilter.getEffectiveBoundaryChecks());
+ }
+
+ @Test
+ public void testFirstTopNKeysCanBeForwardedDesc() {
+ TopNKeyFilter topNKeyFilter = new TopNKeyFilter(3, TEST_KEY_WRAPPER_COMPARATOR.reversed());
+ assertThat(topNKeyFilter.canForward(new TestKeyWrapper(10)), is(true));
+ assertThat(topNKeyFilter.canForward(new TestKeyWrapper(5)), is(true));
+
+ assertEquals(0, topNKeyFilter.getEffectiveBoundaryChecks());
+ assertEquals(0, topNKeyFilter.getRepeated());
+
+ assertThat(topNKeyFilter.canForward(new TestKeyWrapper(1)), is(true));
+
+ assertThat(topNKeyFilter.canForward(new TestKeyWrapper(11)), is(true));
+ assertEquals(0, topNKeyFilter.getEffectiveBoundaryChecks());
+ assertEquals(0, topNKeyFilter.getRepeated());
+
+ assertThat(topNKeyFilter.canForward(new TestKeyWrapper(0)), is(false));
+ assertEquals(1, topNKeyFilter.getEffectiveBoundaryChecks());
+
+ assertThat(topNKeyFilter.canForward(new TestKeyWrapper(11)), is(true));
+ assertEquals(1, topNKeyFilter.getRepeated());
+
+ assertEquals(3, topNKeyFilter.getKeySetSize());
+
+ //new key same as boundary element (in 11, 10, 5)
+ assertThat(topNKeyFilter.canForward(new TestKeyWrapper(5)), is(true));
+ assertEquals(2, topNKeyFilter.getEffectiveBoundaryChecks());
}
@Test