You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by bo...@apache.org on 2019/01/09 04:11:59 UTC

[drill] 01/03: DRILL-6880: For Hash-Join hash-table build - treat null keys as an equal match

This is an automated email from the ASF dual-hosted git repository.

boaz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git

commit 4bc562361b5d178a15f42b46b91bd635805b1d88
Author: Ben-Zvi <bb...@mapr.com>
AuthorDate: Fri Jan 4 20:14:02 2019 -0800

    DRILL-6880: For Hash-Join hash-table build - treat null keys as an equal match
---
 .../physical/impl/common/ChainedHashTable.java     | 49 +++++++++++++++-------
 .../physical/impl/common/HashTableTemplate.java    | 13 ++++++
 2 files changed, 46 insertions(+), 16 deletions(-)

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/ChainedHashTable.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/ChainedHashTable.java
index dcdac95..9171ad1 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/ChainedHashTable.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/ChainedHashTable.java
@@ -63,6 +63,10 @@ public class ChainedHashTable {
       GeneratorMapping.create("setupInterior" /* setup method */, "isKeyMatchInternalBuild" /* eval method */,
           null /* reset */, null /* cleanup */);
 
+  private static final GeneratorMapping BOTH_KEYS_NULL =
+    GeneratorMapping.create("setupInterior" /* setup method */, "areBothKeysNull" /* eval method */,
+      null /* reset */, null /* cleanup */);
+
   private static final GeneratorMapping KEY_MATCH_PROBE =
       GeneratorMapping.create("setupInterior" /* setup method */, "isKeyMatchInternalProbe" /* eval method */,
           null /* reset */, null /* cleanup */);
@@ -95,10 +99,14 @@ public class ChainedHashTable {
 
   private final MappingSet KeyMatchIncomingBuildMapping =
       new MappingSet("incomingRowIdx", null, "incomingBuild", null, SETUP_INTERIOR_CONSTANT, KEY_MATCH_BUILD);
+  private final MappingSet bothKeysNullIncomingBuildMapping =
+    new MappingSet("incomingRowIdx", null, "incomingBuild", null, SETUP_INTERIOR_CONSTANT, BOTH_KEYS_NULL);
   private final MappingSet KeyMatchIncomingProbeMapping =
       new MappingSet("incomingRowIdx", null, "incomingProbe", null, SETUP_INTERIOR_CONSTANT, KEY_MATCH_PROBE);
   private final MappingSet KeyMatchHtableMapping =
       new MappingSet("htRowIdx", null, "htContainer", null, SETUP_INTERIOR_CONSTANT, KEY_MATCH_BUILD);
+  private final MappingSet bothKeysNullHtableMapping =
+    new MappingSet("htRowIdx", null, "htContainer", null, SETUP_INTERIOR_CONSTANT, BOTH_KEYS_NULL);
   private final MappingSet KeyMatchHtableProbeMapping =
       new MappingSet("htRowIdx", null, "htContainer", null, SETUP_INTERIOR_CONSTANT, KEY_MATCH_PROBE);
   private final MappingSet GetHashIncomingBuildMapping =
@@ -208,7 +216,10 @@ public class ChainedHashTable {
       i++;
     }
 
-
+    // Only in case of an equi-join: Generate a special method to check if both the new key and the existing key (in this HT bucket) are nulls
+    // (used by Hash-Join to avoid creating a long hash-table chain of null keys, which can lead to O(n^2) work on that chain.)
+    setupIsKeyMatchInternal(cgInner, bothKeysNullIncomingBuildMapping, bothKeysNullHtableMapping, keyExprsBuild,
+        htConfig.getComparators(), htKeyFieldIds);
     // generate code for isKeyMatch(), setValue(), getHash() and outputRecordKeys()
     setupIsKeyMatchInternal(cgInner, KeyMatchIncomingBuildMapping, KeyMatchHtableMapping, keyExprsBuild,
         htConfig.getComparators(), htKeyFieldIds);
@@ -235,9 +246,14 @@ public class ChainedHashTable {
 
   private void setupIsKeyMatchInternal(ClassGenerator<HashTable> cg, MappingSet incomingMapping, MappingSet htableMapping,
       LogicalExpression[] keyExprs, List<Comparator> comparators, TypedFieldId[] htKeyFieldIds) {
+
+    boolean isProbe = incomingMapping == KeyMatchIncomingProbeMapping;
+    boolean areBothNulls = incomingMapping == bothKeysNullIncomingBuildMapping;
+
     cg.setMappingSet(incomingMapping);
 
-    if (keyExprs == null || keyExprs.length == 0) {
+    if (keyExprs == null || keyExprs.length == 0 ||
+        areBothNulls && ! comparators.contains(Comparator.EQUALS)) { // e.g. for Hash-Aggr, or non-equi join
       cg.getEvalBlock()._return(JExpr.FALSE);
       return;
     }
@@ -253,28 +269,29 @@ public class ChainedHashTable {
 
       JConditional jc;
 
-      // codegen for nullable columns if nulls are not equal
-      if (comparators.get(i) == Comparator.EQUALS
-          && left.isOptional() && right.isOptional()) {
-        jc = cg.getEvalBlock()._if(left.getIsSet().eq(JExpr.lit(0)).
+      if ( areBothNulls || isProbe ) {
+        // codegen for the special case when both columns are null (i.e., return early - as equal or not)
+        if (comparators.get(i) == Comparator.EQUALS
+            && left.isOptional() && right.isOptional()) {
+          jc = cg.getEvalBlock()._if(left.getIsSet().eq(JExpr.lit(0)).
             cand(right.getIsSet().eq(JExpr.lit(0))));
-        jc._then()._return(JExpr.FALSE);
+          jc._then()._return( areBothNulls ? JExpr.TRUE : JExpr.FALSE);
+        }
       }
+      if ( ! areBothNulls ) {
+        final LogicalExpression f = FunctionGenerationHelper.getOrderingComparatorNullsHigh(left, right, context.getFunctionRegistry());
 
-      final LogicalExpression f =
-          FunctionGenerationHelper
-          .getOrderingComparatorNullsHigh(left, right, context.getFunctionRegistry());
-
-      HoldingContainer out = cg.addExpr(f, ClassGenerator.BlkCreateMode.FALSE);
+        HoldingContainer out = cg.addExpr(f, ClassGenerator.BlkCreateMode.FALSE);
 
-      // check if two values are not equal (comparator result != 0)
-      jc = cg.getEvalBlock()._if(out.getValue().ne(JExpr.lit(0)));
+        // check if two values are not equal (comparator result != 0)
+        jc = cg.getEvalBlock()._if(out.getValue().ne(JExpr.lit(0)));
 
-      jc._then()._return(JExpr.FALSE);
+        jc._then()._return(JExpr.FALSE);
+      }
     }
 
     // All key expressions compared equal, so return TRUE
-    cg.getEvalBlock()._return(JExpr.TRUE);
+    cg.getEvalBlock()._return( areBothNulls ? JExpr.FALSE : JExpr.TRUE);
   }
 
   private void setupSetValue(ClassGenerator<HashTable> cg, LogicalExpression[] keyExprs,
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/HashTableTemplate.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/HashTableTemplate.java
index 25ada28..e123896 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/HashTableTemplate.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/HashTableTemplate.java
@@ -224,6 +224,13 @@ public abstract class HashTableTemplate implements HashTable {
       if (isProbe) {
         match = isKeyMatchInternalProbe(incomingRowIdx, currentIdxWithinBatch);
       } else {
+        // in case (of a hash-join only) where both the new incoming key and the current are null, treat them as
+        // a match; i.e. the new would be added into the helper (but not the Hash-Table !), though it would never
+        // be used (not putting it into the helper would take a bigger code change, and some performance cost, hence
+        // not worth it).  In the past such a new null key was added into the Hash-Table (i.e., no match), which
+        // created long costly chains - SEE DRILL-6880)
+        if ( areBothKeysNull(incomingRowIdx, currentIdxWithinBatch) ) { return true; }
+
         match = isKeyMatchInternalBuild(incomingRowIdx, currentIdxWithinBatch);
       }
 
@@ -424,6 +431,12 @@ public abstract class HashTableTemplate implements HashTable {
     }
 
     @RuntimeOverridden
+    protected boolean areBothKeysNull(
+      @Named("incomingRowIdx") int incomingRowIdx, @Named("htRowIdx") int htRowIdx) throws SchemaChangeException {
+      return false;
+    }
+
+    @RuntimeOverridden
     protected boolean isKeyMatchInternalProbe(
         @Named("incomingRowIdx") int incomingRowIdx, @Named("htRowIdx") int htRowIdx) throws SchemaChangeException {
       return false;