You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jr...@apache.org on 2017/09/12 07:16:13 UTC

[2/4] incubator-impala git commit: IMPALA-3897 Codegen null-aware constant in PHJ::ProcessBuildBatch()

IMPALA-3897 Codegen null-aware constant in PHJ::ProcessBuildBatch()

This change codegen outs a branch in ProcessBuildBatch(). This branch never gets
executed for most of the join types except NULL_AWARE_LEFT_ANTI_JOIN.
The branch itself is not expensive to execute, but it will reduce codegen
time by removing the dead code inside the branch for almost all join
modes.

Change-Id: I06acbebc9d2d23bef4734b480a5d3ce41680ea70
Reviewed-on: http://gerrit.cloudera.org:8080/7849
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/4e842620
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/4e842620
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/4e842620

Branch: refs/heads/master
Commit: 4e84262074cab9cc8ad83a5a9d5bc42b4be42d76
Parents: e1ae988
Author: aphadke <ap...@cloudera.com>
Authored: Fri Aug 18 17:02:02 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Tue Sep 12 03:46:21 2017 +0000

----------------------------------------------------------------------
 be/src/codegen/gen_ir_descriptions.py           |  2 +-
 be/src/exec/partitioned-hash-join-builder-ir.cc |  5 ++---
 be/src/exec/partitioned-hash-join-builder.cc    | 15 ++++++++++++---
 be/src/exec/partitioned-hash-join-builder.h     |  8 +++++---
 4 files changed, 20 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4e842620/be/src/codegen/gen_ir_descriptions.py
----------------------------------------------------------------------
diff --git a/be/src/codegen/gen_ir_descriptions.py b/be/src/codegen/gen_ir_descriptions.py
index 668ae24..75d233c 100755
--- a/be/src/codegen/gen_ir_descriptions.py
+++ b/be/src/codegen/gen_ir_descriptions.py
@@ -99,7 +99,7 @@ ir_functions = [
   ["HASH_FNV", "IrFnvHash"],
   ["HASH_MURMUR", "IrMurmurHash"],
   ["PHJ_PROCESS_BUILD_BATCH",
-   "_ZN6impala10PhjBuilder17ProcessBuildBatchEPNS_8RowBatchEPNS_12HashTableCtxEb"],
+   "_ZN6impala10PhjBuilder17ProcessBuildBatchEPNS_8RowBatchEPNS_12HashTableCtxEbb"],
   ["PHJ_PROCESS_PROBE_BATCH_INNER_JOIN",
    "_ZN6impala23PartitionedHashJoinNode17ProcessProbeBatchILi0EEEiNS_13TPrefetchMode4typeEPNS_8RowBatchEPNS_12HashTableCtxEPNS_6StatusE"],
   ["PHJ_PROCESS_PROBE_BATCH_LEFT_OUTER_JOIN",

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4e842620/be/src/exec/partitioned-hash-join-builder-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-hash-join-builder-ir.cc b/be/src/exec/partitioned-hash-join-builder-ir.cc
index e15e116..b9c2cc3 100644
--- a/be/src/exec/partitioned-hash-join-builder-ir.cc
+++ b/be/src/exec/partitioned-hash-join-builder-ir.cc
@@ -37,15 +37,14 @@ inline bool PhjBuilder::AppendRow(
 }
 
 Status PhjBuilder::ProcessBuildBatch(
-    RowBatch* build_batch, HashTableCtx* ctx, bool build_filters) {
+    RowBatch* build_batch, HashTableCtx* ctx, bool build_filters, bool is_null_aware) {
   Status status;
   HashTableCtx::ExprValuesCache* expr_vals_cache = ctx->expr_values_cache();
   expr_vals_cache->Reset();
   FOREACH_ROW(build_batch, 0, build_batch_iter) {
     TupleRow* build_row = build_batch_iter.Get();
     if (!ctx->EvalAndHashBuild(build_row)) {
-      if (null_aware_partition_ != NULL) {
-        // TODO: remove with codegen/template
+      if (is_null_aware) {
         // If we are NULL aware and this build row has NULL in the eq join slot,
         // append it to the null_aware partition. We will need it later.
         if (UNLIKELY(

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4e842620/be/src/exec/partitioned-hash-join-builder.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-hash-join-builder.cc b/be/src/exec/partitioned-hash-join-builder.cc
index 0c04541..d292c59 100644
--- a/be/src/exec/partitioned-hash-join-builder.cc
+++ b/be/src/exec/partitioned-hash-join-builder.cc
@@ -191,14 +191,18 @@ Status PhjBuilder::Send(RuntimeState* state, RowBatch* batch) {
   SCOPED_TIMER(partition_build_rows_timer_);
   bool build_filters = ht_ctx_->level() == 0 && filter_ctxs_.size() > 0;
   if (process_build_batch_fn_ == NULL) {
-    RETURN_IF_ERROR(ProcessBuildBatch(batch, ht_ctx_.get(), build_filters));
+      RETURN_IF_ERROR(ProcessBuildBatch(batch, ht_ctx_.get(), build_filters,
+          join_op_ == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN));
+
   } else {
     DCHECK(process_build_batch_fn_level0_ != NULL);
     if (ht_ctx_->level() == 0) {
       RETURN_IF_ERROR(
-          process_build_batch_fn_level0_(this, batch, ht_ctx_.get(), build_filters));
+          process_build_batch_fn_level0_(this, batch, ht_ctx_.get(), build_filters,
+              join_op_ == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN));
     } else {
-      RETURN_IF_ERROR(process_build_batch_fn_(this, batch, ht_ctx_.get(), build_filters));
+      RETURN_IF_ERROR(process_build_batch_fn_(this, batch, ht_ctx_.get(), build_filters,
+          join_op_ == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN));
     }
   }
 
@@ -807,6 +811,11 @@ Status PhjBuilder::CodegenProcessBuildBatch(LlvmCodeGen* codegen,
   DCHECK_EQ(replaced_constants.stores_tuples, 0);
   DCHECK_EQ(replaced_constants.quadratic_probing, 0);
 
+  Value* is_null_aware_arg = codegen->GetArgument(process_build_batch_fn, 5);
+  is_null_aware_arg->replaceAllUsesWith(
+      ConstantInt::get(Type::getInt1Ty(codegen->context()),
+      join_op_ == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN));
+
   Function* process_build_batch_fn_level0 =
       codegen->CloneFunction(process_build_batch_fn);
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4e842620/be/src/exec/partitioned-hash-join-builder.h
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-hash-join-builder.h b/be/src/exec/partitioned-hash-join-builder.h
index eba9e9f..2c52988 100644
--- a/be/src/exec/partitioned-hash-join-builder.h
+++ b/be/src/exec/partitioned-hash-join-builder.h
@@ -290,9 +290,11 @@ class PhjBuilder : public DataSink {
   Status CreateAndPreparePartition(int level, Partition** partition) WARN_UNUSED_RESULT;
 
   /// Reads the rows in build_batch and partitions them into hash_partitions_. If
-  /// 'build_filters' is true, runtime filters are populated.
+  /// 'build_filters' is true, runtime filters are populated. 'is_null_aware' is
+  /// set to true if the join type is a null aware join.
   Status ProcessBuildBatch(
-      RowBatch* build_batch, HashTableCtx* ctx, bool build_filters) WARN_UNUSED_RESULT;
+      RowBatch* build_batch, HashTableCtx* ctx, bool build_filters,
+      bool is_null_aware) WARN_UNUSED_RESULT;
 
   /// Append 'row' to 'stream'. In the common case, appending the row to the stream
   /// immediately succeeds. Otherwise this function falls back to the slower path of
@@ -491,7 +493,7 @@ class PhjBuilder : public DataSink {
   /// and is used when the partition level is 0, otherwise xxx_fn_ uses murmur hash and is
   /// used for subsequent levels.
   typedef Status (*ProcessBuildBatchFn)(
-      PhjBuilder*, RowBatch*, HashTableCtx*, bool build_filters);
+      PhjBuilder*, RowBatch*, HashTableCtx*, bool build_filters, bool is_null_aware);
   /// Jitted ProcessBuildBatch function pointers.  NULL if codegen is disabled.
   ProcessBuildBatchFn process_build_batch_fn_;
   ProcessBuildBatchFn process_build_batch_fn_level0_;