You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jr...@apache.org on 2017/09/12 07:16:13 UTC
[2/4] incubator-impala git commit: IMPALA-3897 Codegen null-aware
constant in PHJ::ProcessBuildBatch()
IMPALA-3897 Codegen null-aware constant in PHJ::ProcessBuildBatch()
This change codegen outs a branch in ProcessBuildBatch(). This branch never gets
executed for most of the join types except NULL_AWARE_LEFT_ANTI_JOIN.
The branch itself is not expensive to execute, but it will reduce codegen
time by removing the dead code inside the branch for almost all join
modes.
Change-Id: I06acbebc9d2d23bef4734b480a5d3ce41680ea70
Reviewed-on: http://gerrit.cloudera.org:8080/7849
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/4e842620
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/4e842620
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/4e842620
Branch: refs/heads/master
Commit: 4e84262074cab9cc8ad83a5a9d5bc42b4be42d76
Parents: e1ae988
Author: aphadke <ap...@cloudera.com>
Authored: Fri Aug 18 17:02:02 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Tue Sep 12 03:46:21 2017 +0000
----------------------------------------------------------------------
be/src/codegen/gen_ir_descriptions.py | 2 +-
be/src/exec/partitioned-hash-join-builder-ir.cc | 5 ++---
be/src/exec/partitioned-hash-join-builder.cc | 15 ++++++++++++---
be/src/exec/partitioned-hash-join-builder.h | 8 +++++---
4 files changed, 20 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4e842620/be/src/codegen/gen_ir_descriptions.py
----------------------------------------------------------------------
diff --git a/be/src/codegen/gen_ir_descriptions.py b/be/src/codegen/gen_ir_descriptions.py
index 668ae24..75d233c 100755
--- a/be/src/codegen/gen_ir_descriptions.py
+++ b/be/src/codegen/gen_ir_descriptions.py
@@ -99,7 +99,7 @@ ir_functions = [
["HASH_FNV", "IrFnvHash"],
["HASH_MURMUR", "IrMurmurHash"],
["PHJ_PROCESS_BUILD_BATCH",
- "_ZN6impala10PhjBuilder17ProcessBuildBatchEPNS_8RowBatchEPNS_12HashTableCtxEb"],
+ "_ZN6impala10PhjBuilder17ProcessBuildBatchEPNS_8RowBatchEPNS_12HashTableCtxEbb"],
["PHJ_PROCESS_PROBE_BATCH_INNER_JOIN",
"_ZN6impala23PartitionedHashJoinNode17ProcessProbeBatchILi0EEEiNS_13TPrefetchMode4typeEPNS_8RowBatchEPNS_12HashTableCtxEPNS_6StatusE"],
["PHJ_PROCESS_PROBE_BATCH_LEFT_OUTER_JOIN",
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4e842620/be/src/exec/partitioned-hash-join-builder-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-hash-join-builder-ir.cc b/be/src/exec/partitioned-hash-join-builder-ir.cc
index e15e116..b9c2cc3 100644
--- a/be/src/exec/partitioned-hash-join-builder-ir.cc
+++ b/be/src/exec/partitioned-hash-join-builder-ir.cc
@@ -37,15 +37,14 @@ inline bool PhjBuilder::AppendRow(
}
Status PhjBuilder::ProcessBuildBatch(
- RowBatch* build_batch, HashTableCtx* ctx, bool build_filters) {
+ RowBatch* build_batch, HashTableCtx* ctx, bool build_filters, bool is_null_aware) {
Status status;
HashTableCtx::ExprValuesCache* expr_vals_cache = ctx->expr_values_cache();
expr_vals_cache->Reset();
FOREACH_ROW(build_batch, 0, build_batch_iter) {
TupleRow* build_row = build_batch_iter.Get();
if (!ctx->EvalAndHashBuild(build_row)) {
- if (null_aware_partition_ != NULL) {
- // TODO: remove with codegen/template
+ if (is_null_aware) {
// If we are NULL aware and this build row has NULL in the eq join slot,
// append it to the null_aware partition. We will need it later.
if (UNLIKELY(
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4e842620/be/src/exec/partitioned-hash-join-builder.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-hash-join-builder.cc b/be/src/exec/partitioned-hash-join-builder.cc
index 0c04541..d292c59 100644
--- a/be/src/exec/partitioned-hash-join-builder.cc
+++ b/be/src/exec/partitioned-hash-join-builder.cc
@@ -191,14 +191,18 @@ Status PhjBuilder::Send(RuntimeState* state, RowBatch* batch) {
SCOPED_TIMER(partition_build_rows_timer_);
bool build_filters = ht_ctx_->level() == 0 && filter_ctxs_.size() > 0;
if (process_build_batch_fn_ == NULL) {
- RETURN_IF_ERROR(ProcessBuildBatch(batch, ht_ctx_.get(), build_filters));
+ RETURN_IF_ERROR(ProcessBuildBatch(batch, ht_ctx_.get(), build_filters,
+ join_op_ == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN));
+
} else {
DCHECK(process_build_batch_fn_level0_ != NULL);
if (ht_ctx_->level() == 0) {
RETURN_IF_ERROR(
- process_build_batch_fn_level0_(this, batch, ht_ctx_.get(), build_filters));
+ process_build_batch_fn_level0_(this, batch, ht_ctx_.get(), build_filters,
+ join_op_ == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN));
} else {
- RETURN_IF_ERROR(process_build_batch_fn_(this, batch, ht_ctx_.get(), build_filters));
+ RETURN_IF_ERROR(process_build_batch_fn_(this, batch, ht_ctx_.get(), build_filters,
+ join_op_ == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN));
}
}
@@ -807,6 +811,11 @@ Status PhjBuilder::CodegenProcessBuildBatch(LlvmCodeGen* codegen,
DCHECK_EQ(replaced_constants.stores_tuples, 0);
DCHECK_EQ(replaced_constants.quadratic_probing, 0);
+ Value* is_null_aware_arg = codegen->GetArgument(process_build_batch_fn, 5);
+ is_null_aware_arg->replaceAllUsesWith(
+ ConstantInt::get(Type::getInt1Ty(codegen->context()),
+ join_op_ == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN));
+
Function* process_build_batch_fn_level0 =
codegen->CloneFunction(process_build_batch_fn);
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4e842620/be/src/exec/partitioned-hash-join-builder.h
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-hash-join-builder.h b/be/src/exec/partitioned-hash-join-builder.h
index eba9e9f..2c52988 100644
--- a/be/src/exec/partitioned-hash-join-builder.h
+++ b/be/src/exec/partitioned-hash-join-builder.h
@@ -290,9 +290,11 @@ class PhjBuilder : public DataSink {
Status CreateAndPreparePartition(int level, Partition** partition) WARN_UNUSED_RESULT;
/// Reads the rows in build_batch and partitions them into hash_partitions_. If
- /// 'build_filters' is true, runtime filters are populated.
+ /// 'build_filters' is true, runtime filters are populated. 'is_null_aware' is
+ /// set to true if the join type is a null aware join.
Status ProcessBuildBatch(
- RowBatch* build_batch, HashTableCtx* ctx, bool build_filters) WARN_UNUSED_RESULT;
+ RowBatch* build_batch, HashTableCtx* ctx, bool build_filters,
+ bool is_null_aware) WARN_UNUSED_RESULT;
/// Append 'row' to 'stream'. In the common case, appending the row to the stream
/// immediately succeeds. Otherwise this function falls back to the slower path of
@@ -491,7 +493,7 @@ class PhjBuilder : public DataSink {
/// and is used when the partition level is 0, otherwise xxx_fn_ uses murmur hash and is
/// used for subsequent levels.
typedef Status (*ProcessBuildBatchFn)(
- PhjBuilder*, RowBatch*, HashTableCtx*, bool build_filters);
+ PhjBuilder*, RowBatch*, HashTableCtx*, bool build_filters, bool is_null_aware);
/// Jitted ProcessBuildBatch function pointers. NULL if codegen is disabled.
ProcessBuildBatchFn process_build_batch_fn_;
ProcessBuildBatchFn process_build_batch_fn_level0_;