You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2020/08/04 23:16:24 UTC

[impala] 02/02: IMPALA-9984: Implement codegen for TupleIsNullPredicate

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 15afacd124b2f889d42e1fc2db9b03e9ba9a6121
Author: Daniel Becker <da...@cloudera.com>
AuthorDate: Tue Jul 21 11:12:19 2020 +0200

    IMPALA-9984: Implement codegen for TupleIsNullPredicate
    
    This commit implements proper codegen for TupleIsNullPredicate.
    
    Change-Id: I410aa7ec762ca16f455bd7da1dce763c1a7b156e
    Reviewed-on: http://gerrit.cloudera.org:8080/16227
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/codegen/gen_ir_descriptions.py      |   1 +
 be/src/codegen/impala-ir.cc                |   1 +
 be/src/exprs/CMakeLists.txt                |   1 +
 be/src/exprs/tuple-is-null-predicate-ir.cc |  24 ++++++
 be/src/exprs/tuple-is-null-predicate.cc    | 121 ++++++++++++++++++++++++++++-
 be/src/exprs/tuple-is-null-predicate.h     |   6 +-
 6 files changed, 152 insertions(+), 2 deletions(-)

diff --git a/be/src/codegen/gen_ir_descriptions.py b/be/src/codegen/gen_ir_descriptions.py
index fbbca0d..f085a9c 100755
--- a/be/src/codegen/gen_ir_descriptions.py
+++ b/be/src/codegen/gen_ir_descriptions.py
@@ -230,6 +230,7 @@ ir_functions = [
   ["DOUBLE_MIN_MAX_FILTER_INSERT", "_ZN6impala18DoubleMinMaxFilter6InsertEPKv"],
   ["STRING_MIN_MAX_FILTER_INSERT", "_ZN6impala18StringMinMaxFilter6InsertEPKv"],
   ["TIMESTAMP_MIN_MAX_FILTER_INSERT", "_ZN6impala21TimestampMinMaxFilter6InsertEPKv"],
+  ["TUPLE_ROW_GET_TUPLE_IS_NULL", "_ZN6impala19TupleRowTupleIsNullEPKNS_8TupleRowEi"],
   ["DATE_MIN_MAX_FILTER_INSERT", "_ZN6impala16DateMinMaxFilter6InsertEPKv"],
   ["DECIMAL_MIN_MAX_FILTER_INSERT4", "_ZN6impala19DecimalMinMaxFilter7Insert4EPKv"],
   ["DECIMAL_MIN_MAX_FILTER_INSERT8", "_ZN6impala19DecimalMinMaxFilter7Insert8EPKv"],
diff --git a/be/src/codegen/impala-ir.cc b/be/src/codegen/impala-ir.cc
index 2fdc82f..c5bc441 100644
--- a/be/src/codegen/impala-ir.cc
+++ b/be/src/codegen/impala-ir.cc
@@ -56,6 +56,7 @@
 #include "exprs/scalar-expr-ir.cc"
 #include "exprs/string-functions-ir.cc"
 #include "exprs/timestamp-functions-ir.cc"
+#include "exprs/tuple-is-null-predicate-ir.cc"
 #include "exprs/udf-builtins-ir.cc"
 #include "exprs/utility-functions-ir.cc"
 #include "runtime/krpc-data-stream-sender-ir.cc"
diff --git a/be/src/exprs/CMakeLists.txt b/be/src/exprs/CMakeLists.txt
index 649be16..38de9ef 100644
--- a/be/src/exprs/CMakeLists.txt
+++ b/be/src/exprs/CMakeLists.txt
@@ -64,6 +64,7 @@ add_library(Exprs
   timestamp-functions-ir.cc
   timezone_db.cc
   tuple-is-null-predicate.cc
+  tuple-is-null-predicate-ir.cc
   scalar-fn-call.cc
   udf-builtins.cc
   udf-builtins-ir.cc
diff --git a/be/src/exprs/tuple-is-null-predicate-ir.cc b/be/src/exprs/tuple-is-null-predicate-ir.cc
new file mode 100644
index 0000000..9497724
--- /dev/null
+++ b/be/src/exprs/tuple-is-null-predicate-ir.cc
@@ -0,0 +1,24 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "runtime/tuple-row.h"
+
+namespace impala {
+bool TupleRowTupleIsNull(const TupleRow* row, int index) {
+  return row->GetTuple(index) == nullptr;
+}
+}
diff --git a/be/src/exprs/tuple-is-null-predicate.cc b/be/src/exprs/tuple-is-null-predicate.cc
index 4225863..6289f84 100644
--- a/be/src/exprs/tuple-is-null-predicate.cc
+++ b/be/src/exprs/tuple-is-null-predicate.cc
@@ -22,6 +22,8 @@
 #include "gen-cpp/Exprs_types.h"
 
 #include "common/names.h"
+#include "codegen/llvm-codegen.h"
+#include "codegen/codegen-anyval.h"
 #include "runtime/descriptors.h"
 #include "runtime/tuple-row.h"
 
@@ -61,9 +63,126 @@ Status TupleIsNullPredicate::Init(
   return Status::OK();
 }
 
+/// For sample IR, see 'FillCodegendComputeFnConstantFalse' and
+/// 'FillCodegendComputeFnNonConstant'.
+/// If there is at least one non-nullable tuple, we codegen a function that returns a
+/// constant false BooleanValue.
 Status TupleIsNullPredicate::GetCodegendComputeFnImpl(LlvmCodeGen* codegen,
     llvm::Function** fn) {
-  return GetCodegendComputeFnWrapper(codegen, fn);
+  DCHECK_EQ(0, GetNumChildren());
+
+  llvm::Value* args[2];
+  llvm::Function* function = CreateIrFunctionPrototype(
+      "TupleIsNullPredicate", codegen, &args);
+
+  if (tuple_idxs_.size() < tuple_ids_.size()) {
+    FillCodegendComputeFnConstantFalse(codegen, function);
+  } else {
+    FillCodegendComputeFnNonConstant(codegen, function, args);
+  }
+
+  *fn = codegen->FinalizeFunction(function);
+  if (UNLIKELY(*fn == nullptr)) {
+    return Status(TErrorCode::IR_VERIFY_FAILED, "TupleIsNullPredicate");
+  }
+  return Status::OK();
+}
+
+/// Sample IR:
+/// define i16 @TupleIsNullPredicate(%"class.impala::ScalarExprEvaluator"* %eval,
+///                                  %"class.impala::TupleRow"* %row) #50 {
+/// return_false:
+///   ret i16 0
+/// }
+void TupleIsNullPredicate::FillCodegendComputeFnConstantFalse(
+    LlvmCodeGen* codegen, llvm::Function* function) const {
+  llvm::LLVMContext& context = codegen->context();
+  LlvmBuilder builder(context);
+
+  llvm::BasicBlock* return_false_block =
+      llvm::BasicBlock::Create(context, "return_false", function);
+  builder.SetInsertPoint(return_false_block);
+
+  CodegenAnyVal ret_val = CodegenAnyVal::GetNonNullVal(
+      codegen, &builder, ColumnType(TYPE_BOOLEAN), "ret_val");
+  ret_val.SetVal(false);
+  builder.CreateRet(ret_val.GetLoweredValue());
+}
+
+/// Sample IR:
+/// define i16 @TupleIsNullPredicate(%"class.impala::ScalarExprEvaluator"* %eval,
+///                                  %"class.impala::TupleRow"* %row) #51 {
+/// entry:
+///   br label %check_null
+///
+/// check_null:                                       ; preds = %entry
+///   %tuple_is_null_fn = call i1 @_ZN6impala19TupleRowTupleIsNullEPKNS_8TupleRowEi(
+///       %"class.impala::TupleRow"* %row, i32 0)
+///   %0 = select i1 %tuple_is_null_fn, i32 1, i32 0
+///   %count = add i32 0, %0
+///   br label %check_null1
+///
+/// check_null1:                                      ; preds = %check_null
+///   %tuple_is_null_fn2 = call i1 @_ZN6impala19TupleRowTupleIsNullEPKNS_8TupleRowEi(
+///       %"class.impala::TupleRow"* %row, i32 1)
+///   %1 = select i1 %tuple_is_null_fn2, i32 1, i32 0
+///   %count3 = add i32 %count, %1
+///   br label %compare_count
+///
+/// compare_count:                                    ; preds = %check_null1
+///   %count_eq_col_num = icmp eq i32 %count3, 2
+///   %2 = zext i1 %count_eq_col_num to i16
+///   %3 = shl i16 %2, 8
+///   %ret_val = or i16 0, %3
+///   ret i16 %ret_val
+/// }
+void TupleIsNullPredicate::FillCodegendComputeFnNonConstant(
+    LlvmCodeGen* codegen, llvm::Function* function, llvm::Value* args[2]) const {
+  llvm::LLVMContext& context = codegen->context();
+  LlvmBuilder builder(context);
+
+  llvm::BasicBlock* entry_block =
+      llvm::BasicBlock::Create(context, "entry", function);
+  builder.SetInsertPoint(entry_block);
+
+  // Signature:
+  // bool TupleRowTupleIsNull(const TupleRow* row, int index);
+  // Returns whether row->GetTuple(index) is nullptr.
+  llvm::Function* tuple_is_null_fn =
+      codegen->GetFunction(IRFunction::TUPLE_ROW_GET_TUPLE_IS_NULL, false);
+
+  llvm::Value* current_count_val = codegen->GetI32Constant(0);
+  for (int i = 0; i < tuple_idxs_.size(); i++) {
+    // We create the next block that checks whether the next tuple is null and increases
+    // 'count' if it is. While the builder's insertion point is still in the previous
+    // block we insert a branch to the new block.
+    llvm::BasicBlock* check_null_block
+        = llvm::BasicBlock::Create(context, "check_null", function);
+    builder.CreateBr(check_null_block);
+
+    builder.SetInsertPoint(check_null_block);
+    llvm::Value* tuple_is_null = builder.CreateCall(tuple_is_null_fn,
+        {args[1], codegen->GetI32Constant(tuple_idxs_[i])}, "tuple_is_null_fn");
+    llvm::Value* tuple_is_null_int = builder.CreateSelect(tuple_is_null,
+        codegen->GetI32Constant(1), codegen->GetI32Constant(0));
+    current_count_val = builder.CreateAdd(current_count_val, tuple_is_null_int, "count");
+  }
+
+  // Create the last block that will compare 'count' with the original number of tuples.
+  // If they are equal, return a true value, otherwise a false value. While the builder's
+  // insertion point is still in the previous block we insert a branch to the new block.
+  llvm::BasicBlock* compare_count_block
+      = llvm::BasicBlock::Create(context, "compare_count", function);
+  builder.CreateBr(compare_count_block);
+
+  builder.SetInsertPoint(compare_count_block);
+  llvm::Constant* tuple_ids_size = codegen->GetI32Constant(tuple_ids_.size());
+  llvm::Value* cmp =
+      builder.CreateICmpEQ(current_count_val, tuple_ids_size, "count_eq_col_num");
+  CodegenAnyVal ret_val = CodegenAnyVal::GetNonNullVal(
+      codegen, &builder, ColumnType(TYPE_BOOLEAN), "ret_val");
+  ret_val.SetVal(cmp);
+  builder.CreateRet(ret_val.GetLoweredValue());
 }
 
 string TupleIsNullPredicate::DebugString() const {
diff --git a/be/src/exprs/tuple-is-null-predicate.h b/be/src/exprs/tuple-is-null-predicate.h
index e028ca3..4d1c0b3 100644
--- a/be/src/exprs/tuple-is-null-predicate.h
+++ b/be/src/exprs/tuple-is-null-predicate.h
@@ -32,7 +32,6 @@ class TExprNode;
 /// on non-nullable tuples (see IMPALA-904/IMPALA-5504). This happens for exprs that
 /// are evaluated before the outer join that makes the tuples given to this predicate
 /// nullable, e.g., in the ON-clause of that join.
-/// TODO: Implement codegen to eliminate overhead on non-nullable tuples.
 class TupleIsNullPredicate: public Predicate {
  protected:
   friend class ScalarExpr;
@@ -49,6 +48,11 @@ class TupleIsNullPredicate: public Predicate {
       ScalarExprEvaluator*, const TupleRow*) const override;
 
  private:
+  void FillCodegendComputeFnConstantFalse(
+      LlvmCodeGen* codegen, llvm::Function* function) const;
+  void FillCodegendComputeFnNonConstant(
+      LlvmCodeGen* codegen, llvm::Function* function, llvm::Value* args[2]) const;
+
   /// Tuple ids to check for NULL. May contain ids of nullable and non-nullable tuples.
   std::vector<TupleId> tuple_ids_;