You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2023/03/27 23:02:00 UTC

[impala] 06/17: IMPALA-11779: Fix crash in TopNNode due to slots in null type

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch branch-4.1.2
in repository https://gitbox.apache.org/repos/asf/impala.git

commit dff569b7e388b0ece08e04d9cc13d2a8a96d629e
Author: stiga-huang <hu...@gmail.com>
AuthorDate: Sat Dec 10 17:10:38 2022 +0800

    IMPALA-11779: Fix crash in TopNNode due to slots in null type
    
    BE can't codegen or evaluate exprs in NULL type. So when FE transfers
    exprs to BE (via thrift), it will convert exprs in NULL type into
    NullLiteral with Boolean type, e.g. see code in Expr#treeToThrift().
    The type doesn't matter since ScalarExprEvaluator::GetValue() in BE
    returns nullptr for null values of all types, and nullptr is treated as
    null value.
    
    Most of the exprs in BE are generated from thrift TExprs transferred
    from FE, which guarantees they are not NULL type exprs. However, in
    TopNPlanNode::Init(), we create SlotRefs directly based on the sort
    tuple descriptor. If there are NULL type slots in the tuple descriptor,
    we get SlotRefs in NULL type, which will crash codegen or evaluation (if
    codegen is disabled) on them.
    
    This patch adds a type-safe create method for SlotRef which uses
    TYPE_BOOLEAN for TYPE_NULL. BE codes that create SlotRef directly from
    SlotDescriptors are replaced by calling this create method, which
    guarantees no TYPE_NULL exprs are used in the corresponding evaluators.
    
    Tests:
     - Added new tests in partitioned-top-n.test
     - Ran exhaustive tests
    
    Change-Id: I6aaf80c5129eaf788c70c8f041021eaf73087f94
    Reviewed-on: http://gerrit.cloudera.org:8080/19336
    Reviewed-by: Zoltan Borok-Nagy <bo...@cloudera.com>
    Tested-by: Quanlong Huang <hu...@gmail.com>
---
 be/src/exec/grouping-aggregator.cc                 |  6 ++--
 be/src/exec/topn-node.cc                           |  3 +-
 be/src/exprs/slot-ref.cc                           | 10 ++++++
 be/src/exprs/slot-ref.h                            |  4 +++
 .../queries/QueryTest/partitioned-top-n.test       | 40 ++++++++++++++++++++++
 5 files changed, 57 insertions(+), 6 deletions(-)

diff --git a/be/src/exec/grouping-aggregator.cc b/be/src/exec/grouping-aggregator.cc
index 1b8ebd104..40471892e 100644
--- a/be/src/exec/grouping-aggregator.cc
+++ b/be/src/exec/grouping-aggregator.cc
@@ -103,10 +103,8 @@ Status GroupingAggregatorConfig::Init(
   for (int i = 0; i < grouping_exprs_.size(); ++i) {
     SlotDescriptor* desc = intermediate_tuple_desc_->slots()[i];
     DCHECK(desc->type().type == TYPE_NULL || desc->type() == grouping_exprs_[i]->type());
-    // Hack to avoid TYPE_NULL SlotRefs.
-    SlotRef* build_expr = state->obj_pool()->Add(desc->type().type != TYPE_NULL ?
-            new SlotRef(desc) :
-            new SlotRef(desc, ColumnType(TYPE_BOOLEAN)));
+    // Use SlotRef::TypeSafeCreate() to avoid TYPE_NULL SlotRefs.
+    SlotRef* build_expr = state->obj_pool()->Add(SlotRef::TypeSafeCreate(desc));
     build_exprs_.push_back(build_expr);
     // Not an entry point because all hash table callers support codegen.
     RETURN_IF_ERROR(
diff --git a/be/src/exec/topn-node.cc b/be/src/exec/topn-node.cc
index 7d2ff11b1..3de1b68d1 100644
--- a/be/src/exec/topn-node.cc
+++ b/be/src/exec/topn-node.cc
@@ -95,8 +95,7 @@ Status TopNPlanNode::Init(const TPlanNode& tnode, FragmentState* state) {
 
     // Construct SlotRefs that simply copy the output tuple to itself.
     for (const SlotDescriptor* slot_desc : output_tuple_desc_->slots()) {
-      SlotRef* slot_ref =
-          state->obj_pool()->Add(new SlotRef(slot_desc, slot_desc->type()));
+      SlotRef* slot_ref = state->obj_pool()->Add(SlotRef::TypeSafeCreate(slot_desc));
       noop_tuple_exprs_.push_back(slot_ref);
       RETURN_IF_ERROR(slot_ref->Init(*row_descriptor_, true, state));
     }
diff --git a/be/src/exprs/slot-ref.cc b/be/src/exprs/slot-ref.cc
index 32d4e254c..d883f68db 100644
--- a/be/src/exprs/slot-ref.cc
+++ b/be/src/exprs/slot-ref.cc
@@ -72,6 +72,16 @@ SlotRef::SlotRef(const ColumnType& type, int offset, const bool nullable /* = fa
     null_indicator_offset_(0, nullable ? offset : -1),
     slot_id_(-1) {}
 
+SlotRef* SlotRef::TypeSafeCreate(const SlotDescriptor* desc) {
+  if (desc->type().type == TYPE_NULL) {
+    // ScalarExprEvaluator requires a non-null type for the expr. It returns nullptr for
+    // null values of all types. So replacing TYPE_NULL to an arbitrary type is ok.
+    // Here we use TYPE_BOOLEAN for consistency with other places.
+    return new SlotRef(desc, ColumnType(TYPE_BOOLEAN));
+  }
+  return new SlotRef(desc);
+}
+
 Status SlotRef::Init(
     const RowDescriptor& row_desc, bool is_entry_point, FragmentState* state) {
   DCHECK(type_.IsStructType() || children_.size() == 0);
diff --git a/be/src/exprs/slot-ref.h b/be/src/exprs/slot-ref.h
index dfb299825..c0b1bf31a 100644
--- a/be/src/exprs/slot-ref.h
+++ b/be/src/exprs/slot-ref.h
@@ -45,6 +45,10 @@ class SlotRef : public ScalarExpr {
   /// does not generate the appropriate exprs).
   SlotRef(const SlotDescriptor* desc, const ColumnType& type);
 
+  /// Create a SlotRef based on the given SlotDescriptor 'desc' and make sure the type is
+  /// not TYPE_NULL (if so, replaced it with TYPE_BOOLEAN).
+  static SlotRef* TypeSafeCreate(const SlotDescriptor* desc);
+
   /// Used for testing.  GetValue will return tuple + offset interpreted as 'type'
   SlotRef(const ColumnType& type, int offset, const bool nullable = false);
 
diff --git a/testdata/workloads/functional-query/queries/QueryTest/partitioned-top-n.test b/testdata/workloads/functional-query/queries/QueryTest/partitioned-top-n.test
index 2c29b7389..7cfff846f 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/partitioned-top-n.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/partitioned-top-n.test
@@ -80,3 +80,43 @@ NULL,0,1
 ---- TYPES
 TINYINT, INT, BIGINT
 ====
+---- QUERY
+# IMPALA-11779: test null slots in the sort tuple
+with v1 as (
+  select '0' as a1, '' as b1 from alltypestiny
+), v2 as (
+  select '' as a2, null as b2
+), v3 as (
+  select b1 as b
+  from v1 left join v2 on a1 = a2
+)
+select 1 from (
+  select row_number() over (partition by b order by b) rnk
+  from v3
+) v
+where rnk = 1
+---- RESULTS
+1
+---- TYPES
+TINYINT
+====
+---- QUERY
+# IMPALA-11779: test null slots in the sort tuple
+with v1 as (
+  select '0' as a1, '' as b1 from alltypes
+), v2 as (
+  select '' as a2, null as b2
+), v3 as (
+  select b1 as b
+  from v1 left join v2 on a1 = a2
+)
+select count(*) from (
+  select row_number() over (partition by b order by b) rnk
+  from v3
+) v
+where rnk < 10
+---- RESULTS
+9
+---- TYPES
+BIGINT
+====