You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2020/03/28 21:34:36 UTC

[impala] 03/06: IMPALA-8533: Impala daemon crash on sort

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch branch-3.4.0
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 4b00c7d9f9fa12fb703e510c1adcff4dfbaf1b4b
Author: Kurt Deschler <kd...@cloudera.com>
AuthorDate: Tue Mar 17 19:20:52 2020 -0500

    IMPALA-8533: Impala daemon crash on sort
    
    This crash was caused by an empty sort tuple descriptor that was
    generated as a result of union substitutions replacing all sort
    fields with literals that were subsequently removed from the ordering
    spec. There was no check in place to prevent the empty tuple descriptor
    from being sent to impalad, where it caused a divide-by-zero crash.
    
    Fix:
    This fix avoids inserting a sort node when there are no fields remaining
    to sort on. It also adds a precondition to SortNode that will prevent
    similar issues from crashing impalad.
    
    Testing:
    Test cases added to PlannerTest/union.test
    
    Change-Id: If19303fbf55927c1e1b76b9b22ab354322b21c54
    Reviewed-on: http://gerrit.cloudera.org:8080/15473
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    (cherry picked from commit b865de7d971313beab486211b227b7ce4ad10c44)
---
 .../org/apache/impala/planner/AnalyticPlanner.java |  35 +--
 .../java/org/apache/impala/planner/SortNode.java   |   2 +
 .../queries/PlannerTest/union.test                 | 260 +++++++++++++++++++++
 3 files changed, 281 insertions(+), 16 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java b/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java
index 05ae95c..4b6ea6e 100644
--- a/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java
+++ b/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java
@@ -363,24 +363,27 @@ public class AnalyticPlanner {
       }
 
       SortInfo sortInfo = createSortInfo(root, sortExprs, isAsc, nullsFirst);
-      SortNode sortNode =
-          SortNode.createTotalSortNode(ctx_.getNextNodeId(), root, sortInfo, 0);
-
-      // if this sort group does not have partitioning exprs, we want the sort
-      // to be executed like a regular distributed sort
-      if (hasActivePartition) sortNode.setIsAnalyticSort(true);
-
-      if (partitionExprs != null) {
-        // create required input partition
-        DataPartition inputPartition = DataPartition.UNPARTITIONED;
-        if (hasActivePartition) {
-          inputPartition = DataPartition.hashPartitioned(partitionExprs);
+      // IMPALA-8533: Avoid generating sort with empty tuple descriptor
+      if(sortInfo.getSortTupleDescriptor().getSlots().size() > 0) {
+        SortNode sortNode =
+            SortNode.createTotalSortNode(ctx_.getNextNodeId(), root, sortInfo, 0);
+
+        // if this sort group does not have partitioning exprs, we want the sort
+        // to be executed like a regular distributed sort
+        if (hasActivePartition) sortNode.setIsAnalyticSort(true);
+
+        if (partitionExprs != null) {
+          // create required input partition
+          DataPartition inputPartition = DataPartition.UNPARTITIONED;
+          if (hasActivePartition) {
+            inputPartition = DataPartition.hashPartitioned(partitionExprs);
+          }
+          sortNode.setInputPartition(inputPartition);
         }
-        sortNode.setInputPartition(inputPartition);
-      }
 
-      root = sortNode;
-      root.init(analyzer_);
+        root = sortNode;
+        root.init(analyzer_);
+      }
     }
 
     // create one AnalyticEvalNode per window group
diff --git a/fe/src/main/java/org/apache/impala/planner/SortNode.java b/fe/src/main/java/org/apache/impala/planner/SortNode.java
index ba894c3..fe8e9ef 100644
--- a/fe/src/main/java/org/apache/impala/planner/SortNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/SortNode.java
@@ -134,6 +134,8 @@ public class SortNode extends PlanNode {
 
     // populate resolvedTupleExprs_ and outputSmap_
     List<SlotDescriptor> sortTupleSlots = info_.getSortTupleDescriptor().getSlots();
+    Preconditions.checkState(sortTupleSlots.size() > 0,
+        "empty sort tuple descriptor");
     List<Expr> slotExprs = info_.getMaterializedExprs();
     resolvedTupleExprs_ = new ArrayList<>();
     outputSmap_ = new ExprSubstitutionMap();
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/union.test b/testdata/workloads/functional-planner/queries/PlannerTest/union.test
index 8cb122b..e410f9d 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/union.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/union.test
@@ -4204,3 +4204,263 @@ Per-Host Resources: mem-estimate=128.00MB mem-reservation=32.00KB thread-reserva
    tuple-ids=0 row-size=4B cardinality=7.30K
    in pipelines: 01(GETNEXT)
 ====
+#IMPALA-8533: Analytic ordering columns are constant after union substitution
+WITH
+base_10 AS (SELECT 1 UNION ALL SELECT 1),
+base_10k AS (SELECT 2 constant FROM base_10 b1)
+SELECT ROW_NUMBER() OVER (ORDER BY b1.constant) row_num
+FROM base_10k b1;
+---- QUERYOPTIONS
+explain_level=2
+---- DISTRIBUTEDPLAN
+F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+|  Per-Host Resources: mem-estimate=4.00MB mem-reservation=4.00MB thread-reservation=1
+PLAN-ROOT SINK
+|  output exprs: row_number()
+|  mem-estimate=0B mem-reservation=0B thread-reservation=0
+|
+01:ANALYTIC
+|  functions: row_number()
+|  order by: 2 ASC
+|  window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0
+|  tuple-ids=0,4 row-size=8B cardinality=2
+|  in pipelines: <none>
+|
+00:UNION
+   constant-operands=2
+   mem-estimate=0B mem-reservation=0B thread-reservation=0
+   tuple-ids=0 row-size=0B cardinality=2
+   in pipelines: <none>
+====
+#IMPALA-8533: Ordering on select same constant from all union arms.
+# Sort not currently eliminated.
+SELECT c1 FROM (SELECT 1 c1 UNION ALL SELECT 1 c1) a ORDER BY a.c1;
+---- QUERYOPTIONS
+explain_level=2
+---- DISTRIBUTEDPLAN
+F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+|  Per-Host Resources: mem-estimate=6.00MB mem-reservation=6.00MB thread-reservation=1
+PLAN-ROOT SINK
+|  output exprs: c1
+|  mem-estimate=0B mem-reservation=0B thread-reservation=0
+|
+01:SORT
+|  order by: c1 ASC
+|  mem-estimate=6.00MB mem-reservation=6.00MB spill-buffer=2.00MB thread-reservation=0
+|  tuple-ids=2 row-size=1B cardinality=2
+|  in pipelines: 01(GETNEXT)
+|
+00:UNION
+   constant-operands=2
+   mem-estimate=0B mem-reservation=0B thread-reservation=0
+   tuple-ids=0 row-size=1B cardinality=2
+   in pipelines: <none>
+====
+#IMPALA-8533: Ordering on select constant union select same constant from table
+# Sort not currently eliminated.
+SELECT c1 FROM (SELECT 1 c1 FROM functional.alltypestiny UNION ALL SELECT 1 c1 FROM functional.alltypestiny) a ORDER BY c1;
+---- QUERYOPTIONS
+explain_level=2
+---- DISTRIBUTEDPLAN
+F03:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+|  Per-Host Resources: mem-estimate=16.00KB mem-reservation=0B thread-reservation=1
+PLAN-ROOT SINK
+|  output exprs: c1
+|  mem-estimate=0B mem-reservation=0B thread-reservation=0
+|
+04:MERGING-EXCHANGE [UNPARTITIONED]
+|  order by: c1 ASC
+|  mem-estimate=16.00KB mem-reservation=0B thread-reservation=0
+|  tuple-ids=4 row-size=1B cardinality=16
+|  in pipelines: 03(GETNEXT)
+|
+F02:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
+Per-Host Resources: mem-estimate=38.00MB mem-reservation=6.01MB thread-reservation=2
+03:SORT
+|  order by: c1 ASC
+|  mem-estimate=6.00MB mem-reservation=6.00MB spill-buffer=2.00MB thread-reservation=0
+|  tuple-ids=4 row-size=1B cardinality=16
+|  in pipelines: 03(GETNEXT), 01(OPEN), 02(OPEN)
+|
+00:UNION
+|  mem-estimate=0B mem-reservation=0B thread-reservation=0
+|  tuple-ids=2 row-size=1B cardinality=16
+|  in pipelines: 01(GETNEXT), 02(GETNEXT)
+|
+|--02:SCAN HDFS [functional.alltypestiny, RANDOM]
+|     HDFS partitions=4/4 files=4 size=460B
+|     stored statistics:
+|       table: rows=8 size=460B
+|       partitions: 4/4 rows=8
+|       columns: all
+|     extrapolated-rows=disabled max-scan-range-rows=2
+|     mem-estimate=32.00MB mem-reservation=8.00KB thread-reservation=1
+|     tuple-ids=1 row-size=0B cardinality=8
+|     in pipelines: 02(GETNEXT)
+|
+01:SCAN HDFS [functional.alltypestiny, RANDOM]
+   HDFS partitions=4/4 files=4 size=460B
+   stored statistics:
+     table: rows=8 size=460B
+     partitions: 4/4 rows=8
+     columns: all
+   extrapolated-rows=disabled max-scan-range-rows=2
+   mem-estimate=32.00MB mem-reservation=8.00KB thread-reservation=1
+   tuple-ids=0 row-size=0B cardinality=8
+   in pipelines: 01(GETNEXT)
+====
+#IMPALA-8533: Select constant from unioned query.
+# Sort not currently eliminated.
+SELECT 1 FROM (SELECT int_col FROM functional.alltypestiny UNION ALL SELECT int_col FROM functional.alltypestiny) a ORDER BY int_col;
+---- QUERYOPTIONS
+explain_level=2
+---- DISTRIBUTEDPLAN
+F03:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+|  Per-Host Resources: mem-estimate=16.00KB mem-reservation=0B thread-reservation=1
+PLAN-ROOT SINK
+|  output exprs: CAST(1 AS TINYINT)
+|  mem-estimate=0B mem-reservation=0B thread-reservation=0
+|
+04:MERGING-EXCHANGE [UNPARTITIONED]
+|  order by: int_col ASC
+|  mem-estimate=16.00KB mem-reservation=0B thread-reservation=0
+|  tuple-ids=4 row-size=4B cardinality=16
+|  in pipelines: 03(GETNEXT)
+|
+F02:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
+Per-Host Resources: mem-estimate=38.00MB mem-reservation=6.01MB thread-reservation=2
+03:SORT
+|  order by: int_col ASC
+|  mem-estimate=6.00MB mem-reservation=6.00MB spill-buffer=2.00MB thread-reservation=0
+|  tuple-ids=4 row-size=4B cardinality=16
+|  in pipelines: 03(GETNEXT), 01(OPEN), 02(OPEN)
+|
+00:UNION
+|  pass-through-operands: all
+|  mem-estimate=0B mem-reservation=0B thread-reservation=0
+|  tuple-ids=2 row-size=4B cardinality=16
+|  in pipelines: 01(GETNEXT), 02(GETNEXT)
+|
+|--02:SCAN HDFS [functional.alltypestiny, RANDOM]
+|     HDFS partitions=4/4 files=4 size=460B
+|     stored statistics:
+|       table: rows=8 size=460B
+|       partitions: 4/4 rows=8
+|       columns: all
+|     extrapolated-rows=disabled max-scan-range-rows=2
+|     mem-estimate=32.00MB mem-reservation=8.00KB thread-reservation=1
+|     tuple-ids=1 row-size=4B cardinality=8
+|     in pipelines: 02(GETNEXT)
+|
+01:SCAN HDFS [functional.alltypestiny, RANDOM]
+   HDFS partitions=4/4 files=4 size=460B
+   stored statistics:
+     table: rows=8 size=460B
+     partitions: 4/4 rows=8
+     columns: all
+   extrapolated-rows=disabled max-scan-range-rows=2
+   mem-estimate=32.00MB mem-reservation=8.00KB thread-reservation=1
+   tuple-ids=0 row-size=4B cardinality=8
+   in pipelines: 01(GETNEXT)
+====
+#IMPALA-8533: Select constant from select constant union select same constant
+# Sort not currently eliminated.
+SELECT 1 FROM (SELECT 1 c1 FROM functional.alltypestiny UNION ALL SELECT 1 c1 FROM functional.alltypestiny) a ORDER BY c1;
+---- QUERYOPTIONS
+explain_level=2
+---- DISTRIBUTEDPLAN
+F03:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+|  Per-Host Resources: mem-estimate=16.00KB mem-reservation=0B thread-reservation=1
+PLAN-ROOT SINK
+|  output exprs: CAST(1 AS TINYINT)
+|  mem-estimate=0B mem-reservation=0B thread-reservation=0
+|
+04:MERGING-EXCHANGE [UNPARTITIONED]
+|  order by: c1 ASC
+|  mem-estimate=16.00KB mem-reservation=0B thread-reservation=0
+|  tuple-ids=4 row-size=1B cardinality=16
+|  in pipelines: 03(GETNEXT)
+|
+F02:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
+Per-Host Resources: mem-estimate=38.00MB mem-reservation=6.01MB thread-reservation=2
+03:SORT
+|  order by: c1 ASC
+|  mem-estimate=6.00MB mem-reservation=6.00MB spill-buffer=2.00MB thread-reservation=0
+|  tuple-ids=4 row-size=1B cardinality=16
+|  in pipelines: 03(GETNEXT), 01(OPEN), 02(OPEN)
+|
+00:UNION
+|  mem-estimate=0B mem-reservation=0B thread-reservation=0
+|  tuple-ids=2 row-size=1B cardinality=16
+|  in pipelines: 01(GETNEXT), 02(GETNEXT)
+|
+|--02:SCAN HDFS [functional.alltypestiny, RANDOM]
+|     HDFS partitions=4/4 files=4 size=460B
+|     stored statistics:
+|       table: rows=8 size=460B
+|       partitions: 4/4 rows=8
+|       columns: all
+|     extrapolated-rows=disabled max-scan-range-rows=2
+|     mem-estimate=32.00MB mem-reservation=8.00KB thread-reservation=1
+|     tuple-ids=1 row-size=0B cardinality=8
+|     in pipelines: 02(GETNEXT)
+|
+01:SCAN HDFS [functional.alltypestiny, RANDOM]
+   HDFS partitions=4/4 files=4 size=460B
+   stored statistics:
+     table: rows=8 size=460B
+     partitions: 4/4 rows=8
+     columns: all
+   extrapolated-rows=disabled max-scan-range-rows=2
+   mem-estimate=32.00MB mem-reservation=8.00KB thread-reservation=1
+   tuple-ids=0 row-size=0B cardinality=8
+   in pipelines: 01(GETNEXT)
+====
+#IMPALA-8533: Select constant from union order by 1
+# Sort is eliminated.
+SELECT 1 FROM (SELECT int_col c1 FROM functional.alltypestiny UNION ALL SELECT int_col c1 FROM functional.alltypestiny) a ORDER BY 1;
+---- QUERYOPTIONS
+explain_level=2
+---- DISTRIBUTEDPLAN
+F03:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+|  Per-Host Resources: mem-estimate=16.00KB mem-reservation=0B thread-reservation=1
+PLAN-ROOT SINK
+|  output exprs: CAST(1 AS TINYINT)
+|  mem-estimate=0B mem-reservation=0B thread-reservation=0
+|
+03:EXCHANGE [UNPARTITIONED]
+|  mem-estimate=16.00KB mem-reservation=0B thread-reservation=0
+|  tuple-ids=2 row-size=0B cardinality=16
+|  in pipelines: 01(GETNEXT), 02(GETNEXT)
+|
+F02:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
+Per-Host Resources: mem-estimate=32.00MB mem-reservation=8.00KB thread-reservation=2
+00:UNION
+|  pass-through-operands: all
+|  mem-estimate=0B mem-reservation=0B thread-reservation=0
+|  tuple-ids=2 row-size=0B cardinality=16
+|  in pipelines: 01(GETNEXT), 02(GETNEXT)
+|
+|--02:SCAN HDFS [functional.alltypestiny, RANDOM]
+|     HDFS partitions=4/4 files=4 size=460B
+|     stored statistics:
+|       table: rows=8 size=460B
+|       partitions: 4/4 rows=8
+|       columns: all
+|     extrapolated-rows=disabled max-scan-range-rows=2
+|     mem-estimate=32.00MB mem-reservation=8.00KB thread-reservation=1
+|     tuple-ids=1 row-size=0B cardinality=8
+|     in pipelines: 02(GETNEXT)
+|
+01:SCAN HDFS [functional.alltypestiny, RANDOM]
+   HDFS partitions=4/4 files=4 size=460B
+   stored statistics:
+     table: rows=8 size=460B
+     partitions: 4/4 rows=8
+     columns: all
+   extrapolated-rows=disabled max-scan-range-rows=2
+   mem-estimate=32.00MB mem-reservation=8.00KB thread-reservation=1
+   tuple-ids=0 row-size=0B cardinality=8
+   in pipelines: 01(GETNEXT)
+====