You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/02/09 16:59:39 UTC

[20/21] impala git commit: IMPALA-6473: Fix analytic fn that partitions and orders on same expr

IMPALA-6473: Fix analytic fn that partitions and orders on same expr

Previously, an analytic function that used the same expr in both the
'partition by' and 'order by' clauses, and where the expr meets the
criteria for being materialized before the sort, would hit an
IllegalStateException due to the expr being inserted into the same
ExprSubstitutionMap twice.

If the values have already been partitioned on the expr, then all of
the values for it in each partition will be the same and also ordering
on the expr doesn't change the results. So, the fix is to simply
exclude the duplicate expr from the 'order by' while still
partitioning on it.

Testing:
- Added a regression test to PlannerTest.

Change-Id: Id5f1d5fbc6f69df5850f96afed345ce27668c30b
Reviewed-on: http://gerrit.cloudera.org:8080/9218
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/2037222c
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/2037222c
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/2037222c

Branch: refs/heads/2.x
Commit: 2037222cdf9bd730a2ba103eecff7874dc61a702
Parents: e7df3b2
Author: Thomas Tauber-Marshall <tm...@cloudera.com>
Authored: Mon Feb 5 14:24:23 2018 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Feb 8 07:01:53 2018 +0000

----------------------------------------------------------------------
 .../apache/impala/planner/AnalyticPlanner.java  | 10 +++++---
 .../queries/PlannerTest/analytic-fns.test       | 27 ++++++++++++++++++--
 .../queries/PlannerTest/insert.test             |  2 +-
 3 files changed, 33 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/2037222c/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java b/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java
index 156bd5b..f504fda 100644
--- a/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java
+++ b/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java
@@ -339,9 +339,13 @@ public class AnalyticPlanner {
 
       // then sort on orderByExprs
       for (OrderByElement orderByElement: sortGroup.orderByElements) {
-        sortExprs.add(orderByElement.getExpr());
-        isAsc.add(orderByElement.isAsc());
-        nullsFirst.add(orderByElement.getNullsFirstParam());
+        // If the expr is in the PARTITION BY and already in 'sortExprs', but also in
+        // the ORDER BY, its unnecessary to add it to 'sortExprs' again.
+        if (!sortExprs.contains(orderByElement.getExpr())) {
+          sortExprs.add(orderByElement.getExpr());
+          isAsc.add(orderByElement.isAsc());
+          nullsFirst.add(orderByElement.getNullsFirstParam());
+        }
       }
 
       SortInfo sortInfo = createSortInfo(root, sortExprs, isAsc, nullsFirst);

http://git-wip-us.apache.org/repos/asf/impala/blob/2037222c/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test b/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
index 2977f5d..f78bfb2 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
@@ -285,7 +285,7 @@ PLAN-ROOT SINK
 |  window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
 |
 01:SORT
-|  order by: int_col ASC NULLS FIRST, smallint_col ASC NULLS FIRST, int_col ASC
+|  order by: int_col ASC NULLS FIRST, smallint_col ASC NULLS FIRST
 |
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
@@ -342,7 +342,7 @@ PLAN-ROOT SINK
 |  window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
 |
 01:SORT
-|  order by: int_col ASC NULLS FIRST, smallint_col ASC NULLS FIRST, int_col ASC
+|  order by: int_col ASC NULLS FIRST, smallint_col ASC NULLS FIRST
 |
 12:EXCHANGE [HASH(int_col,smallint_col)]
 |
@@ -2434,3 +2434,26 @@ PLAN-ROOT SINK
 00:SCAN HDFS [functional.alltypessmall]
    partitions=4/4 files=4 size=6.32KB
 ====
+# IMPALA-6473: analytic fn where the same expr is in the 'partition by' and the 'order by'
+select last_value(int_col)
+   over (partition by abs(int_col), string_col order by id, abs(int_col))
+from functional.alltypestiny
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+04:EXCHANGE [UNPARTITIONED]
+|
+02:ANALYTIC
+|  functions: last_value(int_col)
+|  partition by: abs(int_col), string_col
+|  order by: id ASC, abs(int_col) ASC
+|  window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|
+01:SORT
+|  order by: abs(int_col) ASC NULLS FIRST, string_col ASC NULLS FIRST, id ASC
+|
+03:EXCHANGE [HASH(abs(int_col),string_col)]
+|
+00:SCAN HDFS [functional.alltypestiny]
+   partitions=4/4 files=4 size=460B
+====

http://git-wip-us.apache.org/repos/asf/impala/blob/2037222c/testdata/workloads/functional-planner/queries/PlannerTest/insert.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/insert.test b/testdata/workloads/functional-planner/queries/PlannerTest/insert.test
index 522b058..ac225c2 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/insert.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/insert.test
@@ -523,7 +523,7 @@ WRITE TO HDFS [functional.alltypestiny, OVERWRITE=false, PARTITION-KEYS=(2009,1)
 |  window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
 |
 01:SORT
-|  order by: id ASC NULLS FIRST, id ASC
+|  order by: id ASC NULLS FIRST
 |
 00:SCAN HDFS [functional.alltypestiny]
    partitions=4/4 files=4 size=460B