You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2023/03/05 23:53:38 UTC
[impala] 02/02: IMPALA-11960: Fix constant propagation from TIMESTAMP to DATE
This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 3573db68c83918126337c9267140b7fa36f153e4
Author: Csaba Ringhofer <cs...@cloudera.com>
AuthorDate: Thu Mar 2 14:37:17 2023 +0100
IMPALA-11960: Fix constant propagation from TIMESTAMP to DATE
The constant propagation introduced in IMPALA-10064 handled conversion
of < and > predicates from timestamps to dates incorrectly.
Example:
select * from functional.alltypes_date_partition
where date_col = cast(timestamp_col as date)
and timestamp_col > '2009-01-01 01:00:00'
and timestamp_col < '2009-02-01 01:00:00';
Before this change query rewrites added the following predicates:
date_col > DATE '2009-01-01' AND date_col < DATE '2009-02-01'
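This incorrectly rejected all rows whose timestamps fall on the day of the
lower or upper bound. For example, '2009-01-01 02:00:00' satisfies
timestamp_col > '2009-01-01 01:00:00', but its date does not satisfy
date_col > DATE '2009-01-01'.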
The fix is to rewrite < and > to <= and >= in the date predicates.
The strict < could be kept when the upper bound is a constant with no
time-of-day part, e.g. timestamp_col < "2009-01-01" could be rewritten to
date_col < "2009-01-01". This optimization is left out of this patch to
keep it simple.
Testing:
- added planner + EE regression tests
Change-Id: I1938bf5e91057b220daf8a1892940f674aac3d68
Reviewed-on: http://gerrit.cloudera.org:8080/19572
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
.../impala/analysis/ConstantPredicateHandler.java | 9 ++++++
.../queries/PlannerTest/constant-propagation.test | 36 ++++++++++++++++++++--
.../QueryTest/range-constant-propagation.test | 21 +++++++++++++
3 files changed, 64 insertions(+), 2 deletions(-)
diff --git a/fe/src/main/java/org/apache/impala/analysis/ConstantPredicateHandler.java b/fe/src/main/java/org/apache/impala/analysis/ConstantPredicateHandler.java
index 36ae91360..1cf5e3822 100644
--- a/fe/src/main/java/org/apache/impala/analysis/ConstantPredicateHandler.java
+++ b/fe/src/main/java/org/apache/impala/analysis/ConstantPredicateHandler.java
@@ -172,6 +172,15 @@ public class ConstantPredicateHandler {
private Expr rewriteWithOp(BinaryPredicate pred, BinaryPredicate.Operator newOp,
Analyzer analyzer) {
+ // Allow equality to avoid problems with truncating the time-of-day part of
+ // timestamps (IMPALA-11960).
+ // TODO: the original (stricter) op could be kept if the upper bound is a
+ // TimestampLiteral without time component, e.g. ts_col < "2000-01-01"
+ if (newOp == BinaryPredicate.Operator.LT) {
+ newOp = BinaryPredicate.Operator.LE;
+ } else if (newOp == BinaryPredicate.Operator.GT) {
+ newOp = BinaryPredicate.Operator.GE;
+ }
if (pred.getOp() == newOp) return pred;
BinaryPredicate newPred = new BinaryPredicate(newOp, pred.getChild(0),
pred.getChild(1));
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/constant-propagation.test b/testdata/workloads/functional-planner/queries/PlannerTest/constant-propagation.test
index 350cfbddf..3455e227c 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/constant-propagation.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/constant-propagation.test
@@ -434,6 +434,38 @@ PLAN-ROOT SINK
predicates: functional.alltypes_date_partition.timestamp_col <= TIMESTAMP '2009-02-01 00:00:00', functional.alltypes_date_partition.timestamp_col >= TIMESTAMP '2009-01-01 00:00:00', date_col = CAST(timestamp_col AS DATE)
row-size=65B cardinality=30
====
+# Same as above but using < > and timestamps with time-of-day parts.
+# Regression test for IMPALA-11960.
+with dp_view as
+(select * from functional.alltypes_date_partition
+where date_col = cast(timestamp_col as date))
+select * from dp_view
+where timestamp_col > '2009-01-01 01:00:00' and timestamp_col < '2009-02-01 01:00:00';
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [functional.alltypes_date_partition]
+ partition predicates: date_col >= DATE '2009-01-01' AND date_col <= DATE '2009-02-01'
+ HDFS partitions=32/55 files=32 size=15.99KB
+ predicates: functional.alltypes_date_partition.timestamp_col < TIMESTAMP '2009-02-01 01:00:00', functional.alltypes_date_partition.timestamp_col > TIMESTAMP '2009-01-01 01:00:00', date_col = CAST(timestamp_col AS DATE)
+ row-size=65B cardinality=30
+====
+# Same as above but using >= instead of >.
+# Regression test for IMPALA-11960.
+with dp_view as
+(select * from functional.alltypes_date_partition
+where date_col = cast(timestamp_col as date))
+select * from dp_view
+where timestamp_col >= '2009-01-01 01:00:00' and timestamp_col < '2009-02-01 01:00:00';
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [functional.alltypes_date_partition]
+ partition predicates: date_col >= DATE '2009-01-01' AND date_col <= DATE '2009-02-01'
+ HDFS partitions=32/55 files=32 size=15.99KB
+ predicates: functional.alltypes_date_partition.timestamp_col < TIMESTAMP '2009-02-01 01:00:00', functional.alltypes_date_partition.timestamp_col >= TIMESTAMP '2009-01-01 01:00:00', date_col = CAST(timestamp_col AS DATE)
+ row-size=65B cardinality=30
+====
# Mix of various predicates some of which are eligible for propagation
with dp_view as
(select * from functional.alltypes_date_partition
@@ -521,8 +553,8 @@ and timestamp_col > from_timestamp('2009-02-01 00:02:00', 'yyyy-mm-dd');
PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypes_date_partition]
- partition predicates: date_col > DATE '2009-02-01'
- HDFS partitions=23/55 files=23 size=11.28KB
+ partition predicates: date_col >= DATE '2009-02-01'
+ HDFS partitions=24/55 files=24 size=11.82KB
predicates: timestamp_col > TIMESTAMP '2009-02-01 00:00:00', date_col = CAST(timestamp_col AS DATE)
row-size=65B cardinality=21
====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/range-constant-propagation.test b/testdata/workloads/functional-query/queries/QueryTest/range-constant-propagation.test
index aa714147d..ba7b3b11d 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/range-constant-propagation.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/range-constant-propagation.test
@@ -10,6 +10,27 @@ and timestamp_col between '2009-01-01' and '2009-02-01';
BIGINT, BIGINT
====
---- QUERY
+# Same query as above with time-of-day parts. Regression test for IMPALA-11960.
+select count(*), sum(int_col) from alltypes_date_partition
+where date_col = cast(timestamp_col as date)
+and timestamp_col > '2009-01-01 01:00:00' and timestamp_col < '2009-02-01 01:00:00';
+---- RESULTS
+155,620
+---- TYPES
+BIGINT, BIGINT
+====
+---- QUERY
+# Same as above but using >= instead of > and using a lower bound that matches with
+# a row. Regression test for IMPALA-11960.
+select count(*), sum(int_col) from alltypes_date_partition
+where date_col = cast(timestamp_col as date)
+and timestamp_col >= '2009-01-01 00:08:00.280' and timestamp_col < '2009-02-01 01:00:00';
+---- RESULTS
+156,628
+---- TYPES
+BIGINT, BIGINT
+====
+---- QUERY
# Mix of various predicates some of which are eligible for propagation
with dp_view as
(select * from alltypes_date_partition