You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2023/03/05 23:53:38 UTC

[impala] 02/02: IMPALA-11960: Fix constant propagation from TIMESTAMP to DATE

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 3573db68c83918126337c9267140b7fa36f153e4
Author: Csaba Ringhofer <cs...@cloudera.com>
AuthorDate: Thu Mar 2 14:37:17 2023 +0100

    IMPALA-11960: Fix constant propagation from TIMESTAMP to DATE
    
    The constant propagation introduced in IMPALA-10064 handled conversion
    of < and > predicates from timestamps to dates incorrectly.
    
    Example:
    select * from functional.alltypes_date_partition
      where date_col = cast(timestamp_col as date)
        and timestamp_col > '2009-01-01 01:00:00'
        and timestamp_col < '2009-02-01 01:00:00';
    
    Before this change query rewrites added the following predicates:
    date_col > DATE '2009-01-01' AND date_col < DATE '2009-02-01'
    This incorrectly rejected all timestamps on the days of the
    lower / upper bounds.
    
    The fix is to rewrite < and > to <= and >= in the date predicates.
    
    < could be kept if the upper bound is a constant with no time-of-day
    part, e.g. timestamp_col < "2009-01-01" could be rewritten to
    date_col < "2009-01-01", but this optimization is not added in this
    patch to make it simpler.
    
    Testing:
    - added planner + EE regression tests
    
    Change-Id: I1938bf5e91057b220daf8a1892940f674aac3d68
    Reviewed-on: http://gerrit.cloudera.org:8080/19572
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../impala/analysis/ConstantPredicateHandler.java  |  9 ++++++
 .../queries/PlannerTest/constant-propagation.test  | 36 ++++++++++++++++++++--
 .../QueryTest/range-constant-propagation.test      | 21 +++++++++++++
 3 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/ConstantPredicateHandler.java b/fe/src/main/java/org/apache/impala/analysis/ConstantPredicateHandler.java
index 36ae91360..1cf5e3822 100644
--- a/fe/src/main/java/org/apache/impala/analysis/ConstantPredicateHandler.java
+++ b/fe/src/main/java/org/apache/impala/analysis/ConstantPredicateHandler.java
@@ -172,6 +172,15 @@ public class ConstantPredicateHandler {
 
   private Expr rewriteWithOp(BinaryPredicate pred, BinaryPredicate.Operator newOp,
     Analyzer analyzer) {
+    // Allow equality to avoid problems with truncating the time-of-day part of
+    // timestamps (IMPALA-11960).
+    // TODO: the original (stricter) op could be kept if the upper bound is a
+    //       TimestampLiteral without time component, e.g ts_col < "2000-01-01"
+    if (newOp == BinaryPredicate.Operator.LT) {
+      newOp = BinaryPredicate.Operator.LE;
+    } else if (newOp == BinaryPredicate.Operator.GT) {
+      newOp = BinaryPredicate.Operator.GE;
+    }
     if (pred.getOp() == newOp) return pred;
     BinaryPredicate newPred = new BinaryPredicate(newOp, pred.getChild(0),
         pred.getChild(1));
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/constant-propagation.test b/testdata/workloads/functional-planner/queries/PlannerTest/constant-propagation.test
index 350cfbddf..3455e227c 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/constant-propagation.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/constant-propagation.test
@@ -434,6 +434,38 @@ PLAN-ROOT SINK
    predicates: functional.alltypes_date_partition.timestamp_col <= TIMESTAMP '2009-02-01 00:00:00', functional.alltypes_date_partition.timestamp_col >= TIMESTAMP '2009-01-01 00:00:00', date_col = CAST(timestamp_col AS DATE)
    row-size=65B cardinality=30
 ====
+# Same as above but using < > and timestamps with time-of-day parts.
+# Regression test for IMPALA-11960.
+with dp_view as
+(select * from functional.alltypes_date_partition
+where date_col = cast(timestamp_col as date))
+select * from dp_view
+where timestamp_col > '2009-01-01 01:00:00' and timestamp_col < '2009-02-01 01:00:00';
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [functional.alltypes_date_partition]
+   partition predicates: date_col >= DATE '2009-01-01' AND date_col <= DATE '2009-02-01'
+   HDFS partitions=32/55 files=32 size=15.99KB
+   predicates: functional.alltypes_date_partition.timestamp_col < TIMESTAMP '2009-02-01 01:00:00', functional.alltypes_date_partition.timestamp_col > TIMESTAMP '2009-01-01 01:00:00', date_col = CAST(timestamp_col AS DATE)
+   row-size=65B cardinality=30
+====
+# Same as above but using >= instead of >.
+# Regression test for IMPALA-11960.
+with dp_view as
+(select * from functional.alltypes_date_partition
+where date_col = cast(timestamp_col as date))
+select * from dp_view
+where timestamp_col >= '2009-01-01 01:00:00' and timestamp_col < '2009-02-01 01:00:00';
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [functional.alltypes_date_partition]
+   partition predicates: date_col >= DATE '2009-01-01' AND date_col <= DATE '2009-02-01'
+   HDFS partitions=32/55 files=32 size=15.99KB
+   predicates: functional.alltypes_date_partition.timestamp_col < TIMESTAMP '2009-02-01 01:00:00', functional.alltypes_date_partition.timestamp_col >= TIMESTAMP '2009-01-01 01:00:00', date_col = CAST(timestamp_col AS DATE)
+   row-size=65B cardinality=30
+====
 # Mix of various predicates some of which are eligible for propagation
 with dp_view as
 (select * from functional.alltypes_date_partition
@@ -521,8 +553,8 @@ and timestamp_col > from_timestamp('2009-02-01 00:02:00', 'yyyy-mm-dd');
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional.alltypes_date_partition]
-   partition predicates: date_col > DATE '2009-02-01'
-   HDFS partitions=23/55 files=23 size=11.28KB
+   partition predicates: date_col >= DATE '2009-02-01'
+   HDFS partitions=24/55 files=24 size=11.82KB
    predicates: timestamp_col > TIMESTAMP '2009-02-01 00:00:00', date_col = CAST(timestamp_col AS DATE)
    row-size=65B cardinality=21
 ====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/range-constant-propagation.test b/testdata/workloads/functional-query/queries/QueryTest/range-constant-propagation.test
index aa714147d..ba7b3b11d 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/range-constant-propagation.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/range-constant-propagation.test
@@ -10,6 +10,27 @@ and timestamp_col between '2009-01-01' and '2009-02-01';
 BIGINT, BIGINT
 ====
 ---- QUERY
+# Same query as above with time-of-day parts. Regression test for IMPALA-11960.
+select count(*), sum(int_col) from alltypes_date_partition
+where date_col = cast(timestamp_col as date)
+and timestamp_col > '2009-01-01 01:00:00' and timestamp_col < '2009-02-01 01:00:00';
+---- RESULTS
+155,620
+---- TYPES
+BIGINT, BIGINT
+====
+---- QUERY
+# Same as above but using >= instead of > and using a lower bound that matches with
+# a row. Regression test for IMPALA-11960.
+select count(*), sum(int_col) from alltypes_date_partition
+where date_col = cast(timestamp_col as date)
+and timestamp_col >= '2009-01-01 00:08:00.280' and timestamp_col < '2009-02-01 01:00:00';
+---- RESULTS
+156,628
+---- TYPES
+BIGINT, BIGINT
+====
+---- QUERY
 # Mix of various predicates some of which are eligible for propagation
 with dp_view as
 (select * from alltypes_date_partition