You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2021/05/19 01:36:55 UTC

[impala] 05/05: IMPALA-10433: Use Iceberg's fixed partition transforms

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 824b39e82967261f5a40f26318cf0913751b5134
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
AuthorDate: Mon May 10 15:35:16 2021 +0200

    IMPALA-10433: Use Iceberg's fixed partition transforms
    
    Because of an Iceberg bug, Impala didn't push predicates to
    Iceberg for dates/timestamps when the predicate referred to a
    value before the UNIX epoch.
    
    https://github.com/apache/iceberg/pull/1981 fixed the Iceberg
    bug, and Impala recently switched to an Iceberg version that has
    the fix; therefore, this patch enables predicate pushdown for all
    timestamp/date values.
    
    The above Iceberg patch maintains backward compatibility with the
    old, wrong behavior. Therefore, with the fixed transforms we
    sometimes need to read one more Iceberg partition than necessary.
    
    Testing:
     * Updated current e2e tests
    
    Change-Id: Ie67f41a53f21c7bdb8449ca0d27746158be7675a
    Reviewed-on: http://gerrit.cloudera.org:8080/17417
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../java/org/apache/impala/planner/IcebergScanNode.java  | 13 ++-----------
 .../QueryTest/iceberg-partition-transform-insert.test    | 16 ++++++++--------
 2 files changed, 10 insertions(+), 19 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java b/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java
index f700deb..f719735 100644
--- a/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java
@@ -195,21 +195,12 @@ public class IcebergScanNode extends HdfsScanNode {
       }
       case TIMESTAMP: {
         long unixMicros = KuduUtil.timestampToUnixTimeMicros(analyzer, literal);
-        if (unixMicros >= 0) {
-          // Iceberg's partition transformations have a bug for times before the epoch.
-          // See https://github.com/apache/iceberg/pull/1981
-          // TODO: IMPALA-10433 remove the workarounds once we use an Iceberg version
-          // that contains the fix.
-          unboundPredicate = Expressions.predicate(op, colName, unixMicros);
-        }
+        unboundPredicate = Expressions.predicate(op, colName, unixMicros);
         break;
       }
       case DATE: {
         int daysSinceEpoch = ((DateLiteral) literal).getValue();
-        if (daysSinceEpoch >= 0) {
-          // See comment at TIMESTAMP.
-          unboundPredicate = Expressions.predicate(op, colName, daysSinceEpoch);
-        }
+        unboundPredicate = Expressions.predicate(op, colName, daysSinceEpoch);
         break;
       }
       case DECIMAL: {
diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-partition-transform-insert.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-partition-transform-insert.test
index 06979be..0c9c623 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-partition-transform-insert.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-partition-transform-insert.test
@@ -402,7 +402,7 @@ where t = '1969-02-15 13:55:03';
 TIMESTAMP,DATE
 ---- RUNTIME_PROFILE
 aggregation(SUM, RowsRead): 2
-aggregation(SUM, NumRowGroups): 3
+aggregation(SUM, NumRowGroups): 2
 ====
 ---- QUERY
 # Test partition pruning with RUNTIME_PROFILE.
@@ -452,7 +452,7 @@ where d = '1969-12-15';
 TIMESTAMP,DATE
 ---- RUNTIME_PROFILE
 aggregation(SUM, RowsRead): 2
-aggregation(SUM, NumRowGroups): 3
+aggregation(SUM, NumRowGroups): 2
 ====
 ---- QUERY
 # Create table with MONTH partition transform
@@ -524,7 +524,7 @@ where t = '1969-02-15 13:55:03';
 TIMESTAMP,DATE
 ---- RUNTIME_PROFILE
 aggregation(SUM, RowsRead): 1
-aggregation(SUM, NumRowGroups): 5
+aggregation(SUM, NumRowGroups): 1
 ====
 ---- QUERY
 # Test partition pruning with RUNTIME_PROFILE.
@@ -560,7 +560,7 @@ where d = '1969-12-15';
 TIMESTAMP,DATE
 ---- RUNTIME_PROFILE
 aggregation(SUM, RowsRead): 1
-aggregation(SUM, NumRowGroups): 5
+aggregation(SUM, NumRowGroups): 2
 ====
 ---- QUERY
 # Create table with DAY partition transform
@@ -648,7 +648,7 @@ where t = '1969-02-15 13:55:03';
 TIMESTAMP,DATE
 ---- RUNTIME_PROFILE
 aggregation(SUM, RowsRead): 1
-aggregation(SUM, NumRowGroups): 6
+aggregation(SUM, NumRowGroups): 1
 ====
 ---- QUERY
 # Test partition pruning with RUNTIME_PROFILE.
@@ -684,7 +684,7 @@ where d = '1969-12-15';
 TIMESTAMP,DATE
 ---- RUNTIME_PROFILE
 aggregation(SUM, RowsRead): 1
-aggregation(SUM, NumRowGroups): 6
+aggregation(SUM, NumRowGroups): 1
 ====
 ---- QUERY
 # Create table with HOUR partition transform
@@ -739,7 +739,7 @@ where t = '1969-12-31 22:55:03';
 TIMESTAMP
 ---- RUNTIME_PROFILE
 aggregation(SUM, RowsRead): 1
-aggregation(SUM, NumRowGroups): 6
+aggregation(SUM, NumRowGroups): 2
 ====
 ---- QUERY
 # Test partition pruning with RUNTIME_PROFILE.
@@ -751,7 +751,7 @@ where t = '1969-12-31 23:55:03';
 TIMESTAMP
 ---- RUNTIME_PROFILE
 aggregation(SUM, RowsRead): 2
-aggregation(SUM, NumRowGroups): 6
+aggregation(SUM, NumRowGroups): 2
 ====
 ---- QUERY
 # Test partition pruning with RUNTIME_PROFILE.