You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2021/05/19 01:36:55 UTC
[impala] 05/05: IMPALA-10433: Use Iceberg's fixed partition
transforms
This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 824b39e82967261f5a40f26318cf0913751b5134
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
AuthorDate: Mon May 10 15:35:16 2021 +0200
IMPALA-10433: Use Iceberg's fixed partition transforms
Because of an Iceberg bug, Impala didn't push predicates down to
Iceberg for dates/timestamps when the predicate referred to a
value before the UNIX epoch.
https://github.com/apache/iceberg/pull/1981 fixed the Iceberg
bug, and Impala has recently switched to an Iceberg version that
includes the fix. Therefore, this patch enables predicate pushdown
for all timestamp/date values.
The above Iceberg patch maintains backward compatibility with the
old, wrong behavior. Therefore, we sometimes need to read one more
Iceberg partition than is strictly necessary.
Testing:
* Updated current e2e tests
Change-Id: Ie67f41a53f21c7bdb8449ca0d27746158be7675a
Reviewed-on: http://gerrit.cloudera.org:8080/17417
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
.../java/org/apache/impala/planner/IcebergScanNode.java | 13 ++-----------
.../QueryTest/iceberg-partition-transform-insert.test | 16 ++++++++--------
2 files changed, 10 insertions(+), 19 deletions(-)
diff --git a/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java b/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java
index f700deb..f719735 100644
--- a/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java
@@ -195,21 +195,12 @@ public class IcebergScanNode extends HdfsScanNode {
}
case TIMESTAMP: {
long unixMicros = KuduUtil.timestampToUnixTimeMicros(analyzer, literal);
- if (unixMicros >= 0) {
- // Iceberg's partition transformations have a bug for times before the epoch.
- // See https://github.com/apache/iceberg/pull/1981
- // TODO: IMPALA-10433 remove the workarounds once we use an Iceberg version
- // that contains the fix.
- unboundPredicate = Expressions.predicate(op, colName, unixMicros);
- }
+ unboundPredicate = Expressions.predicate(op, colName, unixMicros);
break;
}
case DATE: {
int daysSinceEpoch = ((DateLiteral) literal).getValue();
- if (daysSinceEpoch >= 0) {
- // See comment at TIMESTAMP.
- unboundPredicate = Expressions.predicate(op, colName, daysSinceEpoch);
- }
+ unboundPredicate = Expressions.predicate(op, colName, daysSinceEpoch);
break;
}
case DECIMAL: {
diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-partition-transform-insert.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-partition-transform-insert.test
index 06979be..0c9c623 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-partition-transform-insert.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-partition-transform-insert.test
@@ -402,7 +402,7 @@ where t = '1969-02-15 13:55:03';
TIMESTAMP,DATE
---- RUNTIME_PROFILE
aggregation(SUM, RowsRead): 2
-aggregation(SUM, NumRowGroups): 3
+aggregation(SUM, NumRowGroups): 2
====
---- QUERY
# Test partition pruning with RUNTIME_PROFILE.
@@ -452,7 +452,7 @@ where d = '1969-12-15';
TIMESTAMP,DATE
---- RUNTIME_PROFILE
aggregation(SUM, RowsRead): 2
-aggregation(SUM, NumRowGroups): 3
+aggregation(SUM, NumRowGroups): 2
====
---- QUERY
# Create table with MONTH partition transform
@@ -524,7 +524,7 @@ where t = '1969-02-15 13:55:03';
TIMESTAMP,DATE
---- RUNTIME_PROFILE
aggregation(SUM, RowsRead): 1
-aggregation(SUM, NumRowGroups): 5
+aggregation(SUM, NumRowGroups): 1
====
---- QUERY
# Test partition pruning with RUNTIME_PROFILE.
@@ -560,7 +560,7 @@ where d = '1969-12-15';
TIMESTAMP,DATE
---- RUNTIME_PROFILE
aggregation(SUM, RowsRead): 1
-aggregation(SUM, NumRowGroups): 5
+aggregation(SUM, NumRowGroups): 2
====
---- QUERY
# Create table with DAY partition transform
@@ -648,7 +648,7 @@ where t = '1969-02-15 13:55:03';
TIMESTAMP,DATE
---- RUNTIME_PROFILE
aggregation(SUM, RowsRead): 1
-aggregation(SUM, NumRowGroups): 6
+aggregation(SUM, NumRowGroups): 1
====
---- QUERY
# Test partition pruning with RUNTIME_PROFILE.
@@ -684,7 +684,7 @@ where d = '1969-12-15';
TIMESTAMP,DATE
---- RUNTIME_PROFILE
aggregation(SUM, RowsRead): 1
-aggregation(SUM, NumRowGroups): 6
+aggregation(SUM, NumRowGroups): 1
====
---- QUERY
# Create table with HOUR partition transform
@@ -739,7 +739,7 @@ where t = '1969-12-31 22:55:03';
TIMESTAMP
---- RUNTIME_PROFILE
aggregation(SUM, RowsRead): 1
-aggregation(SUM, NumRowGroups): 6
+aggregation(SUM, NumRowGroups): 2
====
---- QUERY
# Test partition pruning with RUNTIME_PROFILE.
@@ -751,7 +751,7 @@ where t = '1969-12-31 23:55:03';
TIMESTAMP
---- RUNTIME_PROFILE
aggregation(SUM, RowsRead): 2
-aggregation(SUM, NumRowGroups): 6
+aggregation(SUM, NumRowGroups): 2
====
---- QUERY
# Test partition pruning with RUNTIME_PROFILE.