You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2020/05/26 23:56:06 UTC

[incubator-iceberg] branch master updated: ORC: Disable predicate pushdown for timestamp type (#1069)

This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new 9583787  ORC: Disable predicate pushdown for timestamp type (#1069)
9583787 is described below

commit 9583787eb428a1d2015ba08ceabe43521fc2bbef
Author: Shardul Mahadik <sm...@linkedin.com>
AuthorDate: Tue May 26 16:55:55 2020 -0700

    ORC: Disable predicate pushdown for timestamp type (#1069)
    
    * Correctly handle timestamps less than epoch
    * Disable ORC pushdown for timestamp types because of ORC-611
---
 .../java/org/apache/iceberg/data/TestLocalScan.java     |  2 --
 .../iceberg/data/TestMetricsRowGroupFilterTypes.java    | 13 +++++++------
 .../apache/iceberg/orc/ExpressionToSearchArgument.java  |  7 +++++--
 .../iceberg/orc/TestExpressionToSearchArgument.java     | 17 ++++++++++++-----
 4 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/data/src/test/java/org/apache/iceberg/data/TestLocalScan.java b/data/src/test/java/org/apache/iceberg/data/TestLocalScan.java
index f51fb5e..cd80f23 100644
--- a/data/src/test/java/org/apache/iceberg/data/TestLocalScan.java
+++ b/data/src/test/java/org/apache/iceberg/data/TestLocalScan.java
@@ -56,7 +56,6 @@ import org.apache.iceberg.relocated.com.google.common.collect.Lists;
 import org.apache.iceberg.relocated.com.google.common.collect.Sets;
 import org.apache.iceberg.types.Types;
 import org.junit.Assert;
-import org.junit.Assume;
 import org.junit.Before;
 import org.junit.Rule;
 import org.junit.Test;
@@ -457,7 +456,6 @@ public class TestLocalScan {
 
   @Test
   public void testFilterWithDateAndTimestamp() throws IOException {
-    Assume.assumeFalse(format == FileFormat.ORC);
     Schema schema = new Schema(
         required(1, "timestamp_with_zone", Types.TimestampType.withZone()),
         required(2, "timestamp_without_zone", Types.TimestampType.withoutZone()),
diff --git a/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilterTypes.java b/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilterTypes.java
index 8647a9c..17d3b99 100644
--- a/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilterTypes.java
+++ b/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilterTypes.java
@@ -253,12 +253,13 @@ public class TestMetricsRowGroupFilterTypes {
         new Object[] { "orc", "double", 2.11d, 1.97d },
         new Object[] { "orc", "date", "2018-06-29", "2018-05-03" },
         new Object[] { "orc", "time", "10:02:34.000000", "10:02:34.000001" },
-        new Object[] { "orc", "timestamp",
-            "2018-06-29T10:02:34.000000",
-            "2018-06-29T15:02:34.000000" },
-        new Object[] { "orc", "timestamptz",
-            "2018-06-29T10:02:34.000000+00:00",
-            "2018-06-29T10:02:34.000000-07:00" },
+        // Temporarily disable filters on Timestamp columns due to ORC-611
+        // new Object[] { "orc", "timestamp",
+        //     "2018-06-29T10:02:34.000000",
+        //     "2018-06-29T15:02:34.000000" },
+        // new Object[] { "orc", "timestamptz",
+        //     "2018-06-29T10:02:34.000000+00:00",
+        //     "2018-06-29T10:02:34.000000-07:00" },
         new Object[] { "orc", "string", "tapir", "monthly" },
         // uuid, fixed and binary types not supported yet
         // new Object[] { "orc", "uuid", uuid, UUID.randomUUID() },
diff --git a/orc/src/main/java/org/apache/iceberg/orc/ExpressionToSearchArgument.java b/orc/src/main/java/org/apache/iceberg/orc/ExpressionToSearchArgument.java
index 8f25e1f..716e92e 100644
--- a/orc/src/main/java/org/apache/iceberg/orc/ExpressionToSearchArgument.java
+++ b/orc/src/main/java/org/apache/iceberg/orc/ExpressionToSearchArgument.java
@@ -53,6 +53,7 @@ class ExpressionToSearchArgument extends ExpressionVisitors.BoundVisitor<Express
 
   // Currently every predicate in ORC requires a PredicateLeaf.Type field which is not available for these Iceberg types
   private static final Set<TypeID> UNSUPPORTED_TYPES = ImmutableSet.of(
+      TypeID.TIMESTAMP, // Temporarily disable filters on Timestamp columns due to ORC-611
       TypeID.BINARY,
       TypeID.FIXED,
       TypeID.UUID,
@@ -254,8 +255,10 @@ class ExpressionToSearchArgument extends ExpressionVisitors.BoundVisitor<Express
         return Date.valueOf(LocalDate.ofEpochDay((Integer) icebergLiteral));
       case TIMESTAMP:
         long microsFromEpoch = (Long) icebergLiteral;
-        return Timestamp.from(Instant.ofEpochSecond(Math.floorDiv(microsFromEpoch, 1_000_000),
-            (microsFromEpoch % 1_000_000) * 1_000));
+        return Timestamp.from(Instant.ofEpochSecond(
+            Math.floorDiv(microsFromEpoch, 1_000_000),
+            Math.floorMod(microsFromEpoch, 1_000_000) * 1_000
+        ));
       case DECIMAL:
         return new HiveDecimalWritable(HiveDecimal.create((BigDecimal) icebergLiteral, false));
       default:
diff --git a/orc/src/test/java/org/apache/iceberg/orc/TestExpressionToSearchArgument.java b/orc/src/test/java/org/apache/iceberg/orc/TestExpressionToSearchArgument.java
index f09d811..9ddf8d0 100644
--- a/orc/src/test/java/org/apache/iceberg/orc/TestExpressionToSearchArgument.java
+++ b/orc/src/test/java/org/apache/iceberg/orc/TestExpressionToSearchArgument.java
@@ -22,10 +22,12 @@ package org.apache.iceberg.orc;
 import java.math.BigDecimal;
 import java.nio.ByteBuffer;
 import java.sql.Date;
-import java.sql.Timestamp;
 import java.time.Instant;
 import java.time.LocalDate;
+import java.time.OffsetDateTime;
+import java.time.ZoneOffset;
 import java.time.format.DateTimeFormatter;
+import java.time.temporal.ChronoUnit;
 import java.util.TimeZone;
 import java.util.UUID;
 import org.apache.iceberg.Schema;
@@ -105,6 +107,10 @@ public class TestExpressionToSearchArgument {
   public void testTimezoneSensitiveTypes() {
     for (String timezone : new String[]{"America/New_York", "Asia/Kolkata", "UTC/Greenwich"}) {
       TimeZone.setDefault(TimeZone.getTimeZone(timezone));
+      OffsetDateTime tsTzPredicate = OffsetDateTime.parse("2019-10-02T00:47:28.207366Z");
+      OffsetDateTime tsPredicate = OffsetDateTime.parse("1968-01-16T13:07:59.048625Z");
+      OffsetDateTime epoch = Instant.ofEpochSecond(0).atOffset(ZoneOffset.UTC);
+
       Schema schema = new Schema(
           required(1, "date", Types.DateType.get()),
           required(2, "tsTz", Types.TimestampType.withZone()),
@@ -112,15 +118,16 @@ public class TestExpressionToSearchArgument {
       );
 
       Expression expr = and(
-          and(equal("date", 10L), equal("tsTz", 10 * 3600 * 1000000L)),
-          equal("ts", 20 * 3600 * 1000000L)
+          and(equal("date", 10L), equal("tsTz", ChronoUnit.MICROS.between(epoch, tsTzPredicate))),
+          equal("ts", ChronoUnit.MICROS.between(epoch, tsPredicate))
       );
       Expression boundFilter = Binder.bind(schema.asStruct(), expr, true);
       SearchArgument expected = SearchArgumentFactory.newBuilder()
           .startAnd()
           .equals("`date`", Type.DATE, Date.valueOf(LocalDate.parse("1970-01-11", DateTimeFormatter.ISO_LOCAL_DATE)))
-          .equals("`tsTz`", Type.TIMESTAMP, Timestamp.from(Instant.ofEpochSecond(10 * 3600)))
-          .equals("`ts`", Type.TIMESTAMP, Timestamp.from(Instant.ofEpochSecond(20 * 3600)))
+          // Temporarily disable filters on Timestamp columns due to ORC-611
+          // .equals("`tsTz`", Type.TIMESTAMP, Timestamp.from(tsTzPredicate.toInstant()))
+          // .equals("`ts`", Type.TIMESTAMP, Timestamp.from(tsPredicate.toInstant()))
           .end()
           .build();