You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2019/10/02 20:33:36 UTC

[orc] 02/02: ORC-554: Float to timestamp schema evolution should handle overflow.

This is an automated email from the ASF dual-hosted git repository.

omalley pushed a commit to branch branch-1.6
in repository https://gitbox.apache.org/repos/asf/orc.git

commit 2f1cc766ee19e655bd7db16f36f6790f6ea93ccf
Author: Laszlo Bodor <bo...@gmail.com>
AuthorDate: Thu Sep 12 09:21:46 2019 +0200

    ORC-554: Float to timestamp schema evolution should handle overflow.
    
    Fixes #431
    
    Signed-off-by: Owen O'Malley <om...@apache.org>
---
 java/core/src/findbugs/exclude.xml                 |   6 --
 .../apache/orc/impl/ConvertTreeReaderFactory.java  |  18 +++-
 .../org/apache/orc/impl/TestSchemaEvolution.java   | 101 +++++++++++++++++++++
 3 files changed, 116 insertions(+), 9 deletions(-)

diff --git a/java/core/src/findbugs/exclude.xml b/java/core/src/findbugs/exclude.xml
index 76d395a..6112afd 100644
--- a/java/core/src/findbugs/exclude.xml
+++ b/java/core/src/findbugs/exclude.xml
@@ -60,11 +60,5 @@
   <Match>
     <Bug pattern="RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE"/>
     <Class name="org.apache.orc.impl.TestSchemaEvolution"/>
-    <Method name="testEvolutionFromTimestamp"/>
-  </Match>
-  <Match>
-    <Bug pattern="RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE"/>
-    <Class name="org.apache.orc.impl.TestSchemaEvolution"/>
-    <Method name="testEvolutionToTimestamp"/>
   </Match>
 </FindBugsFilter>
diff --git a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
index a6c158b..1ea870a 100644
--- a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
@@ -1409,9 +1409,21 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
         seconds = SerializationUtils.convertFromUtc(local, seconds);
       }
       long wholeSec = (long) Math.floor(seconds);
-      timestampColVector.time[elementNum] = wholeSec * 1000;
-      timestampColVector.nanos[elementNum] =
-          1_000_000 * (int) Math.round((seconds - wholeSec) * 1000);
+
+      // overflow
+      double doubleMillis = seconds * 1000;
+      long millis = wholeSec * 1000;
+      if (doubleMillis > Long.MAX_VALUE || doubleMillis < Long.MIN_VALUE ||
+              ((millis >= 0) != (doubleMillis >= 0))) {
+        timestampColVector.time[elementNum] = 0L;
+        timestampColVector.nanos[elementNum] = 0;
+        timestampColVector.isNull[elementNum] = true;
+        timestampColVector.noNulls = false;
+      } else {
+        timestampColVector.time[elementNum] = wholeSec * 1000;
+        timestampColVector.nanos[elementNum] =
+            1_000_000 * (int) Math.round((seconds - wholeSec) * 1000);
+      }
     }
 
     @Override
diff --git a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
index 1dda07e..304ee4b 100644
--- a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
+++ b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
@@ -2263,4 +2263,105 @@ public class TestSchemaEvolution {
       TimeZone.setDefault(oldDefault);
     }
   }
+
+  @Test
+  public void doubleToTimeStampOverflow() throws Exception {
+    floatAndDoubleToTimeStampOverflow("double",
+        340282347000000000000000000000000000000000.0,
+        1e16,
+        9223372036854775.0,
+        9000000000000000.1,
+        10000000000.0,
+        10000000.123,
+        -1000000.123,
+        -10000000000.0,
+        -9000000000000000.1,
+        -9223372036854775.0,
+        -1e16,
+        -340282347000000000000000000000000000000000.0);
+  }
+
+  @Test
+  public void floatToTimeStampPositiveOverflow() throws Exception {
+    floatAndDoubleToTimeStampOverflow("float",
+        340282347000000000000000000000000000000000.0,
+        1e16,
+        9223372036854775.0,
+        9000000000000000.1,
+        10000000000.0,
+        10000000.123,
+        -1000000.123,
+        -10000000000.0,
+        -9000000000000000.1,
+        -9223372036854775.0,
+        -1e16,
+        -340282347000000000000000000000000000000000.0);
+  }
+
+  private void floatAndDoubleToTimeStampOverflow(String typeInFileSchema,
+                                                 double... values) throws Exception {
+    boolean isFloat = typeInFileSchema.equals("float");
+    TypeDescription fileSchema =
+        TypeDescription.fromString(String.format("struct<c1:%s>", typeInFileSchema));
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(fileSchema)
+            .stripeSize(10000)
+            .useUTCTimestamp(true));
+
+    VectorizedRowBatch batch = fileSchema.createRowBatchV2();
+    DoubleColumnVector fl1 = (DoubleColumnVector) batch.cols[0];
+
+    for (double v : values) {
+      int row = batch.size++;
+      fl1.vector[row] = v;
+
+      if (batch.size == batch.getMaxSize()) {
+        writer.addRowBatch(batch);
+        batch.reset();
+      }
+    }
+    if (batch.size != 0) {
+      writer.addRowBatch(batch);
+    }
+    writer.close();
+
+    TypeDescription readerSchema = TypeDescription.fromString("struct<c1:timestamp>");
+    VectorizedRowBatch batchTimeStamp = readerSchema.createRowBatchV2();
+    TimestampColumnVector t1 = (TimestampColumnVector) batchTimeStamp.cols[0];
+
+    OrcFile.ReaderOptions options = OrcFile
+                                        .readerOptions(conf)
+                                        .useUTCTimestamp(true);
+
+    try (Reader reader = OrcFile.createReader(testFilePath, options);
+         RecordReader rows = reader.rows(reader.options().schema(readerSchema))) {
+      int value = 0;
+      while (value < values.length) {
+        assertTrue("value " + value, rows.nextBatch(batchTimeStamp));
+        for(int row=0; row < batchTimeStamp.size; ++row) {
+          double expected = values[value + row];
+          String rowName = String.format("value %d", value + row);
+          boolean isPositive = ((long)Math.floor(expected) * 1000) >= 0;
+          if (expected * 1000 < Long.MIN_VALUE ||
+                  expected * 1000 > Long.MAX_VALUE ||
+                  ((expected >= 0) != isPositive)) {
+            assertFalse(rowName, t1.noNulls);
+            assertTrue(rowName, t1.isNull[row]);
+          } else {
+            double actual = t1.time[row] / 1000.0 + t1.nanos[row] / 1_000_000_000.0;
+            assertEquals(rowName, expected, actual,
+                Math.abs(expected * (isFloat ? 0.000001 : 0.0000000000000001)));
+            assertFalse(rowName, t1.isNull[row]);
+            assertTrue(String.format(
+                "%s nanos should be 0 to 1,000,000,000 instead it's: %d",
+                rowName, t1.nanos[row]),
+                t1.nanos[row] >= 0 && t1.nanos[row] < 1_000_000_000);
+          }
+        }
+        value += batchTimeStamp.size;
+      }
+      assertFalse(rows.nextBatch(batchTimeStamp));
+    }
+  }
 }