You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2021/03/23 19:52:38 UTC

[orc] branch branch-1.6 updated: ORC-771: ORC timestamp consistency Test for sql.Timestamps close to epoch (#666)

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-1.6
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/branch-1.6 by this push:
     new af5eb61  ORC-771: ORC timestamp consistency Test for sql.Timestamps close to epoch (#666)
af5eb61 is described below

commit af5eb610aca5540973b9bad17e98291ef41f0c78
Author: Panagiotis Garefalakis <pg...@apache.org>
AuthorDate: Tue Mar 23 19:50:04 2021 +0000

    ORC-771: ORC timestamp consistency Test for sql.Timestamps close to epoch (#666)
    
    ### What changes were proposed in this pull request?
    ORC-763 has a long discussion about the sql.Timestamp bug and how ORC works around it.
    
    ### Why are the changes needed?
    This ticket introduces a test to ensure consistency with the expected behaviour – as we have on the C++ side of things.
    
    ### How was this patch tested?
    TestVectorOrcFile.testTimestampBug
    
    (cherry picked from commit 9904b8e00575d4427beb23339f2049535c52e5fc)
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 .../src/test/org/apache/orc/TestVectorOrcFile.java | 57 ++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
index 485f979..a5a179d 100644
--- a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
+++ b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
@@ -79,6 +79,7 @@ import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
 import java.sql.Date;
 import java.sql.Timestamp;
+import java.time.Instant;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -381,6 +382,62 @@ public class TestVectorOrcFile {
   }
 
   @Test
+  public void testTimestampBug() throws IOException {
+    // Round-trips timestamps within two seconds of the epoch and pins the
+    // extra second ORC reports for -1s with whole-millisecond nanos (ORC-763).
+    TypeDescription schema = TypeDescription.createTimestamp();
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
+            .bufferSize(10000).version(fileFormat));
+    int batchCount = 5;
+    VectorizedRowBatch batch = schema.createRowBatch(batchCount * 2);
+    TimestampColumnVector vec = (TimestampColumnVector) batch.cols[0];
+    int[] seconds = new int[]{ -2, -1, 0, 1, 2 };
+    // 1st batch has nanosecond <= 999999, 2nd batch has nanosecond > 999999
+    int[] nanosPerBatch = new int[]{ 999_999, 1_000_000 };
+    batch.size = batchCount;
+    for (int nanos : nanosPerBatch) {
+      for (int i = 0; i < batchCount; i++) {
+        Timestamp curr = Timestamp.from(Instant.ofEpochSecond(seconds[i]));
+        curr.setNanos(nanos);
+        vec.set(i, curr);
+      }
+      writer.addRowBatch(batch);
+    }
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    batch = reader.getSchema().createRowBatch(batchCount);
+    TimestampColumnVector timestamps = (TimestampColumnVector) batch.cols[0];
+    // try-with-resources closes the record reader even if an assertion fails
+    try (RecordReader rows = reader.rows()) {
+      // read 1st batch with nanosecond <= 999999
+      rows.nextBatch(batch);
+      // guard against checking stale slots if fewer rows came back
+      assertEquals(batchCount, batch.size);
+      for (int r = 0; r < batchCount; ++r) {
+        assertEquals(seconds[r], timestamps.getTimestampAsLong(r));
+        assertEquals(999_999, timestamps.nanos[r]);
+      }
+      // read 2nd batch with nanosecond > 999999
+      rows.nextBatch(batch);
+      assertEquals(batchCount, batch.size);
+      for (int r = 0; r < batchCount; ++r) {
+        if (seconds[r] == -1) {
+          // reproduce the JDK bug of java.sql.Timestamp see ORC-763
+          // Wrong extra second:
+          // 1969-12-31 23.59.59.001 -> 1970-01-01 00.00.00.001
+          assertEquals(0, timestamps.getTimestampAsLong(r));
+        } else {
+          assertEquals(seconds[r], timestamps.getTimestampAsLong(r));
+        }
+        assertEquals(1_000_000, timestamps.nanos[r]);
+      }
+    }
+  }
+
+  @Test
   public void testTimestamp() throws Exception {
     TypeDescription schema = TypeDescription.createTimestamp();
     Writer writer = OrcFile.createWriter(testFilePath,