You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2019/08/26 16:05:45 UTC

[orc] branch branch-1.4 updated: ORC-546. Fix reading timestamps with duplicated millis within a second.

This is an automated email from the ASF dual-hosted git repository.

omalley pushed a commit to branch branch-1.4
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/branch-1.4 by this push:
     new 37245bb  ORC-546. Fix reading timestamps with duplicated millis within a second.
37245bb is described below

commit 37245bbfb526c978118e2f1c1dded557a7f029fd
Author: Owen O'Malley <om...@apache.org>
AuthorDate: Fri Aug 23 10:09:13 2019 -0700

    ORC-546. Fix reading timestamps with duplicated millis within a second.
    
    This bug caused SPARK-27594.
    
    Fixes #420
    
    Signed-off-by: Owen O'Malley <om...@apache.org>
---
 .../src/java/org/apache/orc/impl/TreeReaderFactory.java    |  4 ++--
 java/core/src/test/org/apache/orc/TestVectorOrcFile.java   | 14 ++++++++------
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
index 08a4359..2ac9fb4 100644
--- a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
@@ -975,9 +975,9 @@ public class TreeReaderFactory {
 
       for (int i = 0; i < batchSize; i++) {
         if (result.noNulls || !result.isNull[i]) {
-          final int newNanos = parseNanos(nanos.next());
+          int newNanos = parseNanos(nanos.next());
           long millis = (data.next() + base_timestamp)
-              * WriterImpl.MILLIS_PER_SECOND + newNanos / 1_000_000;
+              * WriterImpl.MILLIS_PER_SECOND;
           if (millis < 0 && newNanos > 999_999) {
             millis -= WriterImpl.MILLIS_PER_SECOND;
           }
diff --git a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
index 182f969..5611bb0 100644
--- a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
+++ b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
@@ -67,6 +67,7 @@ import java.nio.ByteBuffer;
 import java.nio.charset.StandardCharsets;
 import java.sql.Date;
 import java.sql.Timestamp;
+import java.time.format.DateTimeFormatter;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -1396,16 +1397,17 @@ public class TestVectorOrcFile {
     batch = reader.getSchema().createRowBatch(1000);
     TimestampColumnVector times = (TimestampColumnVector) batch.cols[0];
     LongColumnVector dates = (LongColumnVector) batch.cols[1];
+    DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSSS");
     for (int year = minYear; year < maxYear; ++year) {
       rows.nextBatch(batch);
       assertEquals(1000, batch.size);
       for(int row = 0; row < 1000; ++row) {
-        Timestamp expected = Timestamp.valueOf(
-            String.format("%04d-05-05 12:34:56.%04d", year, 2*row));
-        assertEquals("ms row " + row + " " + expected, expected.getTime(),
-            times.time[row]);
-        assertEquals("nanos row " + row + " " + expected, expected.getNanos(),
-            times.nanos[row]);
+        String expectedStr = String.format("%04d-05-05 12:34:56.%04d", year, 2*row);
+        assertEquals("row " + row, expectedStr,
+            formatter.format(times.asScratchTimestamp(row).toLocalDateTime()));
+        assertEquals(0, times.time[row] % 1000);
+        assertTrue("nano " + row + " = " + times.nanos[row],
+            times.nanos[row] >= 0 && times.nanos[row] < 1_000_000_000);
         assertEquals("year " + year + " row " + row,
             Integer.toString(year) + "-12-25",
             new DateWritable((int) dates.vector[row]).toString());