You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2021/03/23 19:52:38 UTC
[orc] branch branch-1.6 updated: ORC-771: ORC timestamp consistency
Test for sql.Timestamps close to epoch (#666)
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-1.6
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/branch-1.6 by this push:
new af5eb61 ORC-771: ORC timestamp consistency Test for sql.Timestamps close to epoch (#666)
af5eb61 is described below
commit af5eb610aca5540973b9bad17e98291ef41f0c78
Author: Panagiotis Garefalakis <pg...@apache.org>
AuthorDate: Tue Mar 23 19:50:04 2021 +0000
ORC-771: ORC timestamp consistency Test for sql.Timestamps close to epoch (#666)
### What changes were proposed in this pull request?
ORC-763 contains a long discussion of the java.sql.Timestamp bug and how ORC works around it.
### Why are the changes needed?
This ticket introduces a test to ensure consistency with the expected behaviour, matching the test that already exists on the C++ side.
### How was this patch tested?
TestVectorOrcFile.testTimestampBug
(cherry picked from commit 9904b8e00575d4427beb23339f2049535c52e5fc)
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
.../src/test/org/apache/orc/TestVectorOrcFile.java | 57 ++++++++++++++++++++++
1 file changed, 57 insertions(+)
diff --git a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
index 485f979..a5a179d 100644
--- a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
+++ b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
@@ -79,6 +79,7 @@ import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.sql.Date;
import java.sql.Timestamp;
+import java.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@@ -381,6 +382,62 @@ public class TestVectorOrcFile {
}
@Test
+ public void testTimestampBug() throws IOException {
+   // Round-trips timestamps within two seconds of the epoch through an ORC file,
+   // once with nanos = 999_999 and once with nanos = 1_000_000, and pins the
+   // values read back, including the known extra second for -1s (see ORC-763).
+   TypeDescription schema = TypeDescription.createTimestamp();
+   Writer writer = OrcFile.createWriter(testFilePath,
+       OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
+           .bufferSize(10000).version(fileFormat));
+   int batchCount = 5;
+   // Every addRowBatch call below carries exactly batchCount rows.
+   VectorizedRowBatch batch = schema.createRowBatch(batchCount);
+   TimestampColumnVector vec = (TimestampColumnVector) batch.cols[0];
+   int[] seconds = new int[]{ -2, -1, 0, 1, 2 };
+   // 1st written batch uses nanosecond <= 999999, 2nd uses nanosecond > 999999
+   int[] nanosPerBatch = new int[]{ 999_999, 1_000_000 };
+   for (int nanos : nanosPerBatch) {
+     for (int i = 0; i < batchCount; i++) {
+       Timestamp curr = Timestamp.from(Instant.ofEpochSecond(seconds[i]));
+       curr.setNanos(nanos);
+       vec.set(i, curr);
+     }
+     batch.size = batchCount;
+     writer.addRowBatch(batch);
+   }
+   writer.close();
+
+   Reader reader = OrcFile.createReader(testFilePath,
+       OrcFile.readerOptions(conf).filesystem(fs));
+   RecordReader rows = reader.rows();
+   try {
+     batch = reader.getSchema().createRowBatch(batchCount);
+     TimestampColumnVector timestamps = (TimestampColumnVector) batch.cols[0];
+
+     // read 1st batch with nanosecond <= 999999
+     rows.nextBatch(batch);
+     // make sure the expected number of rows was read before checking values
+     assertEquals(batchCount, batch.size);
+     for (int r = 0; r < batchCount; ++r) {
+       assertEquals(seconds[r], timestamps.getTimestampAsLong(r));
+       assertEquals(999_999, timestamps.nanos[r]);
+     }
+
+     // read 2nd batch with nanosecond > 999999
+     rows.nextBatch(batch);
+     assertEquals(batchCount, batch.size);
+     for (int r = 0; r < batchCount; ++r) {
+       if (seconds[r] == -1) {
+         // reproduce the JDK bug of java.sql.Timestamp see ORC-763
+         // Wrong extra second: 1969-12-31 23.59.59.001 -> 1970-01-01 00.00.00.001
+         assertEquals(0, timestamps.getTimestampAsLong(r));
+       } else {
+         assertEquals(seconds[r], timestamps.getTimestampAsLong(r));
+       }
+       assertEquals(1_000_000, timestamps.nanos[r]);
+     }
+   } finally {
+     // release the record reader even when an assertion fails
+     rows.close();
+   }
+ }
+
+ @Test
public void testTimestamp() throws Exception {
TypeDescription schema = TypeDescription.createTimestamp();
Writer writer = OrcFile.createWriter(testFilePath,