You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2019/10/02 20:33:36 UTC
[orc] 02/02: ORC-554: Float to timestamp schema evolution should
handle overflow.
This is an automated email from the ASF dual-hosted git repository.
omalley pushed a commit to branch branch-1.6
in repository https://gitbox.apache.org/repos/asf/orc.git
commit 2f1cc766ee19e655bd7db16f36f6790f6ea93ccf
Author: Laszlo Bodor <bo...@gmail.com>
AuthorDate: Thu Sep 12 09:21:46 2019 +0200
ORC-554: Float to timestamp schema evolution should handle overflow.
Fixes #431
Signed-off-by: Owen O'Malley <om...@apache.org>
---
java/core/src/findbugs/exclude.xml | 6 --
.../apache/orc/impl/ConvertTreeReaderFactory.java | 18 +++-
.../org/apache/orc/impl/TestSchemaEvolution.java | 101 +++++++++++++++++++++
3 files changed, 116 insertions(+), 9 deletions(-)
diff --git a/java/core/src/findbugs/exclude.xml b/java/core/src/findbugs/exclude.xml
index 76d395a..6112afd 100644
--- a/java/core/src/findbugs/exclude.xml
+++ b/java/core/src/findbugs/exclude.xml
@@ -60,11 +60,5 @@
<Match>
<Bug pattern="RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE"/>
<Class name="org.apache.orc.impl.TestSchemaEvolution"/>
- <Method name="testEvolutionFromTimestamp"/>
- </Match>
- <Match>
- <Bug pattern="RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE"/>
- <Class name="org.apache.orc.impl.TestSchemaEvolution"/>
- <Method name="testEvolutionToTimestamp"/>
</Match>
</FindBugsFilter>
diff --git a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
index a6c158b..1ea870a 100644
--- a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
@@ -1409,9 +1409,21 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
seconds = SerializationUtils.convertFromUtc(local, seconds);
}
long wholeSec = (long) Math.floor(seconds);
- timestampColVector.time[elementNum] = wholeSec * 1000;
- timestampColVector.nanos[elementNum] =
- 1_000_000 * (int) Math.round((seconds - wholeSec) * 1000);
+
+ // overflow
+ double doubleMillis = seconds * 1000;
+ long millis = wholeSec * 1000;
+ if (doubleMillis > Long.MAX_VALUE || doubleMillis < Long.MIN_VALUE ||
+ ((millis >= 0) != (doubleMillis >= 0))) {
+ timestampColVector.time[elementNum] = 0L;
+ timestampColVector.nanos[elementNum] = 0;
+ timestampColVector.isNull[elementNum] = true;
+ timestampColVector.noNulls = false;
+ } else {
+ timestampColVector.time[elementNum] = wholeSec * 1000;
+ timestampColVector.nanos[elementNum] =
+ 1_000_000 * (int) Math.round((seconds - wholeSec) * 1000);
+ }
}
@Override
diff --git a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
index 1dda07e..304ee4b 100644
--- a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
+++ b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
@@ -2263,4 +2263,105 @@ public class TestSchemaEvolution {
TimeZone.setDefault(oldDefault);
}
}
+
+ @Test
+ public void doubleToTimeStampOverflow() throws Exception {
+ floatAndDoubleToTimeStampOverflow("double",
+ 340282347000000000000000000000000000000000.0,
+ 1e16,
+ 9223372036854775.0,
+ 9000000000000000.1,
+ 10000000000.0,
+ 10000000.123,
+ -1000000.123,
+ -10000000000.0,
+ -9000000000000000.1,
+ -9223372036854775.0,
+ -1e16,
+ -340282347000000000000000000000000000000000.0);
+ }
+
+ @Test
+ public void floatToTimeStampPositiveOverflow() throws Exception {
+ floatAndDoubleToTimeStampOverflow("float",
+ 340282347000000000000000000000000000000000.0,
+ 1e16,
+ 9223372036854775.0,
+ 9000000000000000.1,
+ 10000000000.0,
+ 10000000.123,
+ -1000000.123,
+ -10000000000.0,
+ -9000000000000000.1,
+ -9223372036854775.0,
+ -1e16,
+ -340282347000000000000000000000000000000000.0);
+ }
+
+ private void floatAndDoubleToTimeStampOverflow(String typeInFileSchema,
+ double... values) throws Exception {
+ boolean isFloat = typeInFileSchema.equals("float");
+ TypeDescription fileSchema =
+ TypeDescription.fromString(String.format("struct<c1:%s>", typeInFileSchema));
+ Writer writer = OrcFile.createWriter(testFilePath,
+ OrcFile.writerOptions(conf)
+ .setSchema(fileSchema)
+ .stripeSize(10000)
+ .useUTCTimestamp(true));
+
+ VectorizedRowBatch batch = fileSchema.createRowBatchV2();
+ DoubleColumnVector fl1 = (DoubleColumnVector) batch.cols[0];
+
+ for (double v : values) {
+ int row = batch.size++;
+ fl1.vector[row] = v;
+
+ if (batch.size == batch.getMaxSize()) {
+ writer.addRowBatch(batch);
+ batch.reset();
+ }
+ }
+ if (batch.size != 0) {
+ writer.addRowBatch(batch);
+ }
+ writer.close();
+
+ TypeDescription readerSchema = TypeDescription.fromString("struct<c1:timestamp>");
+ VectorizedRowBatch batchTimeStamp = readerSchema.createRowBatchV2();
+ TimestampColumnVector t1 = (TimestampColumnVector) batchTimeStamp.cols[0];
+
+ OrcFile.ReaderOptions options = OrcFile
+ .readerOptions(conf)
+ .useUTCTimestamp(true);
+
+ try (Reader reader = OrcFile.createReader(testFilePath, options);
+ RecordReader rows = reader.rows(reader.options().schema(readerSchema))) {
+ int value = 0;
+ while (value < values.length) {
+ assertTrue("value " + value, rows.nextBatch(batchTimeStamp));
+ for(int row=0; row < batchTimeStamp.size; ++row) {
+ double expected = values[value + row];
+ String rowName = String.format("value %d", value + row);
+ boolean isPositive = ((long)Math.floor(expected) * 1000) >= 0;
+ if (expected * 1000 < Long.MIN_VALUE ||
+ expected * 1000 > Long.MAX_VALUE ||
+ ((expected >= 0) != isPositive)) {
+ assertFalse(rowName, t1.noNulls);
+ assertTrue(rowName, t1.isNull[row]);
+ } else {
+ double actual = t1.time[row] / 1000.0 + t1.nanos[row] / 1_000_000_000.0;
+ assertEquals(rowName, expected, actual,
+ Math.abs(expected * (isFloat ? 0.000001 : 0.0000000000000001)));
+ assertFalse(rowName, t1.isNull[row]);
+ assertTrue(String.format(
+ "%s nanos should be 0 to 1,000,000,000 instead it's: %d",
+ rowName, t1.nanos[row]),
+ t1.nanos[row] >= 0 && t1.nanos[row] < 1_000_000_000);
+ }
+ }
+ value += batchTimeStamp.size;
+ }
+ assertFalse(rows.nextBatch(batchTimeStamp));
+ }
+ }
}