You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2015/09/16 21:18:32 UTC
[14/50] [abbrv] hive git commit: HIVE-11771: Parquet timestamp conversion errors (Jimmy, reviewed by Szehon)
HIVE-11771: Parquet timestamp conversion errors (Jimmy, reviewed by Szehon)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1e97b161
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1e97b161
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1e97b161
Branch: refs/heads/spark
Commit: 1e97b16181941f8c21684f4b7a4958b890ef7738
Parents: b1fffd5
Author: Jimmy Xiang <jx...@cloudera.com>
Authored: Wed Sep 9 13:26:06 2015 -0700
Committer: Jimmy Xiang <jx...@cloudera.com>
Committed: Sat Sep 12 14:43:14 2015 -0700
----------------------------------------------------------------------
.../ql/io/parquet/timestamp/NanoTimeUtils.java | 23 +++++++++---
.../serde/TestParquetTimestampUtils.java | 38 +++++++++++++++++++-
2 files changed, 56 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/1e97b161/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java
index 59c9b4a..aace48e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java
@@ -15,6 +15,7 @@ package org.apache.hadoop.hive.ql.io.parquet.timestamp;
import java.sql.Timestamp;
import java.util.Calendar;
+import java.util.GregorianCalendar;
import java.util.TimeZone;
import java.util.concurrent.TimeUnit;
@@ -28,6 +29,7 @@ public class NanoTimeUtils {
static final long NANOS_PER_HOUR = TimeUnit.HOURS.toNanos(1);
static final long NANOS_PER_MINUTE = TimeUnit.MINUTES.toNanos(1);
static final long NANOS_PER_SECOND = TimeUnit.SECONDS.toNanos(1);
+ static final long NANOS_PER_DAY = TimeUnit.DAYS.toNanos(1);
private static final ThreadLocal<Calendar> parquetGMTCalendar = new ThreadLocal<Calendar>();
private static final ThreadLocal<Calendar> parquetLocalCalendar = new ThreadLocal<Calendar>();
@@ -48,14 +50,20 @@ public class NanoTimeUtils {
}
private static Calendar getCalendar(boolean skipConversion) {
- return skipConversion ? getLocalCalendar() : getGMTCalendar();
+ Calendar calendar = skipConversion ? getLocalCalendar() : getGMTCalendar();
+ calendar.clear(); // Reset all fields before reusing this instance
+ return calendar;
}
public static NanoTime getNanoTime(Timestamp ts, boolean skipConversion) {
Calendar calendar = getCalendar(skipConversion);
calendar.setTime(ts);
- JDateTime jDateTime = new JDateTime(calendar.get(Calendar.YEAR),
+ int year = calendar.get(Calendar.YEAR);
+ if (calendar.get(Calendar.ERA) == GregorianCalendar.BC) {
+ year = 1 - year;
+ }
+ JDateTime jDateTime = new JDateTime(year,
calendar.get(Calendar.MONTH) + 1, //java calendar index starting at 1.
calendar.get(Calendar.DAY_OF_MONTH));
int days = jDateTime.getJulianDayNumber();
@@ -74,13 +82,20 @@ public class NanoTimeUtils {
int julianDay = nt.getJulianDay();
long nanosOfDay = nt.getTimeOfDayNanos();
+ long remainder = nanosOfDay;
+ julianDay += remainder / NANOS_PER_DAY;
+ remainder %= NANOS_PER_DAY;
+ if (remainder < 0) {
+ remainder += NANOS_PER_DAY;
+ julianDay--;
+ }
+
JDateTime jDateTime = new JDateTime((double) julianDay);
Calendar calendar = getCalendar(skipConversion);
calendar.set(Calendar.YEAR, jDateTime.getYear());
- calendar.set(Calendar.MONTH, jDateTime.getMonth() - 1); //java calender index starting at 1.
+ calendar.set(Calendar.MONTH, jDateTime.getMonth() - 1); //java calendar index starting at 1.
calendar.set(Calendar.DAY_OF_MONTH, jDateTime.getDay());
- long remainder = nanosOfDay;
int hour = (int) (remainder / (NANOS_PER_HOUR));
remainder = remainder % (NANOS_PER_HOUR);
int minutes = (int) (remainder / (NANOS_PER_MINUTE));
http://git-wip-us.apache.org/repos/asf/hive/blob/1e97b161/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java
index 510ffd1..ec6def5 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java
@@ -16,7 +16,9 @@ package org.apache.hadoop.hive.ql.io.parquet.serde;
import java.sql.Timestamp;
import java.util.Calendar;
import java.util.Date;
+import java.util.GregorianCalendar;
import java.util.TimeZone;
+import java.util.concurrent.TimeUnit;
import junit.framework.Assert;
import junit.framework.TestCase;
@@ -74,7 +76,36 @@ public class TestParquetTimestampUtils extends TestCase {
Timestamp ts2Fetched = NanoTimeUtils.getTimestamp(nt2, false);
Assert.assertEquals(ts2Fetched, ts2);
Assert.assertEquals(nt2.getJulianDay() - nt1.getJulianDay(), 30);
- }
+
+ //check if 1464305 Julian Days between Jan 1, 2005 BC and Jan 31, 2005.
+ cal1 = Calendar.getInstance();
+ cal1.set(Calendar.ERA, GregorianCalendar.BC);
+ cal1.set(Calendar.YEAR, 2005);
+ cal1.set(Calendar.MONTH, Calendar.JANUARY);
+ cal1.set(Calendar.DAY_OF_MONTH, 1);
+ cal1.set(Calendar.HOUR_OF_DAY, 0);
+ cal1.setTimeZone(TimeZone.getTimeZone("GMT"));
+
+ ts1 = new Timestamp(cal1.getTimeInMillis());
+ nt1 = NanoTimeUtils.getNanoTime(ts1, false);
+
+ ts1Fetched = NanoTimeUtils.getTimestamp(nt1, false);
+ Assert.assertEquals(ts1Fetched, ts1);
+
+ cal2 = Calendar.getInstance();
+ cal2.set(Calendar.YEAR, 2005);
+ cal2.set(Calendar.MONTH, Calendar.JANUARY);
+ cal2.set(Calendar.DAY_OF_MONTH, 31);
+ cal2.set(Calendar.HOUR_OF_DAY, 0);
+ cal2.setTimeZone(TimeZone.getTimeZone("UTC"));
+
+ ts2 = new Timestamp(cal2.getTimeInMillis());
+ nt2 = NanoTimeUtils.getNanoTime(ts2, false);
+
+ ts2Fetched = NanoTimeUtils.getTimestamp(nt2, false);
+ Assert.assertEquals(ts2Fetched, ts2);
+ Assert.assertEquals(nt2.getJulianDay() - nt1.getJulianDay(), 1464305);
+}
public void testNanos() {
//case 1: 01:01:01.0000000001
@@ -136,6 +167,11 @@ public class TestParquetTimestampUtils extends TestCase {
NanoTime n1 = NanoTimeUtils.getNanoTime(ts1, false);
Assert.assertEquals(n2.getTimeOfDayNanos() - n1.getTimeOfDayNanos(), 600000000009L);
+
+ NanoTime n3 = new NanoTime(n1.getJulianDay() - 1, n1.getTimeOfDayNanos() + TimeUnit.DAYS.toNanos(1));
+ Assert.assertEquals(ts1, NanoTimeUtils.getTimestamp(n3, false));
+ n3 = new NanoTime(n1.getJulianDay() + 3, n1.getTimeOfDayNanos() - TimeUnit.DAYS.toNanos(3));
+ Assert.assertEquals(ts1, NanoTimeUtils.getTimestamp(n3, false));
}
public void testTimezone() {