You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2015/09/16 21:18:32 UTC

[14/50] [abbrv] hive git commit: HIVE-11771: Parquet timestamp conversion errors (Jimmy, reviewed by Szehon)

HIVE-11771: Parquet timestamp conversion errors (Jimmy, reviewed by Szehon)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1e97b161
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1e97b161
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1e97b161

Branch: refs/heads/spark
Commit: 1e97b16181941f8c21684f4b7a4958b890ef7738
Parents: b1fffd5
Author: Jimmy Xiang <jx...@cloudera.com>
Authored: Wed Sep 9 13:26:06 2015 -0700
Committer: Jimmy Xiang <jx...@cloudera.com>
Committed: Sat Sep 12 14:43:14 2015 -0700

----------------------------------------------------------------------
 .../ql/io/parquet/timestamp/NanoTimeUtils.java  | 23 +++++++++---
 .../serde/TestParquetTimestampUtils.java        | 38 +++++++++++++++++++-
 2 files changed, 56 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/1e97b161/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java
index 59c9b4a..aace48e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java
@@ -15,6 +15,7 @@ package org.apache.hadoop.hive.ql.io.parquet.timestamp;
 
 import java.sql.Timestamp;
 import java.util.Calendar;
+import java.util.GregorianCalendar;
 import java.util.TimeZone;
 import java.util.concurrent.TimeUnit;
 
@@ -28,6 +29,7 @@ public class NanoTimeUtils {
    static final long NANOS_PER_HOUR = TimeUnit.HOURS.toNanos(1);
    static final long NANOS_PER_MINUTE = TimeUnit.MINUTES.toNanos(1);
    static final long NANOS_PER_SECOND = TimeUnit.SECONDS.toNanos(1);
+   static final long NANOS_PER_DAY = TimeUnit.DAYS.toNanos(1);
 
    private static final ThreadLocal<Calendar> parquetGMTCalendar = new ThreadLocal<Calendar>();
    private static final ThreadLocal<Calendar> parquetLocalCalendar = new ThreadLocal<Calendar>();
@@ -48,14 +50,20 @@ public class NanoTimeUtils {
    }
 
    private static Calendar getCalendar(boolean skipConversion) {
-     return skipConversion ? getLocalCalendar() : getGMTCalendar();
+     Calendar calendar = skipConversion ? getLocalCalendar() : getGMTCalendar();
+     calendar.clear(); // Reset all fields before reusing this instance
+     return calendar;
    }
 
    public static NanoTime getNanoTime(Timestamp ts, boolean skipConversion) {
 
      Calendar calendar = getCalendar(skipConversion);
      calendar.setTime(ts);
-     JDateTime jDateTime = new JDateTime(calendar.get(Calendar.YEAR),
+     int year = calendar.get(Calendar.YEAR);
+     if (calendar.get(Calendar.ERA) == GregorianCalendar.BC) {
+       year = 1 - year;
+     }
+     JDateTime jDateTime = new JDateTime(year,
        calendar.get(Calendar.MONTH) + 1,  //java calendar index starting at 1.
        calendar.get(Calendar.DAY_OF_MONTH));
      int days = jDateTime.getJulianDayNumber();
@@ -74,13 +82,20 @@ public class NanoTimeUtils {
      int julianDay = nt.getJulianDay();
      long nanosOfDay = nt.getTimeOfDayNanos();
 
+     long remainder = nanosOfDay;
+     julianDay += remainder / NANOS_PER_DAY;
+     remainder %= NANOS_PER_DAY;
+     if (remainder < 0) {
+       remainder += NANOS_PER_DAY;
+       julianDay--;
+     }
+
      JDateTime jDateTime = new JDateTime((double) julianDay);
      Calendar calendar = getCalendar(skipConversion);
      calendar.set(Calendar.YEAR, jDateTime.getYear());
-     calendar.set(Calendar.MONTH, jDateTime.getMonth() - 1); //java calender index starting at 1.
+     calendar.set(Calendar.MONTH, jDateTime.getMonth() - 1); //java calendar index starting at 1.
      calendar.set(Calendar.DAY_OF_MONTH, jDateTime.getDay());
 
-     long remainder = nanosOfDay;
      int hour = (int) (remainder / (NANOS_PER_HOUR));
      remainder = remainder % (NANOS_PER_HOUR);
      int minutes = (int) (remainder / (NANOS_PER_MINUTE));

http://git-wip-us.apache.org/repos/asf/hive/blob/1e97b161/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java
index 510ffd1..ec6def5 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java
@@ -16,7 +16,9 @@ package org.apache.hadoop.hive.ql.io.parquet.serde;
 import java.sql.Timestamp;
 import java.util.Calendar;
 import java.util.Date;
+import java.util.GregorianCalendar;
 import java.util.TimeZone;
+import java.util.concurrent.TimeUnit;
 
 import junit.framework.Assert;
 import junit.framework.TestCase;
@@ -74,7 +76,36 @@ public class TestParquetTimestampUtils extends TestCase {
     Timestamp ts2Fetched = NanoTimeUtils.getTimestamp(nt2, false);
     Assert.assertEquals(ts2Fetched, ts2);
     Assert.assertEquals(nt2.getJulianDay() - nt1.getJulianDay(), 30);
-  }
+
+    //check if 1464305 Julian Days between Jan 1, 2005 BC and Jan 31, 2005.
+    cal1 = Calendar.getInstance();
+    cal1.set(Calendar.ERA,  GregorianCalendar.BC);
+    cal1.set(Calendar.YEAR,  2005);
+    cal1.set(Calendar.MONTH, Calendar.JANUARY);
+    cal1.set(Calendar.DAY_OF_MONTH, 1);
+    cal1.set(Calendar.HOUR_OF_DAY, 0);
+    cal1.setTimeZone(TimeZone.getTimeZone("GMT"));
+
+    ts1 = new Timestamp(cal1.getTimeInMillis());
+    nt1 = NanoTimeUtils.getNanoTime(ts1, false);
+
+    ts1Fetched = NanoTimeUtils.getTimestamp(nt1, false);
+    Assert.assertEquals(ts1Fetched, ts1);
+
+    cal2 = Calendar.getInstance();
+    cal2.set(Calendar.YEAR,  2005);
+    cal2.set(Calendar.MONTH, Calendar.JANUARY);
+    cal2.set(Calendar.DAY_OF_MONTH, 31);
+    cal2.set(Calendar.HOUR_OF_DAY, 0);
+    cal2.setTimeZone(TimeZone.getTimeZone("UTC"));
+
+    ts2 = new Timestamp(cal2.getTimeInMillis());
+    nt2 = NanoTimeUtils.getNanoTime(ts2, false);
+
+    ts2Fetched = NanoTimeUtils.getTimestamp(nt2, false);
+    Assert.assertEquals(ts2Fetched, ts2);
+    Assert.assertEquals(nt2.getJulianDay() - nt1.getJulianDay(), 1464305);
+}
 
   public void testNanos() {
     //case 1: 01:01:01.0000000001
@@ -136,6 +167,11 @@ public class TestParquetTimestampUtils extends TestCase {
     NanoTime n1 = NanoTimeUtils.getNanoTime(ts1, false);
 
     Assert.assertEquals(n2.getTimeOfDayNanos() - n1.getTimeOfDayNanos(), 600000000009L);
+
+    NanoTime n3 = new NanoTime(n1.getJulianDay() - 1, n1.getTimeOfDayNanos() + TimeUnit.DAYS.toNanos(1));
+    Assert.assertEquals(ts1, NanoTimeUtils.getTimestamp(n3, false));
+    n3 = new NanoTime(n1.getJulianDay() + 3, n1.getTimeOfDayNanos() - TimeUnit.DAYS.toNanos(3));
+    Assert.assertEquals(ts1, NanoTimeUtils.getTimestamp(n3, false));
   }
 
   public void testTimezone() {