You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2016/06/10 19:48:26 UTC

[1/2] orc git commit: HIVE-13948. Incorrect timezone handling in Writable results in wrong dates in queries

Repository: orc
Updated Branches:
  refs/heads/branch-1.1 3b778c8cc -> 662938edc


HIVE-13948. Incorrect timezone handling in Writable results in wrong dates in
queries


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/f79238ec
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/f79238ec
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/f79238ec

Branch: refs/heads/branch-1.1
Commit: f79238ec69fe89ff84b8ea9aba8936e116a0a23c
Parents: 3b778c8
Author: Owen O'Malley <om...@apache.org>
Authored: Fri Jun 10 12:18:54 2016 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Fri Jun 10 12:18:54 2016 -0700

----------------------------------------------------------------------
 .../hadoop/hive/serde2/io/DateWritable.java     | 68 ++++++++++++++++----
 1 file changed, 57 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/f79238ec/java/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java b/java/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java
index dd2b1d9..637720a 100644
--- a/java/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java
@@ -22,6 +22,7 @@ import java.io.DataOutput;
 import java.io.IOException;
 import java.sql.Date;
 import java.util.Calendar;
+import java.util.GregorianCalendar;
 import java.util.TimeZone;
 import java.util.concurrent.TimeUnit;
 
@@ -41,7 +42,7 @@ public class DateWritable implements WritableComparable<DateWritable> {
 
   private static final long MILLIS_PER_DAY = TimeUnit.DAYS.toMillis(1);
 
-  // Local time zone.
+  // Local time zone. Store separately because Calendar would clone it.
   // Java TimeZone has no mention of thread safety. Use thread local instance to be safe.
   private static final ThreadLocal<TimeZone> LOCAL_TIMEZONE = new ThreadLocal<TimeZone>() {
     @Override
@@ -50,6 +51,19 @@ public class DateWritable implements WritableComparable<DateWritable> {
     }
   };
 
+  private static final ThreadLocal<Calendar> UTC_CALENDAR = new ThreadLocal<Calendar>() {
+    @Override
+    protected Calendar initialValue() {
+      return new GregorianCalendar(TimeZone.getTimeZone("UTC"));
+    }
+  };
+  private static final ThreadLocal<Calendar> LOCAL_CALENDAR = new ThreadLocal<Calendar>() {
+    @Override
+    protected Calendar initialValue() {
+      return Calendar.getInstance();
+    }
+  };
+
   // Internal representation is an integer representing day offset from our epoch value 1970-01-01
   private int daysSinceEpoch = 0;
 
@@ -95,11 +109,16 @@ public class DateWritable implements WritableComparable<DateWritable> {
   }
 
   /**
-   *
    * @return Date value corresponding to the date in the local time zone
    */
   public Date get() {
-    return new Date(daysToMillis(daysSinceEpoch));
+    return get(true);
+  }
+
+  // TODO: we should call this more often. In theory, for DATE type, time should never matter, but
+  //       it's hard to tell w/some code paths like UDFs/OIs etc. that are used in many places.
+  public Date get(boolean doesTimeMatter) {
+    return new Date(daysToMillis(daysSinceEpoch, doesTimeMatter));
   }
 
   public int getDays() {
@@ -119,21 +138,47 @@ public class DateWritable implements WritableComparable<DateWritable> {
   }
 
   public static long daysToMillis(int d) {
-    // Convert from day offset to ms in UTC, then apply local timezone offset.
-    long millisUtc = d * MILLIS_PER_DAY;
-    long tmp =  millisUtc - LOCAL_TIMEZONE.get().getOffset(millisUtc);
-    // Between millisUtc and tmp, the time zone offset may have changed due to DST.
-    // Look up the offset again.
-    return millisUtc - LOCAL_TIMEZONE.get().getOffset(tmp);
+    return daysToMillis(d, true);
+  }
+
+  public static long daysToMillis(int d, boolean doesTimeMatter) {
+    // What we are trying to get is the equivalent of new Date(ymd).getTime() in the local tz,
+    // where ymd is whatever d represents. How it "works" is this.
+    // First we get the UTC midnight for that day (which always exists, a small island of sanity).
+    long utcMidnight = d * MILLIS_PER_DAY;
+    // Now we take a local TZ offset at midnight UTC. Say we are in -4; that means (surprise
+    // surprise) that at midnight UTC it was 20:00 in local. So far we are on firm ground.
+    long utcMidnightOffset = LOCAL_TIMEZONE.get().getOffset(utcMidnight);
+    // And now we wander straight into the swamp, when instead of adding, we subtract it from UTC
+    // midnight to supposedly get local midnight (in the above case, 4:00 UTC). Of course, given
+    // all the insane DST variations, where we actually end up is anyone's guess.
+    long hopefullyMidnight = utcMidnight - utcMidnightOffset;
+    // Then we determine the local TZ offset at that magical time.
+    long offsetAtHM = LOCAL_TIMEZONE.get().getOffset(hopefullyMidnight);
+    // If the offsets are the same, we assume our initial jump did not cross any DST boundaries,
+    // and is thus valid. Both times flowed at the same pace. We congratulate ourselves and bail.
+    if (utcMidnightOffset == offsetAtHM) return hopefullyMidnight;
+    // Alas, we crossed some DST boundary. If the time of day doesn't matter to the caller, we'll
+    // simply get the next day and go back half a day. This is not ideal but seems to work.
+    if (!doesTimeMatter) return daysToMillis(d + 1) - (MILLIS_PER_DAY >> 1);
+    // Now, we could get previous and next day, figure our how many hours were inserted or removed,
+    // and from which of the days, etc. But at this point our gun is pointing straight at our foot,
+    // so let's just go the safe, expensive way.
+    Calendar utc = UTC_CALENDAR.get(), local = LOCAL_CALENDAR.get();
+    utc.setTimeInMillis(utcMidnight);
+    local.set(utc.get(Calendar.YEAR), utc.get(Calendar.MONTH), utc.get(Calendar.DAY_OF_MONTH));
+    return local.getTimeInMillis();
   }
 
   public static int millisToDays(long millisLocal) {
+    // We assume millisLocal is midnight of some date. What we are basically trying to do
+    // here is go from local-midnight to UTC-midnight (or whatever time that happens to be).
     long millisUtc = millisLocal + LOCAL_TIMEZONE.get().getOffset(millisLocal);
     int days;
     if (millisUtc >= 0L) {
       days = (int) (millisUtc / MILLIS_PER_DAY);
     } else {
-      days = (int) ((millisUtc - 86399999) / MILLIS_PER_DAY);
+      days = (int) ((millisUtc - 86399999 /*(MILLIS_PER_DAY - 1)*/) / MILLIS_PER_DAY);
     }
     return days;
   }
@@ -169,7 +214,8 @@ public class DateWritable implements WritableComparable<DateWritable> {
 
   @Override
   public String toString() {
-    return get().toString();
+    // For toString, the time does not matter
+    return get(false).toString();
   }
 
   @Override


[2/2] orc git commit: Getting ready for the 1.1.1 release canidate.

Posted by om...@apache.org.
Getting ready for the 1.1.1 release canidate.

Signed-off-by: Owen O'Malley <om...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/662938ed
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/662938ed
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/662938ed

Branch: refs/heads/branch-1.1
Commit: 662938edc362ca9ad24c3404a9de8d54adec6bec
Parents: f79238e
Author: Owen O'Malley <om...@apache.org>
Authored: Fri Jun 10 12:46:24 2016 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Fri Jun 10 12:47:19 2016 -0700

----------------------------------------------------------------------
 CMakeLists.txt         | 2 +-
 java/core/pom.xml      | 2 +-
 java/mapreduce/pom.xml | 4 ++--
 java/pom.xml           | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/662938ed/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f2d07f4..eb31982 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,7 +17,7 @@ project(ORC)
 # Version number of package
 SET(CPACK_PACKAGE_VERSION_MAJOR "1")
 SET(CPACK_PACKAGE_VERSION_MINOR "1")
-SET(CPACK_PACKAGE_VERSION_PATCH "1-SNAPSHOT")
+SET(CPACK_PACKAGE_VERSION_PATCH "1")
 SET(ORC_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
 
 # Make sure that a build type is selected

http://git-wip-us.apache.org/repos/asf/orc/blob/662938ed/java/core/pom.xml
----------------------------------------------------------------------
diff --git a/java/core/pom.xml b/java/core/pom.xml
index 1829ade..8d39e8a 100644
--- a/java/core/pom.xml
+++ b/java/core/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <groupId>org.apache.orc</groupId>
     <artifactId>orc</artifactId>
-    <version>1.1.1-SNAPSHOT</version>
+    <version>1.1.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/orc/blob/662938ed/java/mapreduce/pom.xml
----------------------------------------------------------------------
diff --git a/java/mapreduce/pom.xml b/java/mapreduce/pom.xml
index ffa5f68..67ec52f 100644
--- a/java/mapreduce/pom.xml
+++ b/java/mapreduce/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <groupId>org.apache.orc</groupId>
     <artifactId>orc</artifactId>
-    <version>1.1.1-SNAPSHOT</version>
+    <version>1.1.1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
@@ -36,7 +36,7 @@
     <dependency>
       <groupId>org.apache.orc</groupId>
       <artifactId>orc-core</artifactId>
-      <version>1.1.1-SNAPSHOT</version>
+      <version>1.1.1</version>
     </dependency>
 
     <!-- inter-project -->

http://git-wip-us.apache.org/repos/asf/orc/blob/662938ed/java/pom.xml
----------------------------------------------------------------------
diff --git a/java/pom.xml b/java/pom.xml
index f2f1237..51f9262 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -21,7 +21,7 @@
   </parent>
   <groupId>org.apache.orc</groupId>
   <artifactId>orc</artifactId>
-  <version>1.1.1-SNAPSHOT</version>
+  <version>1.1.1</version>
   <packaging>pom</packaging>
 
   <name>Apache ORC</name>