You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2016/06/10 19:48:26 UTC
[1/2] orc git commit: HIVE-13948. Incorrect timezone handling in
Writable results in wrong dates in queries
Repository: orc
Updated Branches:
refs/heads/branch-1.1 3b778c8cc -> 662938edc
HIVE-13948. Incorrect timezone handling in Writable results in wrong dates in
queries
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/f79238ec
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/f79238ec
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/f79238ec
Branch: refs/heads/branch-1.1
Commit: f79238ec69fe89ff84b8ea9aba8936e116a0a23c
Parents: 3b778c8
Author: Owen O'Malley <om...@apache.org>
Authored: Fri Jun 10 12:18:54 2016 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Fri Jun 10 12:18:54 2016 -0700
----------------------------------------------------------------------
.../hadoop/hive/serde2/io/DateWritable.java | 68 ++++++++++++++++----
1 file changed, 57 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/f79238ec/java/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java b/java/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java
index dd2b1d9..637720a 100644
--- a/java/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java
@@ -22,6 +22,7 @@ import java.io.DataOutput;
import java.io.IOException;
import java.sql.Date;
import java.util.Calendar;
+import java.util.GregorianCalendar;
import java.util.TimeZone;
import java.util.concurrent.TimeUnit;
@@ -41,7 +42,7 @@ public class DateWritable implements WritableComparable<DateWritable> {
private static final long MILLIS_PER_DAY = TimeUnit.DAYS.toMillis(1);
- // Local time zone.
+ // Local time zone. Store separately because Calendar would clone it.
// Java TimeZone has no mention of thread safety. Use thread local instance to be safe.
private static final ThreadLocal<TimeZone> LOCAL_TIMEZONE = new ThreadLocal<TimeZone>() {
@Override
@@ -50,6 +51,19 @@ public class DateWritable implements WritableComparable<DateWritable> {
}
};
+ private static final ThreadLocal<Calendar> UTC_CALENDAR = new ThreadLocal<Calendar>() {
+ @Override
+ protected Calendar initialValue() {
+ return new GregorianCalendar(TimeZone.getTimeZone("UTC"));
+ }
+ };
+ private static final ThreadLocal<Calendar> LOCAL_CALENDAR = new ThreadLocal<Calendar>() {
+ @Override
+ protected Calendar initialValue() {
+ return Calendar.getInstance();
+ }
+ };
+
// Internal representation is an integer representing day offset from our epoch value 1970-01-01
private int daysSinceEpoch = 0;
@@ -95,11 +109,16 @@ public class DateWritable implements WritableComparable<DateWritable> {
}
/**
- *
* @return Date value corresponding to the date in the local time zone
*/
public Date get() {
- return new Date(daysToMillis(daysSinceEpoch));
+ return get(true);
+ }
+
+ // TODO: we should call this more often. In theory, for DATE type, time should never matter, but
+ // it's hard to tell w/some code paths like UDFs/OIs etc. that are used in many places.
+ public Date get(boolean doesTimeMatter) {
+ return new Date(daysToMillis(daysSinceEpoch, doesTimeMatter));
}
public int getDays() {
@@ -119,21 +138,47 @@ public class DateWritable implements WritableComparable<DateWritable> {
}
public static long daysToMillis(int d) {
- // Convert from day offset to ms in UTC, then apply local timezone offset.
- long millisUtc = d * MILLIS_PER_DAY;
- long tmp = millisUtc - LOCAL_TIMEZONE.get().getOffset(millisUtc);
- // Between millisUtc and tmp, the time zone offset may have changed due to DST.
- // Look up the offset again.
- return millisUtc - LOCAL_TIMEZONE.get().getOffset(tmp);
+ return daysToMillis(d, true);
+ }
+
+ public static long daysToMillis(int d, boolean doesTimeMatter) {
+ // What we are trying to get is the equivalent of new Date(ymd).getTime() in the local tz,
+ // where ymd is whatever d represents. How it "works" is this.
+ // First we get the UTC midnight for that day (which always exists, a small island of sanity).
+ long utcMidnight = d * MILLIS_PER_DAY;
+ // Now we take a local TZ offset at midnight UTC. Say we are in -4; that means (surprise
+ // surprise) that at midnight UTC it was 20:00 in local. So far we are on firm ground.
+ long utcMidnightOffset = LOCAL_TIMEZONE.get().getOffset(utcMidnight);
+ // And now we wander straight into the swamp, when instead of adding, we subtract it from UTC
+ // midnight to supposedly get local midnight (in the above case, 4:00 UTC). Of course, given
+ // all the insane DST variations, where we actually end up is anyone's guess.
+ long hopefullyMidnight = utcMidnight - utcMidnightOffset;
+ // Then we determine the local TZ offset at that magical time.
+ long offsetAtHM = LOCAL_TIMEZONE.get().getOffset(hopefullyMidnight);
+ // If the offsets are the same, we assume our initial jump did not cross any DST boundaries,
+ // and is thus valid. Both times flowed at the same pace. We congratulate ourselves and bail.
+ if (utcMidnightOffset == offsetAtHM) return hopefullyMidnight;
+ // Alas, we crossed some DST boundary. If the time of day doesn't matter to the caller, we'll
+ // simply get the next day and go back half a day. This is not ideal but seems to work.
+ if (!doesTimeMatter) return daysToMillis(d + 1) - (MILLIS_PER_DAY >> 1);
+ // Now, we could get previous and next day, figure our how many hours were inserted or removed,
+ // and from which of the days, etc. But at this point our gun is pointing straight at our foot,
+ // so let's just go the safe, expensive way.
+ Calendar utc = UTC_CALENDAR.get(), local = LOCAL_CALENDAR.get();
+ utc.setTimeInMillis(utcMidnight);
+ local.set(utc.get(Calendar.YEAR), utc.get(Calendar.MONTH), utc.get(Calendar.DAY_OF_MONTH));
+ return local.getTimeInMillis();
}
public static int millisToDays(long millisLocal) {
+ // We assume millisLocal is midnight of some date. What we are basically trying to do
+ // here is go from local-midnight to UTC-midnight (or whatever time that happens to be).
long millisUtc = millisLocal + LOCAL_TIMEZONE.get().getOffset(millisLocal);
int days;
if (millisUtc >= 0L) {
days = (int) (millisUtc / MILLIS_PER_DAY);
} else {
- days = (int) ((millisUtc - 86399999) / MILLIS_PER_DAY);
+ days = (int) ((millisUtc - 86399999 /*(MILLIS_PER_DAY - 1)*/) / MILLIS_PER_DAY);
}
return days;
}
@@ -169,7 +214,8 @@ public class DateWritable implements WritableComparable<DateWritable> {
@Override
public String toString() {
- return get().toString();
+ // For toString, the time does not matter
+ return get(false).toString();
}
@Override
[2/2] orc git commit: Getting ready for the 1.1.1 release canidate.
Posted by om...@apache.org.
Getting ready for the 1.1.1 release canidate.
Signed-off-by: Owen O'Malley <om...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/662938ed
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/662938ed
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/662938ed
Branch: refs/heads/branch-1.1
Commit: 662938edc362ca9ad24c3404a9de8d54adec6bec
Parents: f79238e
Author: Owen O'Malley <om...@apache.org>
Authored: Fri Jun 10 12:46:24 2016 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Fri Jun 10 12:47:19 2016 -0700
----------------------------------------------------------------------
CMakeLists.txt | 2 +-
java/core/pom.xml | 2 +-
java/mapreduce/pom.xml | 4 ++--
java/pom.xml | 2 +-
4 files changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/662938ed/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f2d07f4..eb31982 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,7 +17,7 @@ project(ORC)
# Version number of package
SET(CPACK_PACKAGE_VERSION_MAJOR "1")
SET(CPACK_PACKAGE_VERSION_MINOR "1")
-SET(CPACK_PACKAGE_VERSION_PATCH "1-SNAPSHOT")
+SET(CPACK_PACKAGE_VERSION_PATCH "1")
SET(ORC_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
# Make sure that a build type is selected
http://git-wip-us.apache.org/repos/asf/orc/blob/662938ed/java/core/pom.xml
----------------------------------------------------------------------
diff --git a/java/core/pom.xml b/java/core/pom.xml
index 1829ade..8d39e8a 100644
--- a/java/core/pom.xml
+++ b/java/core/pom.xml
@@ -19,7 +19,7 @@
<parent>
<groupId>org.apache.orc</groupId>
<artifactId>orc</artifactId>
- <version>1.1.1-SNAPSHOT</version>
+ <version>1.1.1</version>
<relativePath>../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/orc/blob/662938ed/java/mapreduce/pom.xml
----------------------------------------------------------------------
diff --git a/java/mapreduce/pom.xml b/java/mapreduce/pom.xml
index ffa5f68..67ec52f 100644
--- a/java/mapreduce/pom.xml
+++ b/java/mapreduce/pom.xml
@@ -19,7 +19,7 @@
<parent>
<groupId>org.apache.orc</groupId>
<artifactId>orc</artifactId>
- <version>1.1.1-SNAPSHOT</version>
+ <version>1.1.1</version>
<relativePath>../pom.xml</relativePath>
</parent>
@@ -36,7 +36,7 @@
<dependency>
<groupId>org.apache.orc</groupId>
<artifactId>orc-core</artifactId>
- <version>1.1.1-SNAPSHOT</version>
+ <version>1.1.1</version>
</dependency>
<!-- inter-project -->
http://git-wip-us.apache.org/repos/asf/orc/blob/662938ed/java/pom.xml
----------------------------------------------------------------------
diff --git a/java/pom.xml b/java/pom.xml
index f2f1237..51f9262 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -21,7 +21,7 @@
</parent>
<groupId>org.apache.orc</groupId>
<artifactId>orc</artifactId>
- <version>1.1.1-SNAPSHOT</version>
+ <version>1.1.1</version>
<packaging>pom</packaging>
<name>Apache ORC</name>