You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2020/02/28 18:39:29 UTC
[hive] branch master updated: HIVE-22840: Race condition in
formatters of TimestampColumnVector and DateColumnVector (Shubham Chaurasia,
reviewed by Jesus Camacho Rodriguez)
This is an automated email from the ASF dual-hosted git repository.
jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new c072847 HIVE-22840: Race condition in formatters of TimestampColumnVector and DateColumnVector (Shubham Chaurasia, reviewed by Jesus Camacho Rodriguez)
c072847 is described below
commit c0728470eb4fbe446b7a1da711913ca55737e967
Author: Shubham Chaurasia <sc...@cloudera.com>
AuthorDate: Fri Feb 28 10:38:37 2020 -0800
HIVE-22840: Race condition in formatters of TimestampColumnVector and DateColumnVector (Shubham Chaurasia, reviewed by Jesus Camacho Rodriguez)
Close apache/hive#922
---
ql/pom.xml | 4 ++
serde/pom.xml | 4 ++
.../hadoop/hive/common/type/CalendarUtils.java | 41 ++++++++++++++-------
.../hive/ql/exec/vector/DateColumnVector.java | 42 +++------------------
.../hive/ql/exec/vector/TimestampColumnVector.java | 34 ++---------------
.../hive/ql/exec/vector/TestDateColumnVector.java | 43 ++++++++++++++++++++++
.../ql/exec/vector/TestTimestampColumnVector.java | 36 ++++++++++++++++++
7 files changed, 123 insertions(+), 81 deletions(-)
diff --git a/ql/pom.xml b/ql/pom.xml
index 8b0c02b..d5c83b6 100644
--- a/ql/pom.xml
+++ b/ql/pom.xml
@@ -98,6 +98,10 @@
<artifactId>hive-spark-client</artifactId>
<version>${project.version}</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive-storage-api</artifactId>
+ </dependency>
<!-- inter-project -->
<dependency>
<groupId>com.esotericsoftware</groupId>
diff --git a/serde/pom.xml b/serde/pom.xml
index 10fa1b7..ca4fee2 100644
--- a/serde/pom.xml
+++ b/serde/pom.xml
@@ -49,6 +49,10 @@
<artifactId>hive-shims</artifactId>
<version>${project.version}</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive-storage-api</artifactId>
+ </dependency>
<!-- inter-project -->
<dependency>
<groupId>com.google.code.findbugs</groupId>
diff --git a/common/src/java/org/apache/hadoop/hive/common/type/CalendarUtils.java b/storage-api/src/java/org/apache/hadoop/hive/common/type/CalendarUtils.java
similarity index 84%
rename from common/src/java/org/apache/hadoop/hive/common/type/CalendarUtils.java
rename to storage-api/src/java/org/apache/hadoop/hive/common/type/CalendarUtils.java
index 9b491d0..7555d16 100644
--- a/common/src/java/org/apache/hadoop/hive/common/type/CalendarUtils.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/common/type/CalendarUtils.java
@@ -37,9 +37,15 @@ import java.util.concurrent.TimeUnit;
*/
public class CalendarUtils {
- private static SimpleDateFormat createFormatter(String fmt,
- GregorianCalendar calendar) {
+ public static final long SWITCHOVER_MILLIS;
+ public static final long SWITCHOVER_DAYS;
+
+ private static SimpleDateFormat createFormatter(String fmt, boolean proleptic) {
SimpleDateFormat result = new SimpleDateFormat(fmt);
+ GregorianCalendar calendar = new GregorianCalendar(UTC);
+ if (proleptic) {
+ calendar.setGregorianChange(new Date(Long.MIN_VALUE));
+ }
result.setCalendar(calendar);
return result;
}
@@ -47,24 +53,17 @@ public class CalendarUtils {
private static final String DATE = "yyyy-MM-dd";
private static final String TIME = DATE + " HH:mm:ss.SSS";
private static final TimeZone UTC = TimeZone.getTimeZone("UTC");
- private static final GregorianCalendar HYBRID = new GregorianCalendar();
private static final ThreadLocal<SimpleDateFormat> HYBRID_DATE_FORMAT =
- ThreadLocal.withInitial(() -> createFormatter(DATE, HYBRID));
+ ThreadLocal.withInitial(() -> createFormatter(DATE, false));
private static final ThreadLocal<SimpleDateFormat> HYBRID_TIME_FORMAT =
- ThreadLocal.withInitial(() -> createFormatter(TIME, HYBRID));
- private static final long SWITCHOVER_MILLIS;
- private static final long SWITCHOVER_DAYS;
- private static final GregorianCalendar PROLEPTIC = new GregorianCalendar();
+ ThreadLocal.withInitial(() -> createFormatter(TIME, false));
+
private static final ThreadLocal<SimpleDateFormat> PROLEPTIC_DATE_FORMAT =
- ThreadLocal.withInitial(() -> createFormatter(DATE, PROLEPTIC));
+ ThreadLocal.withInitial(() -> createFormatter(DATE, true));
private static final ThreadLocal<SimpleDateFormat> PROLEPTIC_TIME_FORMAT =
- ThreadLocal.withInitial(() -> createFormatter(TIME, PROLEPTIC));
+ ThreadLocal.withInitial(() -> createFormatter(TIME, true));
static {
- HYBRID.setTimeZone(UTC);
- PROLEPTIC.setTimeZone(UTC);
- PROLEPTIC.setGregorianChange(new Date(Long.MIN_VALUE));
-
// Get the last day where the two calendars agree with each other.
try {
SWITCHOVER_MILLIS = HYBRID_DATE_FORMAT.get().parse("1582-10-15").getTime();
@@ -177,6 +176,20 @@ public class CalendarUtils {
return hybrid;
}
+ /**
+ *
+ * Formats epoch day to date according to proleptic or hybrid calendar
+ *
+ * @param epochDay epoch day
+ * @param useProleptic if true - uses proleptic formatter, else uses hybrid formatter
+ * @return formatted date
+ */
+ public static String formatDate(long epochDay, boolean useProleptic) {
+ long millis = TimeUnit.DAYS.toMillis(epochDay);
+ return useProleptic ? PROLEPTIC_DATE_FORMAT.get().format(millis)
+ : HYBRID_DATE_FORMAT.get().format(millis);
+ }
+
private CalendarUtils() {
throw new UnsupportedOperationException();
}
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DateColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DateColumnVector.java
index 3dac667..6608a6e 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DateColumnVector.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DateColumnVector.java
@@ -17,37 +17,13 @@
*/
package org.apache.hadoop.hive.ql.exec.vector;
-import java.text.SimpleDateFormat;
-import java.util.GregorianCalendar;
-import java.util.TimeZone;
-import java.util.concurrent.TimeUnit;
+import org.apache.hadoop.hive.common.type.CalendarUtils;
/**
* This class extends LongColumnVector in order to introduce some date-specific semantics. In
* DateColumnVector, the elements of vector[] represent the days since 1970-01-01
*/
public class DateColumnVector extends LongColumnVector {
- private static final TimeZone UTC = TimeZone.getTimeZone("UTC");
- private static final GregorianCalendar PROLEPTIC_GREGORIAN_CALENDAR = new GregorianCalendar(UTC);
- private static final GregorianCalendar GREGORIAN_CALENDAR = new GregorianCalendar(UTC);
-
- private static final SimpleDateFormat PROLEPTIC_GREGORIAN_DATE_FORMATTER =
- new SimpleDateFormat("yyyy-MM-dd");
- private static final SimpleDateFormat GREGORIAN_DATE_FORMATTER =
- new SimpleDateFormat("yyyy-MM-dd");
-
- /**
- * -141427: hybrid: 1582-10-15 proleptic: 1582-10-15
- * -141428: hybrid: 1582-10-04 proleptic: 1582-10-14
- */
- private static final int CUTOVER_DAY_EPOCH = -141427; // it's 1582-10-15 in both calendars
-
- static {
- PROLEPTIC_GREGORIAN_CALENDAR.setGregorianChange(new java.util.Date(Long.MIN_VALUE));
-
- PROLEPTIC_GREGORIAN_DATE_FORMATTER.setCalendar(PROLEPTIC_GREGORIAN_CALENDAR);
- GREGORIAN_DATE_FORMATTER.setCalendar(GREGORIAN_CALENDAR);
- }
private boolean usingProlepticCalendar = false;
@@ -76,24 +52,16 @@ public class DateColumnVector extends LongColumnVector {
private void updateDataAccordingProlepticSetting() throws Exception {
for (int i = 0; i < vector.length; i++) {
- if (vector[i] >= CUTOVER_DAY_EPOCH) { // no need for conversion
+ if (vector[i] >= CalendarUtils.SWITCHOVER_DAYS) { // no need for conversion
continue;
}
- long millis = TimeUnit.DAYS.toMillis(vector[i]);
- String originalFormatted = usingProlepticCalendar ? GREGORIAN_DATE_FORMATTER.format(millis)
- : PROLEPTIC_GREGORIAN_DATE_FORMATTER.format(millis);
-
- millis = (usingProlepticCalendar ? PROLEPTIC_GREGORIAN_DATE_FORMATTER.parse(originalFormatted)
- : GREGORIAN_DATE_FORMATTER.parse(originalFormatted)).getTime();
-
- vector[i] = TimeUnit.MILLISECONDS.toDays(millis);
+ vector[i] = usingProlepticCalendar ? CalendarUtils.convertDateToProleptic((int) vector[i]) : CalendarUtils
+ .convertDateToHybrid((int) vector[i]);
}
}
public String formatDate(int i) {
- long millis = TimeUnit.DAYS.toMillis(vector[i]);
- return usingProlepticCalendar ? PROLEPTIC_GREGORIAN_DATE_FORMATTER.format(millis)
- : GREGORIAN_DATE_FORMATTER.format(millis);
+ return CalendarUtils.formatDate(vector[i], usingProlepticCalendar);
}
public DateColumnVector setUsingProlepticCalendar(boolean usingProlepticCalendar) {
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
index d5dfc92..7807e69 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
@@ -18,14 +18,12 @@
package org.apache.hadoop.hive.ql.exec.vector;
import java.sql.Timestamp;
-import java.text.SimpleDateFormat;
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.util.Arrays;
-import java.util.GregorianCalendar;
-import java.util.TimeZone;
+import org.apache.hadoop.hive.common.type.CalendarUtils;
import org.apache.hadoop.io.Writable;
/**
@@ -41,26 +39,6 @@ import org.apache.hadoop.io.Writable;
* using the scratch timestamp, and then perhaps update the column vector row with a result.
*/
public class TimestampColumnVector extends ColumnVector {
- private static final TimeZone UTC = TimeZone.getTimeZone("UTC");
- private static final GregorianCalendar PROLEPTIC_GREGORIAN_CALENDAR_UTC =
- new GregorianCalendar(UTC);
- private static final GregorianCalendar GREGORIAN_CALENDAR_UTC =
- new GregorianCalendar(UTC);
-
- private static final SimpleDateFormat PROLEPTIC_GREGORIAN_TIMESTAMP_FORMATTER_UTC =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- private static final SimpleDateFormat GREGORIAN_TIMESTAMP_FORMATTER_UTC =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
-
- static {
- PROLEPTIC_GREGORIAN_CALENDAR_UTC.setGregorianChange(new java.util.Date(Long.MIN_VALUE));
-
- PROLEPTIC_GREGORIAN_TIMESTAMP_FORMATTER_UTC.setCalendar(PROLEPTIC_GREGORIAN_CALENDAR_UTC);
- GREGORIAN_TIMESTAMP_FORMATTER_UTC.setCalendar(GREGORIAN_CALENDAR_UTC);
- }
-
- // it's 1582-10-15 in both calendars
- private static final int CUTOVER_MILLIS_EPOCH = -141427 * 24 * 60 * 60 * 1000;
/*
* The storage arrays for this column vector corresponds to the storage of a Timestamp:
@@ -594,18 +572,14 @@ public class TimestampColumnVector extends ColumnVector {
private void updateDataAccordingProlepticSetting() throws Exception {
for (int i = 0; i < nanos.length; i++) {
- if (time[i] >= CUTOVER_MILLIS_EPOCH) { // no need for conversion
+ if (time[i] >= CalendarUtils.SWITCHOVER_MILLIS) { // no need for conversion
continue;
}
asScratchTimestamp(i);
long offset = 0;
- String formatted =
- usingProlepticCalendar ? GREGORIAN_TIMESTAMP_FORMATTER_UTC.format(scratchTimestamp)
- : PROLEPTIC_GREGORIAN_TIMESTAMP_FORMATTER_UTC.format(scratchTimestamp);
- long millis = usingProlepticCalendar
- ? PROLEPTIC_GREGORIAN_TIMESTAMP_FORMATTER_UTC.parse(formatted).getTime()
- : GREGORIAN_TIMESTAMP_FORMATTER_UTC.parse(formatted).getTime();
+ long millis = usingProlepticCalendar ? CalendarUtils.convertTimeToProleptic(scratchTimestamp.getTime())
+ : CalendarUtils.convertTimeToHybrid(scratchTimestamp.getTime());
Timestamp newTimeStamp = Timestamp.from(Instant.ofEpochMilli(millis));
diff --git a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDateColumnVector.java b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDateColumnVector.java
index 0d4dc5d..9f53245 100644
--- a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDateColumnVector.java
+++ b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDateColumnVector.java
@@ -17,6 +17,9 @@
*/
package org.apache.hadoop.hive.ql.exec.vector;
+import java.util.ArrayList;
+import java.util.List;
+
import org.junit.Assert;
import org.junit.Test;
@@ -77,4 +80,44 @@ public class TestDateColumnVector {
" new = " + newUseProleptic,
expectedDateString, dateColumnVector.formatDate(0));
}
+
+ @Test(timeout = 300_000)
+ public void testMultiThreaded() throws Exception {
+
+ //when java DateTimeFormatter/GregorianCalendar race was not handled, used to throw exceptions like -
+
+ // 1) java.lang.NumberFormatException: For input string: "" OR java.lang.NumberFormatException: For input string: ".821582E.821582E44"
+
+ // 2) Caused by: java.lang.ArrayIndexOutOfBoundsException: -5325980
+ // at sun.util.calendar.BaseCalendar.getCalendarDateFromFixedDate(BaseCalendar.java:453)
+ // at java.util.GregorianCalendar.computeFields(GregorianCalendar.java:2397)
+
+ // create 5 threads and start manipulating vectors, should not throw any exceptions now.
+
+ List<Thread> threads = new ArrayList<>();
+
+ threads.add(startVectorManipulationThread(50000, -141428));
+ threads.add(startVectorManipulationThread(50000, -141430));
+ threads.add(startVectorManipulationThread(50000, -16768));
+ threads.add(startVectorManipulationThread(50000, -499952));
+ threads.add(startVectorManipulationThread(50000, -499955));
+
+ for (Thread thread : threads) {
+ thread.join();
+ }
+
+ }
+
+ private Thread startVectorManipulationThread(final int vectorLength, final int epochDay) {
+ Thread thread = new Thread(() -> {
+ DateColumnVector columnVector = new DateColumnVector(vectorLength).setUsingProlepticCalendar(true);
+ for (int i = 0; i < vectorLength; i++) {
+ columnVector.vector[i] = epochDay;
+ }
+ columnVector.changeCalendar(false, true);
+ });
+ thread.start();
+ return thread;
+ }
+
}
diff --git a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java
index 333a5b5..2d85b11 100644
--- a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java
+++ b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java
@@ -24,7 +24,9 @@ import java.sql.Timestamp;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.time.Instant;
+import java.util.ArrayList;
import java.util.GregorianCalendar;
+import java.util.List;
import java.util.Random;
import java.util.TimeZone;
import java.util.concurrent.TimeUnit;
@@ -210,4 +212,38 @@ public class TestTimestampColumnVector {
return testFormatter;
}
+
+
+ @Test(timeout = 300_000)
+ public void testMultiThreaded() throws Exception {
+
+ // similar to TestDateColumnVector#testMultiThreaded
+
+ List<Thread> threads = new ArrayList<>();
+
+ threads.add(startVectorManipulationThread(50000, -141428));
+ threads.add(startVectorManipulationThread(50000, -141430));
+ threads.add(startVectorManipulationThread(50000, -16768));
+ threads.add(startVectorManipulationThread(50000, -499952));
+ threads.add(startVectorManipulationThread(50000, -499955));
+
+ for (Thread thread : threads) {
+ thread.join();
+ }
+
+ }
+
+ private Thread startVectorManipulationThread(final int vectorLength, final long millis) {
+ Thread thread = new Thread(() -> {
+ TimestampColumnVector columnVector = new TimestampColumnVector(vectorLength).setUsingProlepticCalendar(true);
+ for (int i = 0; i < vectorLength; i++) {
+ columnVector.time[i] = millis;
+ columnVector.nanos[i] = 1;
+ }
+ columnVector.changeCalendar(false, true);
+ });
+ thread.start();
+ return thread;
+ }
+
}