You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2016/12/12 09:51:45 UTC
[4/4] hive git commit: HIVE-15338: Wrong result from non-vectorized
DATEDIFF with scalar parameter of type DATE/TIMESTAMP (Matt McCline,
reviewed by Jason Dere)
HIVE-15338: Wrong result from non-vectorized DATEDIFF with scalar parameter of type DATE/TIMESTAMP (Matt McCline, reviewed by Jason Dere)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6ef0b507
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6ef0b507
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6ef0b507
Branch: refs/heads/master
Commit: 6ef0b5078448f0589b85f7d6165c86961bf3eacf
Parents: 0ae2475
Author: Matt McCline <mm...@hortonworks.com>
Authored: Mon Dec 12 01:51:24 2016 -0800
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Mon Dec 12 01:51:24 2016 -0800
----------------------------------------------------------------------
.../ql/exec/vector/VectorizationContext.java | 4 +-
.../expressions/VectorUDFDateAddScalarCol.java | 2 +
.../expressions/VectorUDFDateDiffColScalar.java | 2 +
.../expressions/VectorUDFDateDiffScalarCol.java | 2 +
.../hive/ql/udf/generic/GenericUDFDateDiff.java | 14 +-
.../TestVectorGenericDateExpressions.java | 2 +-
ql/src/test/queries/clientpositive/date_udf.q | 15 +-
.../clientpositive/vectorized_date_funcs.q | 67 +-
.../test/results/clientpositive/date_udf.q.out | 34 +-
.../llap/vectorized_date_funcs.q.out | 979 +++++++++++--------
.../results/clientpositive/spark/date_udf.q.out | 34 +-
.../clientpositive/vectorized_date_funcs.q.out | 979 +++++++++++--------
12 files changed, 1244 insertions(+), 890 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/6ef0b507/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index d9acdf5..d2f5408 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -1709,7 +1709,7 @@ public class VectorizationContext {
cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterLongColumnInList.class : LongColumnInList.class);
long[] inVals = new long[childrenForInList.size()];
for (int i = 0; i != inVals.length; i++) {
- inVals[i] = (Integer) getVectorTypeScalarValue((ExprNodeConstantDesc) childrenForInList.get(i));
+ inVals[i] = (Long) getVectorTypeScalarValue((ExprNodeConstantDesc) childrenForInList.get(i));
}
expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType);
((ILongInExpr) expr).setInListValues(inVals);
@@ -2326,7 +2326,7 @@ public class VectorizationContext {
Object scalarValue = getScalarValue(constDesc);
switch (type) {
case DATE:
- return DateWritable.dateToDays((Date) scalarValue);
+ return new Long(DateWritable.dateToDays((Date) scalarValue));
case INTERVAL_YEAR_MONTH:
return ((HiveIntervalYearMonth) scalarValue).getTotalMonths();
default:
http://git-wip-us.apache.org/repos/asf/hive/blob/6ef0b507/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java
index 724ea45..74ef6a6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java
@@ -56,6 +56,8 @@ public class VectorUDFDateAddScalarCol extends VectorExpression {
this.timestampValue = (Timestamp) object;
} else if (object instanceof byte []) {
this.stringValue = (byte[]) object;
+ } else {
+ throw new RuntimeException("Unexpected scalar object " + object.getClass().getName() + " " + object.toString());
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/6ef0b507/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java
index 71b3887..15edbdf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java
@@ -57,6 +57,8 @@ public class VectorUDFDateDiffColScalar extends VectorExpression {
this.timestampValue = (Timestamp) object;
} else if (object instanceof byte []) {
this.stringValue = (byte []) object;
+ } else {
+ throw new RuntimeException("Unexpected scalar object " + object.getClass().getName() + " " + object.toString());
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/6ef0b507/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java
index c733bc9..371537a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java
@@ -56,6 +56,8 @@ public class VectorUDFDateDiffScalarCol extends VectorExpression {
this.timestampValue = (Timestamp) object;
} else if (object instanceof byte []) {
this.stringValue = (byte[]) object;
+ } else {
+ throw new RuntimeException("Unexpected scalar object " + object.getClass().getName() + " " + object.toString());
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/6ef0b507/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateDiff.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateDiff.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateDiff.java
index 403cf11..ac48f01 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateDiff.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateDiff.java
@@ -20,7 +20,7 @@ package org.apache.hadoop.hive.ql.udf.generic;
import java.sql.Timestamp;
import java.text.ParseException;
import java.text.SimpleDateFormat;
-import java.util.Date;
+import java.sql.Date;
import java.util.TimeZone;
import org.apache.hadoop.hive.ql.exec.Description;
@@ -70,7 +70,6 @@ public class GenericUDFDateDiff extends GenericUDF {
private IntWritable result = new IntWritable();
public GenericUDFDateDiff() {
- formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
}
@Override
@@ -106,14 +105,14 @@ public class GenericUDFDateDiff extends GenericUDF {
if (argument.get() == null) {
return null;
}
- Date date = new Date();
+ Date date = new Date(0);
switch (inputType) {
case STRING:
case VARCHAR:
case CHAR:
String dateString = converter.convert(argument.get()).toString();
try {
- date = formatter.parse(dateString);
+ date.setTime(formatter.parse(dateString).getTime());
} catch (ParseException e) {
return null;
}
@@ -171,11 +170,8 @@ public class GenericUDFDateDiff extends GenericUDF {
if (date == null || date2 == null) {
return null;
}
- // NOTE: This implementation avoids the extra-second problem
- // by comparing with UTC epoch and integer division.
- // 86400 is the number of seconds in a day
- long diffInMilliSeconds = date.getTime() - date2.getTime();
- result.set((int) (diffInMilliSeconds / (86400 * 1000)));
+
+ result.set(DateWritable.dateToDays(date) - DateWritable.dateToDays(date2));
return result;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/6ef0b507/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java
index 13bfdd7..e25dcdf 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java
@@ -511,7 +511,7 @@ public class TestVectorGenericDateExpressions {
byte[] bytes = "error".getBytes(utf8);
VectorizedRowBatch batch = new VectorizedRowBatch(2, 1);
- udf = new VectorUDFDateDiffColScalar(0, 0, 1);
+ udf = new VectorUDFDateDiffColScalar(0, 0L, 1);
udf.setInputTypes(VectorExpression.Type.TIMESTAMP, VectorExpression.Type.STRING);
batch.cols[0] = new BytesColumnVector(1);
batch.cols[1] = new LongColumnVector(1);
http://git-wip-us.apache.org/repos/asf/hive/blob/6ef0b507/ql/src/test/queries/clientpositive/date_udf.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/date_udf.q b/ql/src/test/queries/clientpositive/date_udf.q
index c55b9f9..3b82265 100644
--- a/ql/src/test/queries/clientpositive/date_udf.q
+++ b/ql/src/test/queries/clientpositive/date_udf.q
@@ -39,9 +39,20 @@ select unix_timestamp(d), year(d), month(d), day(d), dayofmonth(d),
select date_add(d, 5), date_sub(d, 10) from date_udf_string;
-select datediff(d, d), datediff(d, '2002-03-21'), datediff('2002-03-21', d),
+select
+ datediff(d, d),
+ datediff(d, '2002-03-21'),
+ datediff(d, date '2002-03-21'),
+ datediff('2002-03-21', d),
+ datediff(date '2002-03-21', d),
datediff('2002-03-21 00:00:00', d),
- datediff(d, '2002-03-21 00:00:00')
+ datediff(timestamp '2002-03-21 00:00:00', d),
+ datediff(d, '2002-03-21 00:00:00'),
+ datediff(d, timestamp '2002-03-21 00:00:00'),
+ datediff('2002-03-21 08:01:59', d),
+ datediff(timestamp '2002-03-21 08:01:59', d),
+ datediff(d, '2002-03-21 08:01:59'),
+ datediff(d, timestamp '2002-03-21 08:01:59')
from date_udf_string;
select
http://git-wip-us.apache.org/repos/asf/hive/blob/6ef0b507/ql/src/test/queries/clientpositive/vectorized_date_funcs.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vectorized_date_funcs.q b/ql/src/test/queries/clientpositive/vectorized_date_funcs.q
index 7d7b1cf..899e922 100644
--- a/ql/src/test/queries/clientpositive/vectorized_date_funcs.q
+++ b/ql/src/test/queries/clientpositive/vectorized_date_funcs.q
@@ -1,6 +1,7 @@
set hive.mapred.mode=nonstrict;
set hive.explain.user=false;
SET hive.vectorized.execution.enabled = true;
+set hive.cli.print.header=true;
-- SORT_QUERY_RESULTS
@@ -25,6 +26,7 @@ INSERT INTO TABLE date_udf_flight_orc SELECT fl_date, to_utc_timestamp(fl_date,
SELECT * FROM date_udf_flight_orc;
EXPLAIN SELECT
+ fl_time,
to_unix_timestamp(fl_time),
year(fl_time),
month(fl_time),
@@ -36,10 +38,18 @@ EXPLAIN SELECT
to_date(fl_time),
date_add(fl_time, 2),
date_sub(fl_time, 2),
- datediff(fl_time, "2000-01-01")
+ datediff(fl_time, "2000-01-01"),
+ datediff(fl_time, date "2000-01-01"),
+ datediff(fl_time, timestamp "2000-01-01 00:00:00"),
+ datediff(fl_time, timestamp "2000-01-01 11:13:09"),
+ datediff(fl_time, "2007-03-14"),
+ datediff(fl_time, date "2007-03-14"),
+ datediff(fl_time, timestamp "2007-03-14 00:00:00"),
+ datediff(fl_time, timestamp "2007-03-14 08:21:59")
FROM date_udf_flight_orc;
SELECT
+ fl_time,
to_unix_timestamp(fl_time),
year(fl_time),
month(fl_time),
@@ -51,10 +61,18 @@ SELECT
to_date(fl_time),
date_add(fl_time, 2),
date_sub(fl_time, 2),
- datediff(fl_time, "2000-01-01")
+ datediff(fl_time, "2000-01-01"),
+ datediff(fl_time, date "2000-01-01"),
+ datediff(fl_time, timestamp "2000-01-01 00:00:00"),
+ datediff(fl_time, timestamp "2000-01-01 11:13:09"),
+ datediff(fl_time, "2007-03-14"),
+ datediff(fl_time, date "2007-03-14"),
+ datediff(fl_time, timestamp "2007-03-14 00:00:00"),
+ datediff(fl_time, timestamp "2007-03-14 08:21:59")
FROM date_udf_flight_orc;
EXPLAIN SELECT
+ fl_date,
to_unix_timestamp(fl_date),
year(fl_date),
month(fl_date),
@@ -66,10 +84,18 @@ EXPLAIN SELECT
to_date(fl_date),
date_add(fl_date, 2),
date_sub(fl_date, 2),
- datediff(fl_date, "2000-01-01")
+ datediff(fl_date, "2000-01-01"),
+ datediff(fl_date, date "2000-01-01"),
+ datediff(fl_date, timestamp "2000-01-01 00:00:00"),
+ datediff(fl_date, timestamp "2000-01-01 11:13:09"),
+ datediff(fl_date, "2007-03-14"),
+ datediff(fl_date, date "2007-03-14"),
+ datediff(fl_date, timestamp "2007-03-14 00:00:00"),
+ datediff(fl_date, timestamp "2007-03-14 08:21:59")
FROM date_udf_flight_orc;
SELECT
+ fl_date,
to_unix_timestamp(fl_date),
year(fl_date),
month(fl_date),
@@ -81,10 +107,19 @@ SELECT
to_date(fl_date),
date_add(fl_date, 2),
date_sub(fl_date, 2),
- datediff(fl_date, "2000-01-01")
+ datediff(fl_date, "2000-01-01"),
+ datediff(fl_date, date "2000-01-01"),
+ datediff(fl_date, timestamp "2000-01-01 00:00:00"),
+ datediff(fl_date, timestamp "2000-01-01 11:13:09"),
+ datediff(fl_date, "2007-03-14"),
+ datediff(fl_date, date "2007-03-14"),
+ datediff(fl_date, timestamp "2007-03-14 00:00:00"),
+ datediff(fl_date, timestamp "2007-03-14 08:21:59")
FROM date_udf_flight_orc;
EXPLAIN SELECT
+ fl_time,
+ fl_date,
year(fl_time) = year(fl_date),
month(fl_time) = month(fl_date),
day(fl_time) = day(fl_date),
@@ -95,11 +130,22 @@ EXPLAIN SELECT
to_date(fl_time) = to_date(fl_date),
date_add(fl_time, 2) = date_add(fl_date, 2),
date_sub(fl_time, 2) = date_sub(fl_date, 2),
- datediff(fl_time, "2000-01-01") = datediff(fl_date, "2000-01-01")
+ datediff(fl_time, "2000-01-01") = datediff(fl_date, "2000-01-01"),
+ datediff(fl_time, date "2000-01-01") = datediff(fl_date, date "2000-01-01"),
+ datediff(fl_time, timestamp "2000-01-01 00:00:00") = datediff(fl_date, timestamp "2000-01-01 00:00:00"),
+ datediff(fl_time, timestamp "2000-01-01 11:13:09") = datediff(fl_date, timestamp "2000-01-01 11:13:09"),
+ datediff(fl_time, "2007-03-14") = datediff(fl_date, "2007-03-14"),
+ datediff(fl_time, date "2007-03-14") = datediff(fl_date, date "2007-03-14"),
+ datediff(fl_time, timestamp "2007-03-14 00:00:00") = datediff(fl_date, timestamp "2007-03-14 00:00:00"),
+ datediff(fl_time, timestamp "2007-03-14 08:21:59") = datediff(fl_date, timestamp "2007-03-14 08:21:59"),
+ datediff(fl_date, "2000-01-01") = datediff(fl_date, date "2000-01-01"),
+ datediff(fl_date, "2007-03-14") = datediff(fl_date, date "2007-03-14")
FROM date_udf_flight_orc;
-- Should all be true or NULL
SELECT
+ fl_time,
+ fl_date,
year(fl_time) = year(fl_date),
month(fl_time) = month(fl_date),
day(fl_time) = day(fl_date),
@@ -110,7 +156,16 @@ SELECT
to_date(fl_time) = to_date(fl_date),
date_add(fl_time, 2) = date_add(fl_date, 2),
date_sub(fl_time, 2) = date_sub(fl_date, 2),
- datediff(fl_time, "2000-01-01") = datediff(fl_date, "2000-01-01")
+ datediff(fl_time, "2000-01-01") = datediff(fl_date, "2000-01-01"),
+ datediff(fl_time, date "2000-01-01") = datediff(fl_date, date "2000-01-01"),
+ datediff(fl_time, timestamp "2000-01-01 00:00:00") = datediff(fl_date, timestamp "2000-01-01 00:00:00"),
+ datediff(fl_time, timestamp "2000-01-01 11:13:09") = datediff(fl_date, timestamp "2000-01-01 11:13:09"),
+ datediff(fl_time, "2007-03-14") = datediff(fl_date, "2007-03-14"),
+ datediff(fl_time, date "2007-03-14") = datediff(fl_date, date "2007-03-14"),
+ datediff(fl_time, timestamp "2007-03-14 00:00:00") = datediff(fl_date, timestamp "2007-03-14 00:00:00"),
+ datediff(fl_time, timestamp "2007-03-14 08:21:59") = datediff(fl_date, timestamp "2007-03-14 08:21:59"),
+ datediff(fl_date, "2000-01-01") = datediff(fl_date, date "2000-01-01"),
+ datediff(fl_date, "2007-03-14") = datediff(fl_date, date "2007-03-14")
FROM date_udf_flight_orc;
EXPLAIN SELECT
http://git-wip-us.apache.org/repos/asf/hive/blob/6ef0b507/ql/src/test/results/clientpositive/date_udf.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/date_udf.q.out b/ql/src/test/results/clientpositive/date_udf.q.out
index 9b37da6..2037367 100644
--- a/ql/src/test/results/clientpositive/date_udf.q.out
+++ b/ql/src/test/results/clientpositive/date_udf.q.out
@@ -114,7 +114,7 @@ POSTHOOK: query: select datediff(d, d), datediff(d, '2002-03-21'), datediff('200
POSTHOOK: type: QUERY
POSTHOOK: Input: default@date_udf
#### A masked pattern was here ####
-0 3333 -3333 -3332 3332
+0 3333 -3333 -3333 3333
PREHOOK: query: -- Test UDFs with string input
select unix_timestamp(d), year(d), month(d), day(d), dayofmonth(d),
weekofyear(d), to_date(d)
@@ -139,21 +139,43 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@date_udf_string
#### A masked pattern was here ####
2011-05-11 2011-04-26
-PREHOOK: query: select datediff(d, d), datediff(d, '2002-03-21'), datediff('2002-03-21', d),
+PREHOOK: query: select
+ datediff(d, d),
+ datediff(d, '2002-03-21'),
+ datediff(d, date '2002-03-21'),
+ datediff('2002-03-21', d),
+ datediff(date '2002-03-21', d),
datediff('2002-03-21 00:00:00', d),
- datediff(d, '2002-03-21 00:00:00')
+ datediff(timestamp '2002-03-21 00:00:00', d),
+ datediff(d, '2002-03-21 00:00:00'),
+ datediff(d, timestamp '2002-03-21 00:00:00'),
+ datediff('2002-03-21 08:01:59', d),
+ datediff(timestamp '2002-03-21 08:01:59', d),
+ datediff(d, '2002-03-21 08:01:59'),
+ datediff(d, timestamp '2002-03-21 08:01:59')
from date_udf_string
PREHOOK: type: QUERY
PREHOOK: Input: default@date_udf_string
#### A masked pattern was here ####
-POSTHOOK: query: select datediff(d, d), datediff(d, '2002-03-21'), datediff('2002-03-21', d),
+POSTHOOK: query: select
+ datediff(d, d),
+ datediff(d, '2002-03-21'),
+ datediff(d, date '2002-03-21'),
+ datediff('2002-03-21', d),
+ datediff(date '2002-03-21', d),
datediff('2002-03-21 00:00:00', d),
- datediff(d, '2002-03-21 00:00:00')
+ datediff(timestamp '2002-03-21 00:00:00', d),
+ datediff(d, '2002-03-21 00:00:00'),
+ datediff(d, timestamp '2002-03-21 00:00:00'),
+ datediff('2002-03-21 08:01:59', d),
+ datediff(timestamp '2002-03-21 08:01:59', d),
+ datediff(d, '2002-03-21 08:01:59'),
+ datediff(d, timestamp '2002-03-21 08:01:59')
from date_udf_string
POSTHOOK: type: QUERY
POSTHOOK: Input: default@date_udf_string
#### A masked pattern was here ####
-0 3333 -3333 -3333 3333
+0 3333 3333 -3333 -3333 -3333 -3333 3333 3333 -3333 -3333 3333 3333
PREHOOK: query: select
to_utc_timestamp(date '1970-01-01', 'America/Los_Angeles'),
from_utc_timestamp(date '1970-01-01', 'America/Los_Angeles'),