You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by vg...@apache.org on 2020/07/14 21:03:59 UTC
[hive] branch branch-2 updated: HIVE-23807 Wrong results with
vectorization enabled (Vineet Garg,
reviewed by Jesus Camacho Rodriguez) (#1234)
This is an automated email from the ASF dual-hosted git repository.
vgarg pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/branch-2 by this push:
new 70bcaa8 HIVE-23807 Wrong results with vectorization enabled (Vineet Garg, reviewed by Jesus Camacho Rodriguez) (#1234)
70bcaa8 is described below
commit 70bcaa85eda16d3876bb4e3781c2ac030b821faa
Author: Vineet G <vg...@apache.org>
AuthorDate: Tue Jul 14 14:03:47 2020 -0700
HIVE-23807 Wrong results with vectorization enabled (Vineet Garg, reviewed by Jesus Camacho Rodriguez) (#1234)
---
.../test/resources/testconfiguration.properties | 1 +
.../exec/vector/expressions/CastStringToDate.java | 14 ++-
.../vectorization_cast_string_to_date.q | 33 ++++++
.../llap/vectorization_cast_string_to_date.q.out | 118 +++++++++++++++++++++
4 files changed, 161 insertions(+), 5 deletions(-)
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index deffd85..a1c7825 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -411,6 +411,7 @@ minillap.query.files=acid_bucket_pruning.q,\
bucket5.q,\
bucket6.q,\
except_distinct.q,\
+ vectorization_cast_string_to_date.q,\
explainuser_2.q,\
empty_dir_in_table.q,\
intersect_all.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
index 4b176ae..2667708 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
@@ -25,6 +25,8 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hive.common.util.DateParser;
+import java.sql.Date;
+
import java.nio.charset.StandardCharsets;
/**
@@ -116,14 +118,16 @@ public class CastStringToDate extends VectorExpression {
private void evaluate(LongColumnVector outV, BytesColumnVector inV, int i) {
String dateString = new String(inV.vector[i], inV.start[i], inV.length[i], StandardCharsets.UTF_8);
- if (dateParser.parseDate(dateString, sqlDate)) {
+ try {
+ Date utilDate = Date.valueOf(dateString);
+ sqlDate.setTime(utilDate.getTime());
outV.vector[i] = DateWritable.dateToDays(sqlDate);
return;
+ } catch (IllegalArgumentException e) {
+ outV.vector[i] = 1;
+ outV.isNull[i] = true;
+ outV.noNulls = false;
}
-
- outV.vector[i] = 1;
- outV.isNull[i] = true;
- outV.noNulls = false;
}
@Override
diff --git a/ql/src/test/queries/clientpositive/vectorization_cast_string_to_date.q b/ql/src/test/queries/clientpositive/vectorization_cast_string_to_date.q
new file mode 100644
index 0000000..0bc39dd
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vectorization_cast_string_to_date.q
@@ -0,0 +1,33 @@
+CREATE TABLE `test13`(
+ `portfolio_valuation_date` string,
+ `price_cut_off_datetime` string,
+ `portfolio_id_valuation_source` string,
+ `contributor_full_path` string,
+ `position_market_value` double,
+ `mandate_name` string)
+STORED AS ORC;
+
+INSERT INTO test13 values (
+"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, -0.26, "foo");
+
+INSERT INTO test13 values (
+"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, 0.33, "foo");
+
+INSERT INTO test13 values (
+"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, -0.03, "foo");
+
+INSERT INTO test13 values (
+"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, 0.16, "foo");
+
+INSERT INTO test13 values (
+"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, 0.08, "foo");
+
+set hive.fetch.task.conversion=none;
+set hive.explain.user=false;
+
+set hive.vectorized.execution.enabled=false;
+select Cast(`test13`.`price_cut_off_datetime` AS date) from test13;
+
+
+set hive.vectorized.execution.enabled=true;
+select Cast(`test13`.`price_cut_off_datetime` AS date) from test13;
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_cast_string_to_date.q.out b/ql/src/test/results/clientpositive/llap/vectorization_cast_string_to_date.q.out
new file mode 100644
index 0000000..4a5bd63
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vectorization_cast_string_to_date.q.out
@@ -0,0 +1,118 @@
+PREHOOK: query: CREATE TABLE `test13`(
+ `portfolio_valuation_date` string,
+ `price_cut_off_datetime` string,
+ `portfolio_id_valuation_source` string,
+ `contributor_full_path` string,
+ `position_market_value` double,
+ `mandate_name` string)
+STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test13
+POSTHOOK: query: CREATE TABLE `test13`(
+ `portfolio_valuation_date` string,
+ `price_cut_off_datetime` string,
+ `portfolio_id_valuation_source` string,
+ `contributor_full_path` string,
+ `position_market_value` double,
+ `mandate_name` string)
+STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test13
+PREHOOK: query: INSERT INTO test13 values (
+"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, -0.26, "foo")
+PREHOOK: type: QUERY
+PREHOOK: Output: default@test13
+POSTHOOK: query: INSERT INTO test13 values (
+"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, -0.26, "foo")
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@test13
+POSTHOOK: Lineage: test13.contributor_full_path SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+POSTHOOK: Lineage: test13.mandate_name SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col6, type:string, comment:), ]
+POSTHOOK: Lineage: test13.portfolio_id_valuation_source SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: test13.portfolio_valuation_date SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: test13.position_market_value EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col5, type:string, comment:), ]
+POSTHOOK: Lineage: test13.price_cut_off_datetime SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: INSERT INTO test13 values (
+"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, 0.33, "foo")
+PREHOOK: type: QUERY
+PREHOOK: Output: default@test13
+POSTHOOK: query: INSERT INTO test13 values (
+"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, 0.33, "foo")
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@test13
+POSTHOOK: Lineage: test13.contributor_full_path SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+POSTHOOK: Lineage: test13.mandate_name SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col6, type:string, comment:), ]
+POSTHOOK: Lineage: test13.portfolio_id_valuation_source SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: test13.portfolio_valuation_date SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: test13.position_market_value EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col5, type:string, comment:), ]
+POSTHOOK: Lineage: test13.price_cut_off_datetime SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: INSERT INTO test13 values (
+"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, -0.03, "foo")
+PREHOOK: type: QUERY
+PREHOOK: Output: default@test13
+POSTHOOK: query: INSERT INTO test13 values (
+"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, -0.03, "foo")
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@test13
+POSTHOOK: Lineage: test13.contributor_full_path SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+POSTHOOK: Lineage: test13.mandate_name SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col6, type:string, comment:), ]
+POSTHOOK: Lineage: test13.portfolio_id_valuation_source SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: test13.portfolio_valuation_date SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: test13.position_market_value EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col5, type:string, comment:), ]
+POSTHOOK: Lineage: test13.price_cut_off_datetime SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: INSERT INTO test13 values (
+"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, 0.16, "foo")
+PREHOOK: type: QUERY
+PREHOOK: Output: default@test13
+POSTHOOK: query: INSERT INTO test13 values (
+"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, 0.16, "foo")
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@test13
+POSTHOOK: Lineage: test13.contributor_full_path SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+POSTHOOK: Lineage: test13.mandate_name SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col6, type:string, comment:), ]
+POSTHOOK: Lineage: test13.portfolio_id_valuation_source SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: test13.portfolio_valuation_date SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: test13.position_market_value EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col5, type:string, comment:), ]
+POSTHOOK: Lineage: test13.price_cut_off_datetime SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: INSERT INTO test13 values (
+"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, 0.08, "foo")
+PREHOOK: type: QUERY
+PREHOOK: Output: default@test13
+POSTHOOK: query: INSERT INTO test13 values (
+"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, 0.08, "foo")
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@test13
+POSTHOOK: Lineage: test13.contributor_full_path SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+POSTHOOK: Lineage: test13.mandate_name SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col6, type:string, comment:), ]
+POSTHOOK: Lineage: test13.portfolio_id_valuation_source SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: test13.portfolio_valuation_date SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: test13.position_market_value EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col5, type:string, comment:), ]
+POSTHOOK: Lineage: test13.price_cut_off_datetime SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: select Cast(`test13`.`price_cut_off_datetime` AS date) from test13
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test13
+#### A masked pattern was here ####
+POSTHOOK: query: select Cast(`test13`.`price_cut_off_datetime` AS date) from test13
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test13
+#### A masked pattern was here ####
+NULL
+NULL
+NULL
+NULL
+NULL
+PREHOOK: query: select Cast(`test13`.`price_cut_off_datetime` AS date) from test13
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test13
+#### A masked pattern was here ####
+POSTHOOK: query: select Cast(`test13`.`price_cut_off_datetime` AS date) from test13
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test13
+#### A masked pattern was here ####
+NULL
+NULL
+NULL
+NULL
+NULL