You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by vg...@apache.org on 2020/07/14 21:03:59 UTC

[hive] branch branch-2 updated: HIVE-23807 Wrong results with vectorization enabled (Vineet Garg, reviewed by Jesus Camacho Rodriguez) (#1234)

This is an automated email from the ASF dual-hosted git repository.

vgarg pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/branch-2 by this push:
     new 70bcaa8  HIVE-23807 Wrong results with vectorization enabled (Vineet Garg, reviewed by Jesus Camacho Rodriguez) (#1234)
70bcaa8 is described below

commit 70bcaa85eda16d3876bb4e3781c2ac030b821faa
Author: Vineet G <vg...@apache.org>
AuthorDate: Tue Jul 14 14:03:47 2020 -0700

    HIVE-23807 Wrong results with vectorization enabled (Vineet Garg, reviewed by Jesus Camacho Rodriguez) (#1234)
---
 .../test/resources/testconfiguration.properties    |   1 +
 .../exec/vector/expressions/CastStringToDate.java  |  14 ++-
 .../vectorization_cast_string_to_date.q            |  33 ++++++
 .../llap/vectorization_cast_string_to_date.q.out   | 118 +++++++++++++++++++++
 4 files changed, 161 insertions(+), 5 deletions(-)

diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index deffd85..a1c7825 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -411,6 +411,7 @@ minillap.query.files=acid_bucket_pruning.q,\
   bucket5.q,\
   bucket6.q,\
   except_distinct.q,\
+  vectorization_cast_string_to_date.q,\
   explainuser_2.q,\
   empty_dir_in_table.q,\
   intersect_all.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
index 4b176ae..2667708 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
@@ -25,6 +25,8 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hive.common.util.DateParser;
 
+import java.sql.Date;
+
 import java.nio.charset.StandardCharsets;
 
 /**
@@ -116,14 +118,16 @@ public class CastStringToDate extends VectorExpression {
 
   private void evaluate(LongColumnVector outV, BytesColumnVector inV, int i) {
     String dateString = new String(inV.vector[i], inV.start[i], inV.length[i], StandardCharsets.UTF_8);
-    if (dateParser.parseDate(dateString, sqlDate)) {
+    try {
+      Date utilDate = Date.valueOf(dateString);
+      sqlDate.setTime(utilDate.getTime());
       outV.vector[i] = DateWritable.dateToDays(sqlDate);
       return;
+    } catch (IllegalArgumentException e) {
+      outV.vector[i] = 1;
+      outV.isNull[i] = true;
+      outV.noNulls = false;
     }
-
-    outV.vector[i] = 1;
-    outV.isNull[i] = true;
-    outV.noNulls = false;
   }
 
   @Override
diff --git a/ql/src/test/queries/clientpositive/vectorization_cast_string_to_date.q b/ql/src/test/queries/clientpositive/vectorization_cast_string_to_date.q
new file mode 100644
index 0000000..0bc39dd
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vectorization_cast_string_to_date.q
@@ -0,0 +1,33 @@
+CREATE TABLE `test13`(
+  `portfolio_valuation_date` string,
+  `price_cut_off_datetime` string,
+  `portfolio_id_valuation_source` string,
+  `contributor_full_path` string,
+  `position_market_value` double,
+  `mandate_name` string)
+STORED AS ORC;
+
+INSERT INTO test13 values (
+"2020-01-31",	"2020-02-07T03:14:48.007Z",	"37",	NULL,	-0.26,	"foo");
+
+INSERT INTO test13 values (
+"2020-01-31",	"2020-02-07T03:14:48.007Z",	"37",	NULL,	0.33,	"foo");
+
+INSERT INTO test13 values (
+"2020-01-31",	"2020-02-07T03:14:48.007Z",	"37",	NULL,	-0.03,	"foo");
+
+INSERT INTO test13 values (
+"2020-01-31",	"2020-02-07T03:14:48.007Z",	"37",	NULL,	0.16,	"foo");
+
+INSERT INTO test13 values (
+"2020-01-31",	"2020-02-07T03:14:48.007Z",	"37",	NULL,	0.08,	"foo");
+
+set hive.fetch.task.conversion=none;
+set hive.explain.user=false;
+
+set hive.vectorized.execution.enabled=false;
+select Cast(`test13`.`price_cut_off_datetime` AS date) from test13;
+
+
+set hive.vectorized.execution.enabled=true;
+select Cast(`test13`.`price_cut_off_datetime` AS date) from test13;
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_cast_string_to_date.q.out b/ql/src/test/results/clientpositive/llap/vectorization_cast_string_to_date.q.out
new file mode 100644
index 0000000..4a5bd63
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vectorization_cast_string_to_date.q.out
@@ -0,0 +1,118 @@
+PREHOOK: query: CREATE TABLE `test13`(
+  `portfolio_valuation_date` string,
+  `price_cut_off_datetime` string,
+  `portfolio_id_valuation_source` string,
+  `contributor_full_path` string,
+  `position_market_value` double,
+  `mandate_name` string)
+STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test13
+POSTHOOK: query: CREATE TABLE `test13`(
+  `portfolio_valuation_date` string,
+  `price_cut_off_datetime` string,
+  `portfolio_id_valuation_source` string,
+  `contributor_full_path` string,
+  `position_market_value` double,
+  `mandate_name` string)
+STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test13
+PREHOOK: query: INSERT INTO test13 values (
+"2020-01-31",	"2020-02-07T03:14:48.007Z",	"37",	NULL,	-0.26,	"foo")
+PREHOOK: type: QUERY
+PREHOOK: Output: default@test13
+POSTHOOK: query: INSERT INTO test13 values (
+"2020-01-31",	"2020-02-07T03:14:48.007Z",	"37",	NULL,	-0.26,	"foo")
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@test13
+POSTHOOK: Lineage: test13.contributor_full_path SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+POSTHOOK: Lineage: test13.mandate_name SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col6, type:string, comment:), ]
+POSTHOOK: Lineage: test13.portfolio_id_valuation_source SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: test13.portfolio_valuation_date SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: test13.position_market_value EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col5, type:string, comment:), ]
+POSTHOOK: Lineage: test13.price_cut_off_datetime SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: INSERT INTO test13 values (
+"2020-01-31",	"2020-02-07T03:14:48.007Z",	"37",	NULL,	0.33,	"foo")
+PREHOOK: type: QUERY
+PREHOOK: Output: default@test13
+POSTHOOK: query: INSERT INTO test13 values (
+"2020-01-31",	"2020-02-07T03:14:48.007Z",	"37",	NULL,	0.33,	"foo")
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@test13
+POSTHOOK: Lineage: test13.contributor_full_path SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+POSTHOOK: Lineage: test13.mandate_name SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col6, type:string, comment:), ]
+POSTHOOK: Lineage: test13.portfolio_id_valuation_source SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: test13.portfolio_valuation_date SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: test13.position_market_value EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col5, type:string, comment:), ]
+POSTHOOK: Lineage: test13.price_cut_off_datetime SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: INSERT INTO test13 values (
+"2020-01-31",	"2020-02-07T03:14:48.007Z",	"37",	NULL,	-0.03,	"foo")
+PREHOOK: type: QUERY
+PREHOOK: Output: default@test13
+POSTHOOK: query: INSERT INTO test13 values (
+"2020-01-31",	"2020-02-07T03:14:48.007Z",	"37",	NULL,	-0.03,	"foo")
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@test13
+POSTHOOK: Lineage: test13.contributor_full_path SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+POSTHOOK: Lineage: test13.mandate_name SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col6, type:string, comment:), ]
+POSTHOOK: Lineage: test13.portfolio_id_valuation_source SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: test13.portfolio_valuation_date SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: test13.position_market_value EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col5, type:string, comment:), ]
+POSTHOOK: Lineage: test13.price_cut_off_datetime SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: INSERT INTO test13 values (
+"2020-01-31",	"2020-02-07T03:14:48.007Z",	"37",	NULL,	0.16,	"foo")
+PREHOOK: type: QUERY
+PREHOOK: Output: default@test13
+POSTHOOK: query: INSERT INTO test13 values (
+"2020-01-31",	"2020-02-07T03:14:48.007Z",	"37",	NULL,	0.16,	"foo")
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@test13
+POSTHOOK: Lineage: test13.contributor_full_path SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+POSTHOOK: Lineage: test13.mandate_name SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col6, type:string, comment:), ]
+POSTHOOK: Lineage: test13.portfolio_id_valuation_source SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: test13.portfolio_valuation_date SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: test13.position_market_value EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col5, type:string, comment:), ]
+POSTHOOK: Lineage: test13.price_cut_off_datetime SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: INSERT INTO test13 values (
+"2020-01-31",	"2020-02-07T03:14:48.007Z",	"37",	NULL,	0.08,	"foo")
+PREHOOK: type: QUERY
+PREHOOK: Output: default@test13
+POSTHOOK: query: INSERT INTO test13 values (
+"2020-01-31",	"2020-02-07T03:14:48.007Z",	"37",	NULL,	0.08,	"foo")
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@test13
+POSTHOOK: Lineage: test13.contributor_full_path SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+POSTHOOK: Lineage: test13.mandate_name SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col6, type:string, comment:), ]
+POSTHOOK: Lineage: test13.portfolio_id_valuation_source SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: test13.portfolio_valuation_date SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: test13.position_market_value EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col5, type:string, comment:), ]
+POSTHOOK: Lineage: test13.price_cut_off_datetime SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: select Cast(`test13`.`price_cut_off_datetime` AS date) from test13
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test13
+#### A masked pattern was here ####
+POSTHOOK: query: select Cast(`test13`.`price_cut_off_datetime` AS date) from test13
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test13
+#### A masked pattern was here ####
+NULL
+NULL
+NULL
+NULL
+NULL
+PREHOOK: query: select Cast(`test13`.`price_cut_off_datetime` AS date) from test13
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test13
+#### A masked pattern was here ####
+POSTHOOK: query: select Cast(`test13`.`price_cut_off_datetime` AS date) from test13
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test13
+#### A masked pattern was here ####
+NULL
+NULL
+NULL
+NULL
+NULL