Posted to commits@hive.apache.org by kg...@apache.org on 2018/08/27 09:54:02 UTC

[2/4] hive git commit: HIVE-20013: Add an Implicit cast to date type for to_date function (Nishant Bangarwa via Ashutosh Chauhan)

HIVE-20013: Add an Implicit cast to date type for to_date function (Nishant Bangarwa via Ashutosh Chauhan)

Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/826e5532
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/826e5532
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/826e5532

Branch: refs/heads/master
Commit: 826e5532aae16e57cfc82eb4f39a96122a88486b
Parents: fb7a676
Author: Nishant Bangarwa <ni...@gmail.com>
Authored: Mon Aug 27 11:06:09 2018 +0200
Committer: Zoltan Haindrich <ki...@rxd.hu>
Committed: Mon Aug 27 11:53:49 2018 +0200

----------------------------------------------------------------------
 .../calcite/translator/RexNodeConverter.java    | 19 +++++
 .../clientpositive/druidmini_expressions.q      | 22 ++++++
 .../druid/druidmini_expressions.q.out           | 79 ++++++++++++++++++++
 .../clientpositive/fold_eq_with_case_when.q.out |  4 +-
 .../test/results/clientpositive/llap/kryo.q.out |  2 +-
 5 files changed, 123 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
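For readers skimming the diff below: the CBO-side change is a single rewrite in
RexNodeConverter. When the translated operator is Hive's TO_DATE and its one
operand is not already a datetime or interval type (a string column, in
particular), the operand is wrapped in CAST(... AS TIMESTAMP) before the call
is built. A minimal standalone sketch of that rewrite, assuming only the
Calcite core API (the class name and parameter passing here are illustrative;
the patch does this inside RexNodeConverter with its own cluster):

  import java.util.ArrayList;
  import java.util.List;

  import org.apache.calcite.rel.type.RelDataTypeFactory;
  import org.apache.calcite.rex.RexBuilder;
  import org.apache.calcite.rex.RexNode;
  import org.apache.calcite.sql.type.SqlTypeName;
  import org.apache.calcite.sql.type.SqlTypeUtil;

  /** Sketch of the TO_DATE operand rewrite added by HIVE-20013. */
  public final class ToDateOperandRewrite {

    private ToDateOperandRewrite() {
    }

    /**
     * Returns the operand list for a TO_DATE call, wrapping the single
     * operand in CAST(... AS TIMESTAMP) unless it is already a datetime
     * or interval expression.
     */
    public static List<RexNode> rewriteToDateChildren(RexBuilder rexBuilder,
        RelDataTypeFactory typeFactory, List<RexNode> children) {
      assert children.size() == 1 : "TO_DATE takes exactly one operand";
      RexNode child = children.get(0);
      List<RexNode> result = new ArrayList<>(1);
      if (SqlTypeUtil.isDatetime(child.getType())
          || SqlTypeUtil.isInterval(child.getType())) {
        // Already temporal: pass the operand through unchanged.
        result.add(child);
      } else {
        // Anything else (strings in particular) gets an implicit
        // CAST(child AS TIMESTAMP) so TO_DATE sees a temporal input.
        result.add(rexBuilder.makeCast(
            typeFactory.createSqlType(SqlTypeName.TIMESTAMP), child));
      }
      return result;
    }
  }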


http://git-wip-us.apache.org/repos/asf/hive/blob/826e5532/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
index bc47969..f60091b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
@@ -62,6 +62,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.Unsu
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExtractDate;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFloorDate;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveToDateSqlOperator;
 import org.apache.hadoop.hive.ql.parse.ParseUtils;
 import org.apache.hadoop.hive.ql.parse.RowResolver;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
@@ -353,6 +354,8 @@ public class RexNodeConverter {
           childRexNodeLst = rewriteInClauseChildren(calciteOp, childRexNodeLst);
           calciteOp = SqlStdOperatorTable.OR;
         }
+      } else if (calciteOp == HiveToDateSqlOperator.INSTANCE) {
+        childRexNodeLst = rewriteToDateChildren(childRexNodeLst);
       }
       expr = cluster.getRexBuilder().makeCall(retType, calciteOp, childRexNodeLst);
     } else {
@@ -534,6 +537,22 @@ public class RexNodeConverter {
     return newChildRexNodeLst;
   }
 
+
+  private List<RexNode> rewriteToDateChildren(List<RexNode> childRexNodeLst) {
+    List<RexNode> newChildRexNodeLst = new ArrayList<RexNode>();
+    assert childRexNodeLst.size() == 1;
+    RexNode child = childRexNodeLst.get(0);
+    if (SqlTypeUtil.isDatetime(child.getType()) || SqlTypeUtil.isInterval(
+            child.getType())) {
+      newChildRexNodeLst.add(child);
+    } else {
+      newChildRexNodeLst.add(
+              cluster.getRexBuilder().makeCast(cluster.getTypeFactory().createSqlType(SqlTypeName.TIMESTAMP),
+                      child));
+    }
+    return newChildRexNodeLst;
+  }
+
   private List<RexNode> rewriteInClauseChildren(SqlOperator op, List<RexNode> childRexNodeLst)
       throws SemanticException {
     assert op.getKind() == SqlKind.IN;

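A hypothetical driver for the sketch above, using Calcite's standalone
RexBuilder (the names and the printed form are illustrative, not taken from
the patch):

  import java.util.Collections;

  import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
  import org.apache.calcite.rel.type.RelDataTypeFactory;
  import org.apache.calcite.rex.RexBuilder;
  import org.apache.calcite.rex.RexNode;
  import org.apache.calcite.sql.type.SqlTypeName;

  public class ToDateRewriteDemo {
    public static void main(String[] args) {
      RelDataTypeFactory typeFactory = new JavaTypeFactoryImpl();
      RexBuilder rexBuilder = new RexBuilder(typeFactory);

      // $0 stands in for a string column such as l_shipdate.
      RexNode stringCol = rexBuilder.makeInputRef(
          typeFactory.createSqlType(SqlTypeName.VARCHAR), 0);
      // Prints something like: CAST($0):TIMESTAMP(0) NOT NULL
      System.out.println(ToDateOperandRewrite.rewriteToDateChildren(
          rexBuilder, typeFactory, Collections.singletonList(stringCol)).get(0));

      // A column that is already a timestamp passes through unchanged ($1).
      RexNode tsCol = rexBuilder.makeInputRef(
          typeFactory.createSqlType(SqlTypeName.TIMESTAMP), 1);
      System.out.println(ToDateOperandRewrite.rewriteToDateChildren(
          rexBuilder, typeFactory, Collections.singletonList(tsCol)).get(0));
    }
  }

Note that the operand is cast to TIMESTAMP, not DATE: to_date itself still
truncates to the day, which is exactly the shape that shows up in the updated
golden files below, e.g. to_date(CAST( l_shipdate AS TIMESTAMP)).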
http://git-wip-us.apache.org/repos/asf/hive/blob/826e5532/ql/src/test/queries/clientpositive/druidmini_expressions.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/druidmini_expressions.q b/ql/src/test/queries/clientpositive/druidmini_expressions.q
index 273c803..9723585 100644
--- a/ql/src/test/queries/clientpositive/druidmini_expressions.q
+++ b/ql/src/test/queries/clientpositive/druidmini_expressions.q
@@ -118,6 +118,7 @@ EXPLAIN SELECT SUM((`druid_table_alias`.`cdouble` * `druid_table_alias`.`cdouble
 FROM `default`.`druid_table_n0` `druid_table_alias`
 GROUP BY CAST(TRUNC(CAST(`druid_table_alias`.`__time` AS TIMESTAMP),'MM') AS DATE);
 
+
 SELECT SUM((`druid_table_alias`.`cdouble` * `druid_table_alias`.`cdouble`)) AS `sum_calculation_4998925219892510720_ok`,
   CAST(TRUNC(CAST(`druid_table_alias`.`__time` AS TIMESTAMP),'MM') AS DATE) AS `tmn___time_ok`
 FROM `default`.`druid_table_n0` `druid_table_alias`
@@ -139,3 +140,24 @@ SELECT DATE_ADD(cast(`__time` as date), CAST((cdouble / 1000) AS INT)) as date_1
  EXPLAIN SELECT ctinyint > 2, count(*) from druid_table_n0 GROUP BY ctinyint > 2;
 
 DROP TABLE druid_table_n0;
+
+-- Tests for handling of date/time functions on Druid dimensions stored as strings
+CREATE TABLE druid_table_n1
+STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler'
+TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE")
+AS
+  SELECT cast (current_timestamp() as timestamp with local time zone) as `__time`,
+cast(datetime1 as string) as datetime1,
+cast(date1 as string) as date1,
+cast(time1 as string) as time1
+FROM TABLE (
+VALUES
+('2004-04-09 22:20:14', '2004-04-09','22:20:14'),
+('2004-04-04 22:50:16', '2004-04-04', '22:50:16'),
+('2004-04-12 04:40:49', '2004-04-12', '04:40:49'),
+('2004-04-11 00:00:00', '2004-04-11', null),
+('00:00:00 18:58:41', null, '18:58:41')) as q (datetime1, date1, time1);
+
+EXPLAIN SELECT TO_DATE(date1), TO_DATE(datetime1) FROM druid_table_n1;
+
+SELECT TO_DATE(date1), TO_DATE(datetime1) FROM druid_table_n1;

http://git-wip-us.apache.org/repos/asf/hive/blob/826e5532/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out
index 9ffcdd8..45f2f4d 100644
--- a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out
+++ b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out
@@ -1404,3 +1404,82 @@ POSTHOOK: query: DROP TABLE druid_table_n0
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@druid_table_n0
 POSTHOOK: Output: default@druid_table_n0
+PREHOOK: query: CREATE TABLE druid_table_n1
+STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler'
+TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE")
+AS
+  SELECT cast (current_timestamp() as timestamp with local time zone) as `__time`,
+cast(datetime1 as string) as datetime1,
+cast(date1 as string) as date1,
+cast(time1 as string) as time1
+FROM TABLE (
+VALUES
+('2004-04-09 22:20:14', '2004-04-09','22:20:14'),
+('2004-04-04 22:50:16', '2004-04-04', '22:50:16'),
+('2004-04-12 04:40:49', '2004-04-12', '04:40:49'),
+('2004-04-11 00:00:00', '2004-04-11', null),
+('00:00:00 18:58:41', null, '18:58:41')) as q (datetime1, date1, time1)
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: database:default
+PREHOOK: Output: default@druid_table_n1
+POSTHOOK: query: CREATE TABLE druid_table_n1
+STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler'
+TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE")
+AS
+  SELECT cast (current_timestamp() as timestamp with local time zone) as `__time`,
+cast(datetime1 as string) as datetime1,
+cast(date1 as string) as date1,
+cast(time1 as string) as time1
+FROM TABLE (
+VALUES
+('2004-04-09 22:20:14', '2004-04-09','22:20:14'),
+('2004-04-04 22:50:16', '2004-04-04', '22:50:16'),
+('2004-04-12 04:40:49', '2004-04-12', '04:40:49'),
+('2004-04-11 00:00:00', '2004-04-11', null),
+('00:00:00 18:58:41', null, '18:58:41')) as q (datetime1, date1, time1)
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@druid_table_n1
+POSTHOOK: Lineage: druid_table_n1.__time SIMPLE []
+POSTHOOK: Lineage: druid_table_n1.date1 SCRIPT []
+POSTHOOK: Lineage: druid_table_n1.datetime1 SCRIPT []
+POSTHOOK: Lineage: druid_table_n1.time1 SCRIPT []
+PREHOOK: query: EXPLAIN SELECT TO_DATE(date1), TO_DATE(datetime1) FROM druid_table_n1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT TO_DATE(date1), TO_DATE(datetime1) FROM druid_table_n1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: druid_table_n1
+          properties:
+            druid.fieldNames vc,vc0
+            druid.fieldTypes date,date
+            druid.query.json {"queryType":"scan","dataSource":"default.druid_table_n1","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_floor(timestamp_parse(\"date1\",'','UTC'),'P1D','','UTC')","outputType":"LONG"},{"type":"expression","name":"vc0","expression":"timestamp_floor(timestamp_parse(\"datetime1\",'','UTC'),'P1D','','UTC')","outputType":"LONG"}],"columns":["vc","vc0"],"resultFormat":"compactedList"}
+            druid.query.type scan
+          Select Operator
+            expressions: vc (type: date), vc0 (type: date)
+            outputColumnNames: _col0, _col1
+            ListSink
+
+PREHOOK: query: SELECT TO_DATE(date1), TO_DATE(datetime1) FROM druid_table_n1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_table_n1
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT TO_DATE(date1), TO_DATE(datetime1) FROM druid_table_n1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_table_n1
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1970-01-01	1970-01-01
+2004-04-04	2004-04-04
+2004-04-09	2004-04-09
+2004-04-11	2004-04-11
+2004-04-12	2004-04-12

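For the Druid-backed table the implicit cast is not evaluated in Hive at all:
it is pushed into the generated scan query as the timestamp_parse(...) and
timestamp_floor(..., 'P1D', ...) virtual columns visible in druid.query.json
above. The 1970-01-01 row in the output apparently corresponds to the
null/unparseable inputs in the last VALUES row, which Druid's expression
evaluation maps to the epoch rather than to NULL.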
http://git-wip-us.apache.org/repos/asf/hive/blob/826e5532/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out b/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out
index cb4d65c..0e1f2c6 100644
--- a/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out
+++ b/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out
@@ -40,10 +40,10 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: lineitem
-            filterExpr: ((DATE'1996-03-30' = to_date(l_shipdate)) and (l_shipmode = 'RAIL')) (type: boolean)
+            filterExpr: ((DATE'1996-03-30' = to_date(CAST( l_shipdate AS TIMESTAMP))) and (l_shipmode = 'RAIL')) (type: boolean)
             Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: ((DATE'1996-03-30' = to_date(l_shipdate)) and (l_shipmode = 'RAIL')) (type: boolean)
+              predicate: ((DATE'1996-03-30' = to_date(CAST( l_shipdate AS TIMESTAMP))) and (l_shipmode = 'RAIL')) (type: boolean)
               Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: l_orderkey (type: int), (UDFToDouble(l_partkey) / 1000000.0D) (type: double)

http://git-wip-us.apache.org/repos/asf/hive/blob/826e5532/ql/src/test/results/clientpositive/llap/kryo.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/kryo.q.out b/ql/src/test/results/clientpositive/llap/kryo.q.out
index 764a914..81da38a 100644
--- a/ql/src/test/results/clientpositive/llap/kryo.q.out
+++ b/ql/src/test/results/clientpositive/llap/kryo.q.out
@@ -50,7 +50,7 @@ STAGE PLANS:
                     predicate: (id) IN (1, 2, 3, 4, 5, 6) (type: boolean)
                     Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: if(((id = 1) or (id = 2)), if((id = 1), date_, date_), if((id = 3), CASE WHEN ((date_ is null or to_date(datetime) is null)) THEN (null) WHEN ((CAST( date_ AS DATE) > to_date(datetime))) THEN (date_) ELSE (to_date(datetime)) END, null)) (type: string), id (type: int), CASE WHEN ((id = 6)) THEN (CASE WHEN ((concat(date_, ' 00:00:00') is null or datetime is null)) THEN (null) WHEN ((concat(date_, ' 00:00:00') > datetime)) THEN (concat(date_, ' 00:00:00')) ELSE (datetime) END) WHEN ((id = 5)) THEN (CASE WHEN ((date_ is null or datetime is null)) THEN (null) WHEN ((date_ > datetime)) THEN (date_) ELSE (datetime) END) WHEN ((id = 3)) THEN (concat(date_, ' 00:00:00')) WHEN ((id = 4)) THEN (concat(date_, ' 00:00:00')) WHEN ((id = 1)) THEN (date_) WHEN ((id = 2)) THEN (date_) ELSE (null) END (type: string)
+                      expressions: if(((id = 1) or (id = 2)), if((id = 1), date_, date_), if((id = 3), CASE WHEN ((date_ is null or to_date(CAST( datetime AS TIMESTAMP)) is null)) THEN (null) WHEN ((CAST( date_ AS DATE) > to_date(CAST( datetime AS TIMESTAMP)))) THEN (date_) ELSE (to_date(CAST( datetime AS TIMESTAMP))) END, null)) (type: string), id (type: int), CASE WHEN ((id = 6)) THEN (CASE WHEN ((concat(date_, ' 00:00:00') is null or datetime is null)) THEN (null) WHEN ((concat(date_, ' 00:00:00') > datetime)) THEN (concat(date_, ' 00:00:00')) ELSE (datetime) END) WHEN ((id = 5)) THEN (CASE WHEN ((date_ is null or datetime is null)) THEN (null) WHEN ((date_ > datetime)) THEN (date_) ELSE (datetime) END) WHEN ((id = 3)) THEN (concat(date_, ' 00:00:00')) WHEN ((id = 4)) THEN (concat(date_, ' 00:00:00')) WHEN ((id = 1)) THEN (date_) WHEN ((id = 2)) THEN (date_) ELSE (null) END (type: string)
                       outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator