Posted to commits@hive.apache.org by jc...@apache.org on 2019/04/29 19:54:53 UTC

[hive] branch master updated: HIVE-21619: Print timestamp type without precision in SQL explain extended (Jesus Camacho Rodriguez, reviewed by Gopal V)

This is an automated email from the ASF dual-hosted git repository.

jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 0c38cee  HIVE-21619: Print timestamp type without precision in SQL explain extended (Jesus Camacho Rodriguez, reviewed by Gopal V)
0c38cee is described below

commit 0c38cee599d6da6b3bbdfda2a2c1bb4c91759f47
Author: Jesus Camacho Rodriguez <jc...@apache.org>
AuthorDate: Tue Apr 16 13:27:09 2019 -0700

    HIVE-21619: Print timestamp type without precision in SQL explain extended (Jesus Camacho Rodriguez, reviewed by Gopal V)
---
 .../hadoop/hive/ql/parse/CalcitePlanner.java       |  12 +-
 ql/src/test/queries/clientpositive/timestamp.q     |   2 +
 .../kafka/kafka_storage_handler.q.out              |   8 +-
 .../llap/current_date_timestamp.q.out              |   2 +-
 ql/src/test/results/clientpositive/timestamp.q.out | 172 +++++++++++++++++++++
 .../test/results/clientpositive/udf_reflect2.q.out |   2 +-
 6 files changed, 190 insertions(+), 8 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index a7cdddb..f5c5a10 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -26,6 +26,7 @@ import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Multimap;
 
+import java.util.regex.Pattern;
 import org.antlr.runtime.ClassicToken;
 import org.antlr.runtime.CommonToken;
 import org.antlr.runtime.tree.Tree;
@@ -49,7 +50,6 @@ import org.apache.calcite.plan.RelOptCost;
 import org.apache.calcite.plan.RelOptMaterialization;
 import org.apache.calcite.plan.RelOptPlanner;
 import org.apache.calcite.plan.RelOptRule;
-import org.apache.calcite.plan.RelOptRuleCall;
 import org.apache.calcite.plan.RelOptSchema;
 import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.plan.RelTraitSet;
@@ -342,6 +342,12 @@ public class CalcitePlanner extends SemanticAnalyzer {
   private static final CommonToken SUBQUERY_TOKEN =
       new ImmutableCommonToken(HiveParser.TOK_SUBQUERY, "TOK_SUBQUERY");
 
+  private static final Pattern PATTERN_VARCHAR =
+      Pattern.compile("VARCHAR\\(2147483647\\)");
+  private static final Pattern PATTERN_TIMESTAMP =
+      Pattern.compile("TIMESTAMP\\(9\\)");
+
+
   public CalcitePlanner(QueryState queryState) throws SemanticException {
     super(queryState);
     if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_ENABLED)) {
@@ -1516,7 +1522,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
               .getTypeFactory());
       final JdbcImplementor.Result result = jdbcImplementor.visitChild(0, optimizedOptiqPlan);
       String sql = result.asStatement().toSqlString(dialect).getSql();
-      return sql.replaceAll("VARCHAR\\(2147483647\\)", "STRING");
+      sql = PATTERN_VARCHAR.matcher(sql).replaceAll("STRING"); // VARCHAR(INTEGER.MAX) -> STRING
+      sql = PATTERN_TIMESTAMP.matcher(sql).replaceAll("TIMESTAMP"); // TIMESTAMP(9) -> TIMESTAMP
+      return sql;
     } catch (Exception ex) {
       LOG.warn("Rel2SQL Rewrite threw error", ex);
     }
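
[Editor's note, not part of the patch] The hunk above replaces the single String.replaceAll call with two precompiled Pattern constants so that Calcite's default type spellings are rewritten before the optimized SQL is printed: VARCHAR(2147483647) becomes STRING and TIMESTAMP(9) becomes TIMESTAMP. The standalone sketch below illustrates that normalization in isolation; the class name, method name, and sample input are invented for illustration and do not appear in Hive.

    import java.util.regex.Pattern;

    public class Rel2SqlTypeNormalizer {

      // Precompiled patterns, mirroring the constants added to CalcitePlanner above.
      private static final Pattern PATTERN_VARCHAR =
          Pattern.compile("VARCHAR\\(2147483647\\)");
      private static final Pattern PATTERN_TIMESTAMP =
          Pattern.compile("TIMESTAMP\\(9\\)");

      // Rewrites Calcite's default type spellings into Hive's:
      // VARCHAR(2147483647) -> STRING, TIMESTAMP(9) -> TIMESTAMP.
      static String normalize(String sql) {
        sql = PATTERN_VARCHAR.matcher(sql).replaceAll("STRING");
        sql = PATTERN_TIMESTAMP.matcher(sql).replaceAll("TIMESTAMP");
        return sql;
      }

      public static void main(String[] args) {
        String sql = "SELECT CAST(`__timestamp` AS TIMESTAMP(9)) AS `$f1`, "
            + "CAST(`value` AS VARCHAR(2147483647)) AS `$f2` FROM `default`.`t`";
        // Prints the query with TIMESTAMP and STRING instead of the
        // precision-qualified Calcite type names.
        System.out.println(normalize(sql));
      }
    }

Precompiling the patterns avoids recompiling the regexes on every explain invocation and keeps the two rewrites in one place; the q.out updates further down are the expected-output changes that follow from printing TIMESTAMP without its precision.
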
diff --git a/ql/src/test/queries/clientpositive/timestamp.q b/ql/src/test/queries/clientpositive/timestamp.q
index 1453cf8..f3f09b3 100644
--- a/ql/src/test/queries/clientpositive/timestamp.q
+++ b/ql/src/test/queries/clientpositive/timestamp.q
@@ -4,6 +4,8 @@ set hive.mapred.mode=nonstrict;
 
 explain select cast('2011-01-01 01:01:01' as timestamp) as c from src union select cast('2011-01-01 01:01:01' as timestamp) as c from src limit 5;
 
+explain extended select cast('2011-01-01 01:01:01' as timestamp) as c from src union select cast('2011-01-01 01:01:01' as timestamp) as c from src limit 5;
+
 select cast('2011-01-01 01:01:01' as timestamp) as c from src union select cast('2011-01-01 01:01:01' as timestamp) as c from src limit 5;
 
 explain select cast('2011-01-01 01:01:01.123' as timestamp) as c from src union select cast('2011-01-01 01:01:01.123' as timestamp) as c from src limit 5;
diff --git a/ql/src/test/results/clientpositive/kafka/kafka_storage_handler.q.out b/ql/src/test/results/clientpositive/kafka/kafka_storage_handler.q.out
index e9f9648..7b71851 100644
--- a/ql/src/test/results/clientpositive/kafka/kafka_storage_handler.q.out
+++ b/ql/src/test/results/clientpositive/kafka/kafka_storage_handler.q.out
@@ -1075,9 +1075,9 @@ POSTHOOK: query: explain extended select distinct `__offset`, cast(`__timestamp`
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@wiki_kafka_avro_table
 POSTHOOK: Output: hdfs://### HDFS PATH ###
-OPTIMIZED SQL: SELECT `__offset` AS `$f0`, CAST(`__timestamp` AS TIMESTAMP(9)) AS `$f1`, `__key` AS `$f2`
+OPTIMIZED SQL: SELECT `__offset` AS `$f0`, CAST(`__timestamp` AS TIMESTAMP) AS `$f1`, `__key` AS `$f2`
 FROM `default`.`wiki_kafka_avro_table`
-GROUP BY `__offset`, CAST(`__timestamp` AS TIMESTAMP(9)), `__key`
+GROUP BY `__offset`, CAST(`__timestamp` AS TIMESTAMP), `__key`
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1398,9 +1398,9 @@ POSTHOOK: query: explain extended select distinct `__offset`, cast(`__timestamp`
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@wiki_kafka_avro_table
 POSTHOOK: Output: hdfs://### HDFS PATH ###
-OPTIMIZED SQL: SELECT `__offset` AS `$f0`, CAST(`__timestamp` AS TIMESTAMP(9)) AS `$f1`, `__key` AS `$f2`
+OPTIMIZED SQL: SELECT `__offset` AS `$f0`, CAST(`__timestamp` AS TIMESTAMP) AS `$f1`, `__key` AS `$f2`
 FROM `default`.`wiki_kafka_avro_table`
-GROUP BY `__offset`, CAST(`__timestamp` AS TIMESTAMP(9)), `__key`
+GROUP BY `__offset`, CAST(`__timestamp` AS TIMESTAMP), `__key`
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
diff --git a/ql/src/test/results/clientpositive/llap/current_date_timestamp.q.out b/ql/src/test/results/clientpositive/llap/current_date_timestamp.q.out
index 12c0a6e..2f0e9e1 100644
--- a/ql/src/test/results/clientpositive/llap/current_date_timestamp.q.out
+++ b/ql/src/test/results/clientpositive/llap/current_date_timestamp.q.out
@@ -59,7 +59,7 @@ POSTHOOK: query: explain extended select current_timestamp() from alltypesorc
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
-OPTIMIZED SQL: SELECT CAST(TIMESTAMP '2012-01-01 01:02:03.000000000' AS TIMESTAMP(9)) AS `$f0`
+OPTIMIZED SQL: SELECT CAST(TIMESTAMP '2012-01-01 01:02:03.000000000' AS TIMESTAMP) AS `$f0`
 FROM `default`.`alltypesorc`
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
diff --git a/ql/src/test/results/clientpositive/timestamp.q.out b/ql/src/test/results/clientpositive/timestamp.q.out
index c59b518..aaa9998 100644
--- a/ql/src/test/results/clientpositive/timestamp.q.out
+++ b/ql/src/test/results/clientpositive/timestamp.q.out
@@ -83,6 +83,178 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+PREHOOK: query: explain extended select cast('2011-01-01 01:01:01' as timestamp) as c from src union select cast('2011-01-01 01:01:01' as timestamp) as c from src limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: explain extended select cast('2011-01-01 01:01:01' as timestamp) as c from src union select cast('2011-01-01 01:01:01' as timestamp) as c from src limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+OPTIMIZED SQL: SELECT CAST(TIMESTAMP '2011-01-01 01:01:01.000000000' AS TIMESTAMP) AS `c`
+FROM (SELECT CAST(TIMESTAMP '2011-01-01 01:01:01.000000000' AS TIMESTAMP) AS `$f0`
+FROM `default`.`src`
+UNION ALL
+SELECT CAST(TIMESTAMP '2011-01-01 01:01:01.000000000' AS TIMESTAMP) AS `$f0`
+FROM `default`.`src`) AS `t1`
+GROUP BY TRUE
+LIMIT 5
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+            GatherStats: false
+            Select Operator
+              Statistics: Num rows: 500 Data size: 20000 Basic stats: COMPLETE Column stats: COMPLETE
+              Union
+                Statistics: Num rows: 1000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  Statistics: Num rows: 1000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    keys: true (type: boolean)
+                    minReductionHashAggr: 0.99
+                    mode: hash
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: boolean)
+                      null sort order: a
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: boolean)
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+                      tag: -1
+                      TopN: 5
+                      TopN Hash Memory Usage: 0.1
+                      auto parallelism: false
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+            GatherStats: false
+            Select Operator
+              Statistics: Num rows: 500 Data size: 20000 Basic stats: COMPLETE Column stats: COMPLETE
+              Union
+                Statistics: Num rows: 1000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  Statistics: Num rows: 1000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    keys: true (type: boolean)
+                    minReductionHashAggr: 0.99
+                    mode: hash
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: boolean)
+                      null sort order: a
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: boolean)
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+                      tag: -1
+                      TopN: 5
+                      TopN Hash Memory Usage: 0.1
+                      auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: src
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+              bucket_count -1
+              bucketing_version 2
+              column.name.delimiter ,
+              columns key,value
+              columns.comments 'default','default'
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.src
+              numFiles 1
+              numRows 500
+              rawDataSize 5312
+              serialization.ddl struct src { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+                bucket_count -1
+                bucketing_version 2
+                column.name.delimiter ,
+                columns key,value
+                columns.comments 'default','default'
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.src
+                numFiles 1
+                numRows 500
+                rawDataSize 5312
+                serialization.ddl struct src { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 5812
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.src
+            name: default.src
+      Truncated Path -> Alias:
+        /src [$hdt$_0:$hdt$_0-subquery1:src, $hdt$_0:$hdt$_0-subquery2:src]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: boolean)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+          Select Operator
+            expressions: TIMESTAMP'2011-01-01 01:01:01' (type: timestamp)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+            Limit
+              Number of rows: 5
+              Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      columns _col0
+                      columns.types timestamp
+                      escape.delim \
+                      hive.serialization.extend.additional.nesting.levels true
+                      serialization.escape.crlf true
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 5
+      Processor Tree:
+        ListSink
+
 PREHOOK: query: select cast('2011-01-01 01:01:01' as timestamp) as c from src union select cast('2011-01-01 01:01:01' as timestamp) as c from src limit 5
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
diff --git a/ql/src/test/results/clientpositive/udf_reflect2.q.out b/ql/src/test/results/clientpositive/udf_reflect2.q.out
index 02461fd..7f03ec7 100644
--- a/ql/src/test/results/clientpositive/udf_reflect2.q.out
+++ b/ql/src/test/results/clientpositive/udf_reflect2.q.out
@@ -84,7 +84,7 @@ FROM (select cast(key as int) key, value, cast('2013-02-15 19:41:20' as timestam
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####
-OPTIMIZED SQL: SELECT CAST(`key` AS INTEGER) AS `key`, REFLECT2(CAST(`key` AS INTEGER), 'byteValue') AS `_o__c1`, REFLECT2(CAST(`key` AS INTEGER), 'shortValue') AS `_o__c2`, REFLECT2(CAST(`key` AS INTEGER), 'intValue') AS `_o__c3`, REFLECT2(CAST(`key` AS INTEGER), 'longValue') AS `_o__c4`, REFLECT2(CAST(`key` AS INTEGER), 'floatValue') AS `_o__c5`, REFLECT2(CAST(`key` AS INTEGER), 'doubleValue') AS `_o__c6`, REFLECT2(CAST(`key` AS INTEGER), 'toString') AS `_o__c7`, `value`, REFLECT2(`val [...]
+OPTIMIZED SQL: SELECT CAST(`key` AS INTEGER) AS `key`, REFLECT2(CAST(`key` AS INTEGER), 'byteValue') AS `_o__c1`, REFLECT2(CAST(`key` AS INTEGER), 'shortValue') AS `_o__c2`, REFLECT2(CAST(`key` AS INTEGER), 'intValue') AS `_o__c3`, REFLECT2(CAST(`key` AS INTEGER), 'longValue') AS `_o__c4`, REFLECT2(CAST(`key` AS INTEGER), 'floatValue') AS `_o__c5`, REFLECT2(CAST(`key` AS INTEGER), 'doubleValue') AS `_o__c6`, REFLECT2(CAST(`key` AS INTEGER), 'toString') AS `_o__c7`, `value`, REFLECT2(`val [...]
 FROM `default`.`src`
 LIMIT 5
 STAGE DEPENDENCIES: