Posted to commits@hive.apache.org by jc...@apache.org on 2019/04/29 19:54:53 UTC
[hive] branch master updated: HIVE-21619: Print timestamp type without precision in SQL explain extended (Jesus Camacho Rodriguez, reviewed by Gopal V)
This is an automated email from the ASF dual-hosted git repository.
jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 0c38cee HIVE-21619: Print timestamp type without precision in SQL explain extended (Jesus Camacho Rodriguez, reviewed by Gopal V)
0c38cee is described below
commit 0c38cee599d6da6b3bbdfda2a2c1bb4c91759f47
Author: Jesus Camacho Rodriguez <jc...@apache.org>
AuthorDate: Tue Apr 16 13:27:09 2019 -0700
HIVE-21619: Print timestamp type without precision in SQL explain extended (Jesus Camacho Rodriguez, reviewed by Gopal V)
---
 .../hadoop/hive/ql/parse/CalcitePlanner.java       |  12 +-
 ql/src/test/queries/clientpositive/timestamp.q     |   2 +
 .../kafka/kafka_storage_handler.q.out              |   8 +-
 .../llap/current_date_timestamp.q.out              |   2 +-
 ql/src/test/results/clientpositive/timestamp.q.out | 172 +++++++++++++++++++++
 .../test/results/clientpositive/udf_reflect2.q.out |   2 +-
 6 files changed, 190 insertions(+), 8 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index a7cdddb..f5c5a10 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -26,6 +26,7 @@ import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
+import java.util.regex.Pattern;
import org.antlr.runtime.ClassicToken;
import org.antlr.runtime.CommonToken;
import org.antlr.runtime.tree.Tree;
@@ -49,7 +50,6 @@ import org.apache.calcite.plan.RelOptCost;
import org.apache.calcite.plan.RelOptMaterialization;
import org.apache.calcite.plan.RelOptPlanner;
import org.apache.calcite.plan.RelOptRule;
-import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.plan.RelOptSchema;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.plan.RelTraitSet;
@@ -342,6 +342,12 @@ public class CalcitePlanner extends SemanticAnalyzer {
private static final CommonToken SUBQUERY_TOKEN =
new ImmutableCommonToken(HiveParser.TOK_SUBQUERY, "TOK_SUBQUERY");
+ private static final Pattern PATTERN_VARCHAR =
+ Pattern.compile("VARCHAR\\(2147483647\\)");
+ private static final Pattern PATTERN_TIMESTAMP =
+ Pattern.compile("TIMESTAMP\\(9\\)");
+
+
public CalcitePlanner(QueryState queryState) throws SemanticException {
super(queryState);
if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_ENABLED)) {
@@ -1516,7 +1522,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
.getTypeFactory());
final JdbcImplementor.Result result = jdbcImplementor.visitChild(0, optimizedOptiqPlan);
String sql = result.asStatement().toSqlString(dialect).getSql();
- return sql.replaceAll("VARCHAR\\(2147483647\\)", "STRING");
+ sql = PATTERN_VARCHAR.matcher(sql).replaceAll("STRING"); // VARCHAR(INTEGER.MAX) -> STRING
+ sql = PATTERN_TIMESTAMP.matcher(sql).replaceAll("TIMESTAMP"); // TIMESTAMP(9) -> TIMESTAMP
+ return sql;
} catch (Exception ex) {
LOG.warn("Rel2SQL Rewrite threw error", ex);
}
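
The core of the change is in the CalcitePlanner.java hunk above: the one-off
sql.replaceAll("VARCHAR\\(2147483647\\)", "STRING") call is replaced by
Pattern constants compiled once when the class loads, and a second rewrite is
added so that the nanosecond-precision TIMESTAMP(9) emitted by Calcite's
Rel2SQL translation prints as plain TIMESTAMP in the OPTIMIZED SQL section of
explain extended. A minimal standalone sketch of the same rewrite follows;
the SqlTypeNormalizer class and normalize method names are illustrative only,
not part of Hive:

    import java.util.regex.Pattern;

    public class SqlTypeNormalizer {

        // Compiled once and reused, mirroring PATTERN_VARCHAR and
        // PATTERN_TIMESTAMP in the patch above.
        private static final Pattern VARCHAR_MAX =
            Pattern.compile("VARCHAR\\(2147483647\\)");
        private static final Pattern TIMESTAMP_NANOS =
            Pattern.compile("TIMESTAMP\\(9\\)");

        static String normalize(String sql) {
            sql = VARCHAR_MAX.matcher(sql).replaceAll("STRING");        // VARCHAR(Integer.MAX_VALUE) -> STRING
            sql = TIMESTAMP_NANOS.matcher(sql).replaceAll("TIMESTAMP"); // TIMESTAMP(9) -> TIMESTAMP
            return sql;
        }

        public static void main(String[] args) {
            String sql = "SELECT CAST(`__timestamp` AS TIMESTAMP(9)) AS `$f1`, "
                + "CAST(`value` AS VARCHAR(2147483647)) AS `$f2` FROM `default`.`t`";
            System.out.println(normalize(sql));
            // SELECT CAST(`__timestamp` AS TIMESTAMP) AS `$f1`,
            //        CAST(`value` AS STRING) AS `$f2` FROM `default`.`t`
        }
    }

Precompiling the patterns avoids recompiling the regex on every explain call,
which is what String.replaceAll does internally; the user-visible effect, as
the q.out updates below show, is only in the printed types.
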
diff --git a/ql/src/test/queries/clientpositive/timestamp.q b/ql/src/test/queries/clientpositive/timestamp.q
index 1453cf8..f3f09b3 100644
--- a/ql/src/test/queries/clientpositive/timestamp.q
+++ b/ql/src/test/queries/clientpositive/timestamp.q
@@ -4,6 +4,8 @@ set hive.mapred.mode=nonstrict;
explain select cast('2011-01-01 01:01:01' as timestamp) as c from src union select cast('2011-01-01 01:01:01' as timestamp) as c from src limit 5;
+explain extended select cast('2011-01-01 01:01:01' as timestamp) as c from src union select cast('2011-01-01 01:01:01' as timestamp) as c from src limit 5;
+
select cast('2011-01-01 01:01:01' as timestamp) as c from src union select cast('2011-01-01 01:01:01' as timestamp) as c from src limit 5;
explain select cast('2011-01-01 01:01:01.123' as timestamp) as c from src union select cast('2011-01-01 01:01:01.123' as timestamp) as c from src limit 5;
diff --git a/ql/src/test/results/clientpositive/kafka/kafka_storage_handler.q.out b/ql/src/test/results/clientpositive/kafka/kafka_storage_handler.q.out
index e9f9648..7b71851 100644
--- a/ql/src/test/results/clientpositive/kafka/kafka_storage_handler.q.out
+++ b/ql/src/test/results/clientpositive/kafka/kafka_storage_handler.q.out
@@ -1075,9 +1075,9 @@ POSTHOOK: query: explain extended select distinct `__offset`, cast(`__timestamp`
POSTHOOK: type: QUERY
POSTHOOK: Input: default@wiki_kafka_avro_table
POSTHOOK: Output: hdfs://### HDFS PATH ###
-OPTIMIZED SQL: SELECT `__offset` AS `$f0`, CAST(`__timestamp` AS TIMESTAMP(9)) AS `$f1`, `__key` AS `$f2`
+OPTIMIZED SQL: SELECT `__offset` AS `$f0`, CAST(`__timestamp` AS TIMESTAMP) AS `$f1`, `__key` AS `$f2`
FROM `default`.`wiki_kafka_avro_table`
-GROUP BY `__offset`, CAST(`__timestamp` AS TIMESTAMP(9)), `__key`
+GROUP BY `__offset`, CAST(`__timestamp` AS TIMESTAMP), `__key`
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -1398,9 +1398,9 @@ POSTHOOK: query: explain extended select distinct `__offset`, cast(`__timestamp`
POSTHOOK: type: QUERY
POSTHOOK: Input: default@wiki_kafka_avro_table
POSTHOOK: Output: hdfs://### HDFS PATH ###
-OPTIMIZED SQL: SELECT `__offset` AS `$f0`, CAST(`__timestamp` AS TIMESTAMP(9)) AS `$f1`, `__key` AS `$f2`
+OPTIMIZED SQL: SELECT `__offset` AS `$f0`, CAST(`__timestamp` AS TIMESTAMP) AS `$f1`, `__key` AS `$f2`
FROM `default`.`wiki_kafka_avro_table`
-GROUP BY `__offset`, CAST(`__timestamp` AS TIMESTAMP(9)), `__key`
+GROUP BY `__offset`, CAST(`__timestamp` AS TIMESTAMP), `__key`
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
diff --git a/ql/src/test/results/clientpositive/llap/current_date_timestamp.q.out b/ql/src/test/results/clientpositive/llap/current_date_timestamp.q.out
index 12c0a6e..2f0e9e1 100644
--- a/ql/src/test/results/clientpositive/llap/current_date_timestamp.q.out
+++ b/ql/src/test/results/clientpositive/llap/current_date_timestamp.q.out
@@ -59,7 +59,7 @@ POSTHOOK: query: explain extended select current_timestamp() from alltypesorc
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
-OPTIMIZED SQL: SELECT CAST(TIMESTAMP '2012-01-01 01:02:03.000000000' AS TIMESTAMP(9)) AS `$f0`
+OPTIMIZED SQL: SELECT CAST(TIMESTAMP '2012-01-01 01:02:03.000000000' AS TIMESTAMP) AS `$f0`
FROM `default`.`alltypesorc`
STAGE DEPENDENCIES:
Stage-0 is a root stage
diff --git a/ql/src/test/results/clientpositive/timestamp.q.out b/ql/src/test/results/clientpositive/timestamp.q.out
index c59b518..aaa9998 100644
--- a/ql/src/test/results/clientpositive/timestamp.q.out
+++ b/ql/src/test/results/clientpositive/timestamp.q.out
@@ -83,6 +83,178 @@ STAGE PLANS:
Processor Tree:
ListSink
+PREHOOK: query: explain extended select cast('2011-01-01 01:01:01' as timestamp) as c from src union select cast('2011-01-01 01:01:01' as timestamp) as c from src limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: explain extended select cast('2011-01-01 01:01:01' as timestamp) as c from src union select cast('2011-01-01 01:01:01' as timestamp) as c from src limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+OPTIMIZED SQL: SELECT CAST(TIMESTAMP '2011-01-01 01:01:01.000000000' AS TIMESTAMP) AS `c`
+FROM (SELECT CAST(TIMESTAMP '2011-01-01 01:01:01.000000000' AS TIMESTAMP) AS `$f0`
+FROM `default`.`src`
+UNION ALL
+SELECT CAST(TIMESTAMP '2011-01-01 01:01:01.000000000' AS TIMESTAMP) AS `$f0`
+FROM `default`.`src`) AS `t1`
+GROUP BY TRUE
+LIMIT 5
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Select Operator
+ Statistics: Num rows: 500 Data size: 20000 Basic stats: COMPLETE Column stats: COMPLETE
+ Union
+ Statistics: Num rows: 1000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 1000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: true (type: boolean)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: boolean)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: _col0 (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ TopN: 5
+ TopN Hash Memory Usage: 0.1
+ auto parallelism: false
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Select Operator
+ Statistics: Num rows: 500 Data size: 20000 Basic stats: COMPLETE Column stats: COMPLETE
+ Union
+ Statistics: Num rows: 1000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 1000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: true (type: boolean)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: boolean)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: _col0 (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ TopN: 5
+ TopN Hash Memory Usage: 0.1
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: src
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ serialization.ddl struct src { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ serialization.ddl struct src { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src
+ name: default.src
+ Truncated Path -> Alias:
+ /src [$hdt$_0:$hdt$_0-subquery1:src, $hdt$_0:$hdt$_0-subquery2:src]
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: boolean)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: TIMESTAMP'2011-01-01 01:01:01' (type: timestamp)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 5
+ Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0
+ columns.types timestamp
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 5
+ Processor Tree:
+ ListSink
+
PREHOOK: query: select cast('2011-01-01 01:01:01' as timestamp) as c from src union select cast('2011-01-01 01:01:01' as timestamp) as c from src limit 5
PREHOOK: type: QUERY
PREHOOK: Input: default@src
diff --git a/ql/src/test/results/clientpositive/udf_reflect2.q.out b/ql/src/test/results/clientpositive/udf_reflect2.q.out
index 02461fd..7f03ec7 100644
--- a/ql/src/test/results/clientpositive/udf_reflect2.q.out
+++ b/ql/src/test/results/clientpositive/udf_reflect2.q.out
@@ -84,7 +84,7 @@ FROM (select cast(key as int) key, value, cast('2013-02-15 19:41:20' as timestam
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
-OPTIMIZED SQL: SELECT CAST(`key` AS INTEGER) AS `key`, REFLECT2(CAST(`key` AS INTEGER), 'byteValue') AS `_o__c1`, REFLECT2(CAST(`key` AS INTEGER), 'shortValue') AS `_o__c2`, REFLECT2(CAST(`key` AS INTEGER), 'intValue') AS `_o__c3`, REFLECT2(CAST(`key` AS INTEGER), 'longValue') AS `_o__c4`, REFLECT2(CAST(`key` AS INTEGER), 'floatValue') AS `_o__c5`, REFLECT2(CAST(`key` AS INTEGER), 'doubleValue') AS `_o__c6`, REFLECT2(CAST(`key` AS INTEGER), 'toString') AS `_o__c7`, `value`, REFLECT2(`val [...]
+OPTIMIZED SQL: SELECT CAST(`key` AS INTEGER) AS `key`, REFLECT2(CAST(`key` AS INTEGER), 'byteValue') AS `_o__c1`, REFLECT2(CAST(`key` AS INTEGER), 'shortValue') AS `_o__c2`, REFLECT2(CAST(`key` AS INTEGER), 'intValue') AS `_o__c3`, REFLECT2(CAST(`key` AS INTEGER), 'longValue') AS `_o__c4`, REFLECT2(CAST(`key` AS INTEGER), 'floatValue') AS `_o__c5`, REFLECT2(CAST(`key` AS INTEGER), 'doubleValue') AS `_o__c6`, REFLECT2(CAST(`key` AS INTEGER), 'toString') AS `_o__c7`, `value`, REFLECT2(`val [...]
FROM `default`.`src`
LIMIT 5
STAGE DEPENDENCIES: