You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2019/04/10 23:37:33 UTC

[hive] branch master updated: HIVE-21592: OptimizedSql is not shown when the expression contains CONCAT (Jesus Camacho Rodriguez, reviewed by Gopal V)

This is an automated email from the ASF dual-hosted git repository.

jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 6e7e299  HIVE-21592: OptimizedSql is not shown when the expression contains CONCAT (Jesus Camacho Rodriguez, reviewed by Gopal V)
6e7e299 is described below

commit 6e7e2992e091a15ade49b5e2d8547029f93f5ff3
Author: Jesus Camacho Rodriguez <jc...@apache.org>
AuthorDate: Mon Apr 8 15:46:00 2019 -0700

    HIVE-21592: OptimizedSql is not shown when the expression contains CONCAT (Jesus Camacho Rodriguez, reviewed by Gopal V)
---
 .../optimizer/calcite/reloperators/HiveConcat.java | 32 +++++++++++++++++++++-
 ql/src/test/queries/clientpositive/concat_op.q     |  5 ++++
 ql/src/test/results/clientpositive/concat_op.q.out | 27 ++++++++++++++++++
 .../results/clientpositive/groupby_map_ppr.q.out   |  4 +++
 .../groupby_map_ppr_multi_distinct.q.out           |  4 +++
 .../test/results/clientpositive/groupby_ppr.q.out  |  4 +++
 .../groupby_ppr_multi_distinct.q.out               |  4 +++
 .../clientpositive/spark/groupby_map_ppr.q.out     |  4 +++
 .../spark/groupby_map_ppr_multi_distinct.q.out     |  4 +++
 .../results/clientpositive/spark/groupby_ppr.q.out |  4 +++
 .../spark/groupby_ppr_multi_distinct.q.out         |  4 +++
 11 files changed, 95 insertions(+), 1 deletion(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveConcat.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveConcat.java
index 36c34cc..1d20a6a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveConcat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveConcat.java
@@ -18,8 +18,12 @@
 
 package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
 
+import java.util.List;
+import org.apache.calcite.sql.SqlCall;
 import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlNode;
 import org.apache.calcite.sql.SqlSpecialOperator;
+import org.apache.calcite.sql.SqlWriter;
 import org.apache.calcite.sql.type.InferTypes;
 import org.apache.calcite.sql.type.ReturnTypes;
 
@@ -31,5 +35,31 @@ public class HiveConcat extends SqlSpecialOperator {
         InferTypes.RETURN_TYPE, null
     );
   }
-}
 
+  @Override
+  public void unparse(
+      SqlWriter writer,
+      SqlCall call,
+      int leftPrec,
+      int rightPrec) {
+    List<SqlNode> opList = call.getOperandList();
+    assert (opList.size() >= 1);
+    final SqlWriter.Frame frame =
+        writer.startList(SqlWriter.FrameTypeEnum.SIMPLE);
+    SqlNode sqlNode = opList.get(0);
+    sqlNode.unparse(writer, leftPrec, getLeftPrec());
+    for (SqlNode op : opList.subList(1, opList.size() - 1)) {
+      writer.setNeedWhitespace(true);
+      writer.sep("||");
+      writer.setNeedWhitespace(true);
+      op.unparse(writer, 0, 0);
+    }
+    sqlNode = opList.get(opList.size() - 1);
+    writer.setNeedWhitespace(true);
+    writer.sep("||");
+    writer.setNeedWhitespace(true);
+    sqlNode.unparse(writer, getRightPrec(), rightPrec);
+    writer.endList(frame);
+  }
+
+}
diff --git a/ql/src/test/queries/clientpositive/concat_op.q b/ql/src/test/queries/clientpositive/concat_op.q
index 9b0487b..7246e16 100644
--- a/ql/src/test/queries/clientpositive/concat_op.q
+++ b/ql/src/test/queries/clientpositive/concat_op.q
@@ -44,3 +44,8 @@ explain select 'a' || 'b' || 'c';
 -- true and (false or false) and (true or true) => false	should not happen
 select true and false or false and true or true;
 
+explain formatted select key || value from src;
+
+explain formatted select key || value || key from src;
+
+explain formatted select key || value || key || value from src;
diff --git a/ql/src/test/results/clientpositive/concat_op.q.out b/ql/src/test/results/clientpositive/concat_op.q.out
index 7bbb041..9c4fca2 100644
--- a/ql/src/test/results/clientpositive/concat_op.q.out
+++ b/ql/src/test/results/clientpositive/concat_op.q.out
@@ -303,3 +303,30 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: _dummy_database@_dummy_table
 #### A masked pattern was here ####
 true
+PREHOOK: query: explain formatted select key || value from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: explain formatted select key || value from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+{"optimizedSQL":"SELECT `key` || `value` AS `_o__c0`\nFROM `default`.`src`","cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"src","columns:":["key","value"],"database:":"default","Statistics:":"Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE","table:":"src","isTempTable:":"false","OperatorId:":" [...]
+PREHOOK: query: explain formatted select key || value || key from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: explain formatted select key || value || key from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+{"optimizedSQL":"SELECT `key` || `value` || `key` AS `_o__c0`\nFROM `default`.`src`","cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"src","columns:":["key","value"],"database:":"default","Statistics:":"Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE","table:":"src","isTempTable:":"false","Opera [...]
+PREHOOK: query: explain formatted select key || value || key || value from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: explain formatted select key || value || key || value from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+{"optimizedSQL":"SELECT `key` || `value` || `key` || `value` AS `_o__c0`\nFROM `default`.`src`","cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"src","columns:":["key","value"],"database:":"default","Statistics:":"Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE","table:":"src","isTempTable:":"fa [...]
diff --git a/ql/src/test/results/clientpositive/groupby_map_ppr.q.out b/ql/src/test/results/clientpositive/groupby_map_ppr.q.out
index b247113..66988ee 100644
--- a/ql/src/test/results/clientpositive/groupby_map_ppr.q.out
+++ b/ql/src/test/results/clientpositive/groupby_map_ppr.q.out
@@ -28,6 +28,10 @@ POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 POSTHOOK: Output: default@dest1_n144
+OPTIMIZED SQL: SELECT SUBSTR(`key`, 1, 1) AS `_o__c0`, COUNT(DISTINCT SUBSTR(`value`, 5)) AS `_o__c1`, SUBSTR(`key`, 1, 1) || SUM(SUBSTR(`value`, 5)) AS `_o__c2`
+FROM `default`.`srcpart`
+WHERE `ds` = '2008-04-08'
+GROUP BY SUBSTR(`key`, 1, 1)
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
diff --git a/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out
index b357d0e..9f2a587 100644
--- a/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out
@@ -28,6 +28,10 @@ POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 POSTHOOK: Output: default@dest1_n174
+OPTIMIZED SQL: SELECT SUBSTR(`key`, 1, 1) AS `_o__c0`, COUNT(DISTINCT SUBSTR(`value`, 5)) AS `_o__c1`, SUBSTR(`key`, 1, 1) || SUM(SUBSTR(`value`, 5)) AS `_o__c2`, SUM(DISTINCT SUBSTR(`value`, 5)) AS `_o__c3`, COUNT(DISTINCT `value`) AS `_o__c4`
+FROM `default`.`srcpart`
+WHERE `ds` = '2008-04-08'
+GROUP BY SUBSTR(`key`, 1, 1)
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
diff --git a/ql/src/test/results/clientpositive/groupby_ppr.q.out b/ql/src/test/results/clientpositive/groupby_ppr.q.out
index e9c20af..d84c649 100644
--- a/ql/src/test/results/clientpositive/groupby_ppr.q.out
+++ b/ql/src/test/results/clientpositive/groupby_ppr.q.out
@@ -28,6 +28,10 @@ POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 POSTHOOK: Output: default@dest1_n79
+OPTIMIZED SQL: SELECT SUBSTR(`key`, 1, 1) AS `_o__c0`, COUNT(DISTINCT SUBSTR(`value`, 5)) AS `_o__c1`, SUBSTR(`key`, 1, 1) || SUM(SUBSTR(`value`, 5)) AS `_o__c2`
+FROM `default`.`srcpart`
+WHERE `ds` = '2008-04-08'
+GROUP BY SUBSTR(`key`, 1, 1)
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
diff --git a/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
index 2885282..db358d9 100644
--- a/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
@@ -28,6 +28,10 @@ POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 POSTHOOK: Output: default@dest1
+OPTIMIZED SQL: SELECT SUBSTR(`key`, 1, 1) AS `_o__c0`, COUNT(DISTINCT SUBSTR(`value`, 5)) AS `_o__c1`, SUBSTR(`key`, 1, 1) || SUM(SUBSTR(`value`, 5)) AS `_o__c2`, SUM(DISTINCT SUBSTR(`value`, 5)) AS `_o__c3`, COUNT(DISTINCT `value`) AS `_o__c4`
+FROM `default`.`srcpart`
+WHERE `ds` = '2008-04-08'
+GROUP BY SUBSTR(`key`, 1, 1)
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
diff --git a/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out b/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out
index 5bc1854..a360c42 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out
@@ -28,6 +28,10 @@ POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 POSTHOOK: Output: default@dest1_n144
+OPTIMIZED SQL: SELECT SUBSTR(`key`, 1, 1) AS `_o__c0`, COUNT(DISTINCT SUBSTR(`value`, 5)) AS `_o__c1`, SUBSTR(`key`, 1, 1) || SUM(SUBSTR(`value`, 5)) AS `_o__c2`
+FROM `default`.`srcpart`
+WHERE `ds` = '2008-04-08'
+GROUP BY SUBSTR(`key`, 1, 1)
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
diff --git a/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out
index 2e96ff4..558e57b 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out
@@ -28,6 +28,10 @@ POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 POSTHOOK: Output: default@dest1_n174
+OPTIMIZED SQL: SELECT SUBSTR(`key`, 1, 1) AS `_o__c0`, COUNT(DISTINCT SUBSTR(`value`, 5)) AS `_o__c1`, SUBSTR(`key`, 1, 1) || SUM(SUBSTR(`value`, 5)) AS `_o__c2`, SUM(DISTINCT SUBSTR(`value`, 5)) AS `_o__c3`, COUNT(DISTINCT `value`) AS `_o__c4`
+FROM `default`.`srcpart`
+WHERE `ds` = '2008-04-08'
+GROUP BY SUBSTR(`key`, 1, 1)
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
diff --git a/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out b/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out
index f90890c..5fb1666 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out
@@ -28,6 +28,10 @@ POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 POSTHOOK: Output: default@dest1_n79
+OPTIMIZED SQL: SELECT SUBSTR(`key`, 1, 1) AS `_o__c0`, COUNT(DISTINCT SUBSTR(`value`, 5)) AS `_o__c1`, SUBSTR(`key`, 1, 1) || SUM(SUBSTR(`value`, 5)) AS `_o__c2`
+FROM `default`.`srcpart`
+WHERE `ds` = '2008-04-08'
+GROUP BY SUBSTR(`key`, 1, 1)
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
diff --git a/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out
index afdb17a..338f753 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out
@@ -28,6 +28,10 @@ POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 POSTHOOK: Output: default@dest1
+OPTIMIZED SQL: SELECT SUBSTR(`key`, 1, 1) AS `_o__c0`, COUNT(DISTINCT SUBSTR(`value`, 5)) AS `_o__c1`, SUBSTR(`key`, 1, 1) || SUM(SUBSTR(`value`, 5)) AS `_o__c2`, SUM(DISTINCT SUBSTR(`value`, 5)) AS `_o__c3`, COUNT(DISTINCT `value`) AS `_o__c4`
+FROM `default`.`srcpart`
+WHERE `ds` = '2008-04-08'
+GROUP BY SUBSTR(`key`, 1, 1)
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1