You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2012/12/27 12:23:21 UTC
svn commit: r1426165 - in /hive/trunk:
common/src/java/org/apache/hadoop/hive/conf/ conf/
ql/src/java/org/apache/hadoop/hive/ql/
ql/src/java/org/apache/hadoop/hive/ql/parse/
ql/src/test/queries/clientnegative/ ql/src/test/queries/clientpositive/
ql/src...
Author: namit
Date: Thu Dec 27 11:23:20 2012
New Revision: 1426165
URL: http://svn.apache.org/viewvc?rev=1426165&view=rev
Log:
HIVE-581 improve group by syntax
(Zhenxiao Luo via namit)
Added:
hive/trunk/ql/src/test/queries/clientnegative/groupby_invalid_position.q
hive/trunk/ql/src/test/queries/clientnegative/orderby_invalid_position.q
hive/trunk/ql/src/test/queries/clientnegative/orderby_position_unsupported.q
hive/trunk/ql/src/test/queries/clientpositive/groupby_position.q
hive/trunk/ql/src/test/results/clientnegative/groupby_invalid_position.q.out
hive/trunk/ql/src/test/results/clientnegative/orderby_invalid_position.q.out
hive/trunk/ql/src/test/results/clientnegative/orderby_position_unsupported.q.out
hive/trunk/ql/src/test/results/clientpositive/groupby_position.q.out
Modified:
hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
hive/trunk/conf/hive-default.xml.template
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1426165&r1=1426164&r2=1426165&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Thu Dec 27 11:23:20 2012
@@ -406,6 +406,7 @@ public class HiveConf extends Configurat
HIVEMAPAGGRHASHMINREDUCTION("hive.map.aggr.hash.min.reduction", (float) 0.5),
HIVEMULTIGROUPBYSINGLEREDUCER("hive.multigroupby.singlereducer", true),
HIVE_MAP_GROUPBY_SORT("hive.map.groupby.sorted", false),
+ HIVE_GROUPBY_ORDERBY_POSITION_ALIAS("hive.groupby.orderby.position.alias", false),
// for hive udtf operator
HIVEUDTFAUTOPROGRESS("hive.udtf.auto.progress", false),
Modified: hive/trunk/conf/hive-default.xml.template
URL: http://svn.apache.org/viewvc/hive/trunk/conf/hive-default.xml.template?rev=1426165&r1=1426164&r2=1426165&view=diff
==============================================================================
--- hive/trunk/conf/hive-default.xml.template (original)
+++ hive/trunk/conf/hive-default.xml.template Thu Dec 27 11:23:20 2012
@@ -1626,5 +1626,11 @@
If the property is not set, then logging will be initialized using hive-exec-log4j.properties found on the classpath.
If the property is set, the value must be a valid URI (java.net.URI, e.g. "file:///tmp/my-logging.properties"), which you can then extract a URL from and pass to PropertyConfigurator.configure(URL).</description>
</property>
+
+<property>
+ <name>hive.groupby.orderby.position.alias</name>
+ <value>false</value>
+ <description>Whether to enable using Column Position Alias in Group By or Order By</description>
+</property>
</configuration>
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java?rev=1426165&r1=1426164&r2=1426165&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java Thu Dec 27 11:23:20 2012
@@ -302,6 +302,12 @@ public enum ErrorMsg {
"ALTER VIEW AS SELECT if the view has paritions\n"),
EXISTING_TABLE_IS_NOT_VIEW(10218,
"Existing table is not a view\n"),
+ NO_SUPPORTED_ORDERBY_ALLCOLREF_POS(10219,
+ "Position in ORDER BY is not supported when using SELECT *"),
+ INVALID_POSITION_ALIAS_IN_GROUPBY(10220,
+ "Invalid position alias in Group By\n"),
+ INVALID_POSITION_ALIAS_IN_ORDERBY(10221,
+ "Invalid position alias in Order By\n"),
SCRIPT_INIT_ERROR(20000, "Unable to initialize custom script."),
SCRIPT_IO_ERROR(20001, "An error occurred while reading or writing to your custom script. "
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1426165&r1=1426164&r2=1426165&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Thu Dec 27 11:23:20 2012
@@ -33,6 +33,7 @@ import java.util.TreeSet;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
+import org.antlr.runtime.tree.BaseTree;
import org.antlr.runtime.tree.Tree;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.ContentSummary;
@@ -8200,6 +8201,9 @@ public class SemanticAnalyzer extends Ba
LOG.info("Starting Semantic Analysis");
+ // analyze and process the position alias
+ processPositionAlias(ast);
+
// analyze create table command
if (ast.getToken().getType() == HiveParser.TOK_CREATETABLE) {
// if it is not CTAS, we don't need to go further and just return
@@ -9134,6 +9138,98 @@ public class SemanticAnalyzer extends Ba
return conf.getIntVar(HiveConf.ConfVars.HADOOPNUMREDUCERS);
}
+ // Process the position alias in GROUPBY and ORDERBY
+ private void processPositionAlias(ASTNode ast) throws SemanticException {
+ if (HiveConf.getBoolVar(conf,
+ HiveConf.ConfVars.HIVE_GROUPBY_ORDERBY_POSITION_ALIAS) == false) {
+ return;
+ }
+
+ if (ast.getChildCount() == 0) {
+ return;
+ }
+
+ boolean isAllCol;
+ ASTNode selectNode = null;
+ ASTNode groupbyNode = null;
+ ASTNode orderbyNode = null;
+
+ // get node type
+ int child_count = ast.getChildCount();
+ for (int child_pos = 0; child_pos < child_count; ++child_pos) {
+ ASTNode node = (ASTNode) ast.getChild(child_pos);
+ int type = node.getToken().getType();
+ if (type == HiveParser.TOK_SELECT) {
+ selectNode = node;
+ } else if (type == HiveParser.TOK_GROUPBY) {
+ groupbyNode = node;
+ } else if (type == HiveParser.TOK_ORDERBY) {
+ orderbyNode = node;
+ }
+ }
+
+ if (selectNode != null) {
+ int selectExpCnt = selectNode.getChildCount();
+
+ // replace each of the position alias in GROUPBY with the actual column name
+ if (groupbyNode != null) {
+ for (int child_pos = 0; child_pos < groupbyNode.getChildCount(); ++child_pos) {
+ ASTNode node = (ASTNode) groupbyNode.getChild(child_pos);
+ if (node.getToken().getType() == HiveParser.Number) {
+ int pos = Integer.parseInt(node.getText());
+ if (pos > 0 && pos <= selectExpCnt) {
+ groupbyNode.setChild(child_pos,
+ (BaseTree) selectNode.getChild(pos - 1).getChild(0));
+ } else {
+ throw new SemanticException(
+ ErrorMsg.INVALID_POSITION_ALIAS_IN_GROUPBY.getMsg(
+ "Position alias: " + pos + " does not exist\n" +
+ "The Select List is indexed from 1 to " + selectExpCnt));
+ }
+ }
+ }
+ }
+
+ // replace each of the position alias in ORDERBY with the actual column name
+ if (orderbyNode != null) {
+ isAllCol = false;
+ for (int child_pos = 0; child_pos < selectNode.getChildCount(); ++child_pos) {
+ ASTNode node = (ASTNode) selectNode.getChild(child_pos).getChild(0);
+ if (node.getToken().getType() == HiveParser.TOK_ALLCOLREF) {
+ isAllCol = true;
+ }
+ }
+ for (int child_pos = 0; child_pos < orderbyNode.getChildCount(); ++child_pos) {
+ ASTNode colNode = (ASTNode) orderbyNode.getChild(child_pos);
+ ASTNode node = (ASTNode) colNode.getChild(0);
+ if (node.getToken().getType() == HiveParser.Number) {
+ if (!isAllCol) {
+ int pos = Integer.parseInt(node.getText());
+ if (pos > 0 && pos <= selectExpCnt) {
+ colNode.setChild(0, (BaseTree) selectNode.getChild(pos - 1).getChild(0));
+ } else {
+ throw new SemanticException(
+ ErrorMsg.INVALID_POSITION_ALIAS_IN_ORDERBY.getMsg(
+ "Position alias: " + pos + " does not exist\n" +
+ "The Select List is indexed from 1 to " + selectExpCnt));
+ }
+ } else {
+ throw new SemanticException(
+ ErrorMsg.NO_SUPPORTED_ORDERBY_ALLCOLREF_POS.getMsg());
+ }
+ }
+ }
+ }
+ }
+
+ // Recursively process through the children ASTNodes
+ for (int child_pos = 0; child_pos < child_count; ++child_pos) {
+ processPositionAlias((ASTNode) ast.getChild(child_pos));
+ }
+ return;
+ }
+
+
public QB getQB() {
return qb;
}
Added: hive/trunk/ql/src/test/queries/clientnegative/groupby_invalid_position.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/groupby_invalid_position.q?rev=1426165&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/groupby_invalid_position.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/groupby_invalid_position.q Thu Dec 27 11:23:20 2012
@@ -0,0 +1,4 @@
+set hive.groupby.orderby.position.alias=true;
+
+-- invalid position alias in group by
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY 3;
Added: hive/trunk/ql/src/test/queries/clientnegative/orderby_invalid_position.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/orderby_invalid_position.q?rev=1426165&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/orderby_invalid_position.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/orderby_invalid_position.q Thu Dec 27 11:23:20 2012
@@ -0,0 +1,4 @@
+set hive.groupby.orderby.position.alias=true;
+
+-- invalid position alias in order by
+SELECT src.key, src.value FROM src ORDER BY 0;
Added: hive/trunk/ql/src/test/queries/clientnegative/orderby_position_unsupported.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/orderby_position_unsupported.q?rev=1426165&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/orderby_position_unsupported.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/orderby_position_unsupported.q Thu Dec 27 11:23:20 2012
@@ -0,0 +1,4 @@
+set hive.groupby.orderby.position.alias=true;
+
+-- position alias is not supported when SELECT *
+SELECT src.* FROM src ORDER BY 1;
Added: hive/trunk/ql/src/test/queries/clientpositive/groupby_position.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby_position.q?rev=1426165&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/groupby_position.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/groupby_position.q Thu Dec 27 11:23:20 2012
@@ -0,0 +1,72 @@
+set hive.groupby.orderby.position.alias=true;
+
+CREATE TABLE testTable1(key INT, value STRING) STORED AS TEXTFILE;
+CREATE TABLE testTable2(key INT, val1 STRING, val2 STRING) STORED AS TEXTFILE;
+
+-- Position Alias in GROUP BY and ORDER BY
+
+EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1
+INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1, 2;
+
+FROM SRC
+INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1
+INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1, 2;
+
+SELECT key, value FROM testTable1 ORDER BY 1, 2;
+SELECT key, val1, val2 FROM testTable2 ORDER BY 1, 2, 3;
+
+EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1
+INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 2, 1;
+
+FROM SRC
+INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1
+INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 2, 1;
+
+SELECT key, value FROM testTable1 ORDER BY 1, 2;
+SELECT key, val1, val2 FROM testTable2 ORDER BY 1, 2, 3;
+
+-- Position Alias in subquery
+
+EXPLAIN
+SELECT t.key, t.value
+FROM (SELECT b.key as key, count(1) as value FROM src b WHERE b.key <= 20 GROUP BY 1) t
+ORDER BY 2 DESC, 1 ASC;
+
+SELECT t.key, t.value
+FROM (SELECT b.key as key, count(1) as value FROM src b WHERE b.key <= 20 GROUP BY 1) t
+ORDER BY 2 DESC, 1 ASC;
+
+EXPLAIN
+SELECT c1, c2, c3, c4
+FROM (
+ FROM
+ (
+ FROM src src1 SELECT src1.key AS c1, src1.value AS c2, COUNT(DISTINCT SUBSTR(src1.value,5)) AS c3 WHERE src1.key > 10 and src1.key < 20 GROUP BY 1, 2
+ ) a
+ JOIN
+ (
+ FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 GROUP BY 1, 2
+ ) b
+ ON (a.c1 = b.c3)
+ SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4
+) c
+ORDER BY 1 DESC, 2 DESC, 3 ASC, 4 ASC;
+
+SELECT c1, c2, c3, c4
+FROM (
+ FROM
+ (
+ FROM src src1 SELECT src1.key AS c1, src1.value AS c2, COUNT(DISTINCT SUBSTR(src1.value,5)) AS c3 WHERE src1.key > 10 and src1.key < 20 GROUP BY 1, 2
+ ) a
+ JOIN
+ (
+ FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 GROUP BY 1, 2
+ ) b
+ ON (a.c1 = b.c3)
+ SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4
+) c
+ORDER BY 1 DESC, 2 DESC, 3 ASC, 4 ASC;
Added: hive/trunk/ql/src/test/results/clientnegative/groupby_invalid_position.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/groupby_invalid_position.q.out?rev=1426165&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/groupby_invalid_position.q.out (added)
+++ hive/trunk/ql/src/test/results/clientnegative/groupby_invalid_position.q.out Thu Dec 27 11:23:20 2012
@@ -0,0 +1,3 @@
+FAILED: SemanticException [Error 10220]: Invalid position alias in Group By
+ Position alias: 3 does not exist
+The Select List is indexed from 1 to 2
Added: hive/trunk/ql/src/test/results/clientnegative/orderby_invalid_position.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/orderby_invalid_position.q.out?rev=1426165&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/orderby_invalid_position.q.out (added)
+++ hive/trunk/ql/src/test/results/clientnegative/orderby_invalid_position.q.out Thu Dec 27 11:23:20 2012
@@ -0,0 +1,3 @@
+FAILED: SemanticException [Error 10221]: Invalid position alias in Order By
+ Position alias: 0 does not exist
+The Select List is indexed from 1 to 2
Added: hive/trunk/ql/src/test/results/clientnegative/orderby_position_unsupported.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/orderby_position_unsupported.q.out?rev=1426165&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/orderby_position_unsupported.q.out (added)
+++ hive/trunk/ql/src/test/results/clientnegative/orderby_position_unsupported.q.out Thu Dec 27 11:23:20 2012
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10219]: Position in ORDER BY is not supported when using SELECT *
Added: hive/trunk/ql/src/test/results/clientpositive/groupby_position.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_position.q.out?rev=1426165&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_position.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_position.q.out Thu Dec 27 11:23:20 2012
@@ -0,0 +1,1120 @@
+PREHOOK: query: CREATE TABLE testTable1(key INT, value STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE testTable1(key INT, value STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@testTable1
+PREHOOK: query: CREATE TABLE testTable2(key INT, val1 STRING, val2 STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE testTable2(key INT, val1 STRING, val2 STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@testTable2
+PREHOOK: query: -- Position Alias in GROUP BY and ORDER BY
+
+EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1
+INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1, 2
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Position Alias in GROUP BY and ORDER BY
+
+EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1
+INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1, 2
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME testTable1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL SRC) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME testTable2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) value)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL SRC) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key) (. (TOK_TABLE_OR_COL SRC) value))))
+
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-4
+ Stage-5 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src
+ TableScan
+ alias: src
+ Filter Operator
+ predicate:
+ expr: (key < 20.0)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: key, value
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT substr(value, 5))
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ expr: substr(value, 5)
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col2
+ type: bigint
+ Filter Operator
+ predicate:
+ expr: (key < 20.0)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: key, value
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT substr(value, 5))
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ expr: substr(value, 5)
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col1:0._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: UDFToInteger(_col0)
+ type: int
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.testtable1
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.testtable1
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-4
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ sort order: +++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col3
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col2:0._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions:
+ expr: UDFToInteger(_col0)
+ type: int
+ expr: _col1
+ type: string
+ expr: _col2
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 2
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.testtable2
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.testtable2
+
+ Stage: Stage-5
+ Stats-Aggr Operator
+
+
+PREHOOK: query: FROM SRC
+INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1
+INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1, 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@testtable1
+PREHOOK: Output: default@testtable2
+POSTHOOK: query: FROM SRC
+INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1
+INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1, 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@testtable1
+POSTHOOK: Output: default@testtable2
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: SELECT key, value FROM testTable1 ORDER BY 1, 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testtable1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, value FROM testTable1 ORDER BY 1, 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testtable1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+0 1
+2 1
+4 1
+5 1
+8 1
+9 1
+10 1
+11 1
+12 1
+15 1
+17 1
+18 1
+19 1
+PREHOOK: query: SELECT key, val1, val2 FROM testTable2 ORDER BY 1, 2, 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testtable2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, val1, val2 FROM testTable2 ORDER BY 1, 2, 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testtable2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+0 val_0 1
+2 val_2 1
+4 val_4 1
+5 val_5 1
+8 val_8 1
+9 val_9 1
+10 val_10 1
+11 val_11 1
+12 val_12 1
+15 val_15 1
+17 val_17 1
+18 val_18 1
+19 val_19 1
+PREHOOK: query: EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1
+INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 2, 1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1
+INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 2, 1
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME testTable1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL SRC) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME testTable2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) value)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL SRC) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) value) (. (TOK_TABLE_OR_COL SRC) key))))
+
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-4
+ Stage-5 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src
+ TableScan
+ alias: src
+ Filter Operator
+ predicate:
+ expr: (key < 20.0)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: key, value
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT substr(value, 5))
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ expr: substr(value, 5)
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col2
+ type: bigint
+ Filter Operator
+ predicate:
+ expr: (key < 20.0)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: value
+ type: string
+ expr: key
+ type: string
+ outputColumnNames: value, key
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT substr(value, 5))
+ bucketGroup: false
+ keys:
+ expr: value
+ type: string
+ expr: key
+ type: string
+ expr: substr(value, 5)
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col1:0._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: UDFToInteger(_col0)
+ type: int
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.testtable1
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.testtable1
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-4
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ sort order: +++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col3
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col2:0._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions:
+ expr: _col1
+ type: string
+ expr: _col0
+ type: string
+ expr: _col2
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions:
+ expr: UDFToInteger(_col0)
+ type: int
+ expr: _col1
+ type: string
+ expr: _col2
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 2
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.testtable2
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.testtable2
+
+ Stage: Stage-5
+ Stats-Aggr Operator
+
+
+PREHOOK: query: FROM SRC
+INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1
+INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 2, 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@testtable1
+PREHOOK: Output: default@testtable2
+POSTHOOK: query: FROM SRC
+INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1
+INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 2, 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@testtable1
+POSTHOOK: Output: default@testtable2
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: SELECT key, value FROM testTable1 ORDER BY 1, 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testtable1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, value FROM testTable1 ORDER BY 1, 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testtable1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+0 1
+2 1
+4 1
+5 1
+8 1
+9 1
+10 1
+11 1
+12 1
+15 1
+17 1
+18 1
+19 1
+PREHOOK: query: SELECT key, val1, val2 FROM testTable2 ORDER BY 1, 2, 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testtable2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, val1, val2 FROM testTable2 ORDER BY 1, 2, 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testtable2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+0 val_0 1
+2 val_2 1
+4 val_4 1
+5 val_5 1
+8 val_8 1
+9 val_9 1
+10 val_10 1
+11 val_11 1
+12 val_12 1
+15 val_15 1
+17 val_17 1
+18 val_18 1
+19 val_19 1
+PREHOOK: query: -- Position Alias in subquery
+
+EXPLAIN
+SELECT t.key, t.value
+FROM (SELECT b.key as key, count(1) as value FROM src b WHERE b.key <= 20 GROUP BY 1) t
+ORDER BY 2 DESC, 1 ASC
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Position Alias in subquery
+
+EXPLAIN
+SELECT t.key, t.value
+FROM (SELECT b.key as key, count(1) as value FROM src b WHERE b.key <= 20 GROUP BY 1) t
+ORDER BY 2 DESC, 1 ASC
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) value)) (TOK_WHERE (<= (. (TOK_TABLE_OR_COL b) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) key)))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL t) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL t) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEDESC (. (TOK_TABLE_OR_COL t) value)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL t) key)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ t:b
+ TableScan
+ alias: b
+ Filter Operator
+ predicate:
+ expr: (key <= 20.0)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: key
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ key expressions:
+ expr: _col1
+ type: bigint
+ expr: _col0
+ type: string
+ sort order: -+
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT t.key, t.value
+FROM (SELECT b.key as key, count(1) as value FROM src b WHERE b.key <= 20 GROUP BY 1) t
+ORDER BY 2 DESC, 1 ASC
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t.key, t.value
+FROM (SELECT b.key as key, count(1) as value FROM src b WHERE b.key <= 20 GROUP BY 1) t
+ORDER BY 2 DESC, 1 ASC
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+0 3
+5 3
+12 2
+15 2
+18 2
+10 1
+11 1
+17 1
+19 1
+2 1
+20 1
+4 1
+8 1
+9 1
+PREHOOK: query: EXPLAIN
+SELECT c1, c2, c3, c4
+FROM (
+ FROM
+ (
+ FROM src src1 SELECT src1.key AS c1, src1.value AS c2, COUNT(DISTINCT SUBSTR(src1.value,5)) AS c3 WHERE src1.key > 10 and src1.key < 20 GROUP BY 1, 2
+ ) a
+ JOIN
+ (
+ FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 GROUP BY 1, 2
+ ) b
+ ON (a.c1 = b.c3)
+ SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4
+) c
+ORDER BY 1 DESC, 2 DESC, 3 ASC, 4 ASC
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT c1, c2, c3, c4
+FROM (
+ FROM
+ (
+ FROM src src1 SELECT src1.key AS c1, src1.value AS c2, COUNT(DISTINCT SUBSTR(src1.value,5)) AS c3 WHERE src1.key > 10 and src1.key < 20 GROUP BY 1, 2
+ ) a
+ JOIN
+ (
+ FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 GROUP BY 1, 2
+ ) b
+ ON (a.c1 = b.c3)
+ SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4
+) c
+ORDER BY 1 DESC, 2 DESC, 3 ASC, 4 ASC
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) src1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) key) c1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) value) c2) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL src1) value) 5)) c3)) (TOK_WHERE (and (> (. (TOK_TABLE_OR_COL src1) key) 10) (< (. (TOK_TABLE_OR_COL src1) key) 20))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src1) value)))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) src2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) key) c3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) value) c4)) (TOK_WHERE (and (> (. (TOK_TABLE_OR_COL src2) key) 15) (< (. (TOK_TABLE_OR_COL src2) key) 25))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src2) key) (. (TOK_TABLE_OR_COL src2
) value)))) b) (= (. (TOK_TABLE_OR_COL a) c1) (. (TOK_TABLE_OR_COL b) c3)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) c1) c1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) c2) c2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) c3) c3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) c4) c4)))) c)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL c1)) (TOK_SELEXPR (TOK_TABLE_OR_COL c2)) (TOK_SELEXPR (TOK_TABLE_OR_COL c3)) (TOK_SELEXPR (TOK_TABLE_OR_COL c4))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEDESC (TOK_TABLE_OR_COL c1)) (TOK_TABSORTCOLNAMEDESC (TOK_TABLE_OR_COL c2)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL c3)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL c4)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-4
+ Stage-3 depends on stages: Stage-2
+ Stage-4 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ c:a:src1
+ TableScan
+ alias: src1
+ Filter Operator
+ predicate:
+ expr: ((key > 10.0) and (key < 20.0))
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: key, value
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT substr(value, 5))
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ expr: substr(value, 5)
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ sort order: +++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col3
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col2:0._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ $INTNAME
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: 0
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ $INTNAME1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: 1
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1}
+ 1 {VALUE._col0} {VALUE._col1}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col1, _col3, _col4
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col3
+ type: string
+ expr: _col4
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ expr: _col3
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-3
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ expr: _col3
+ type: string
+ sort order: --++
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ expr: _col3
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-4
+ Map Reduce
+ Alias -> Map Operator Tree:
+ c:b:src2
+ TableScan
+ alias: src2
+ Filter Operator
+ predicate:
+ expr: ((key > 15.0) and (key < 25.0))
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: key, value
+ Group By Operator
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ tag: -1
+ Reduce Operator Tree:
+ Group By Operator
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT c1, c2, c3, c4
+FROM (
+ FROM
+ (
+ FROM src src1 SELECT src1.key AS c1, src1.value AS c2, COUNT(DISTINCT SUBSTR(src1.value,5)) AS c3 WHERE src1.key > 10 and src1.key < 20 GROUP BY 1, 2
+ ) a
+ JOIN
+ (
+ FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 GROUP BY 1, 2
+ ) b
+ ON (a.c1 = b.c3)
+ SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4
+) c
+ORDER BY 1 DESC, 2 DESC, 3 ASC, 4 ASC
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT c1, c2, c3, c4
+FROM (
+ FROM
+ (
+ FROM src src1 SELECT src1.key AS c1, src1.value AS c2, COUNT(DISTINCT SUBSTR(src1.value,5)) AS c3 WHERE src1.key > 10 and src1.key < 20 GROUP BY 1, 2
+ ) a
+ JOIN
+ (
+ FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 GROUP BY 1, 2
+ ) b
+ ON (a.c1 = b.c3)
+ SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4
+) c
+ORDER BY 1 DESC, 2 DESC, 3 ASC, 4 ASC
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+19 val_19 19 val_19
+18 val_18 18 val_18
+17 val_17 17 val_17