You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by re...@apache.org on 2017/04/30 16:19:11 UTC
hive git commit: HIVE-16527 Support outer and mixed reference
aggregates in windowed functions (Remus Rusanu, reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 66ff59012 -> dac3786d8
HIVE-16527 Support outer and mixed reference aggregates in windowed functions (Remus Rusanu, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/dac3786d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/dac3786d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/dac3786d
Branch: refs/heads/master
Commit: dac3786d86462e4d08d62d23115e6b7a3e534f5d
Parents: 66ff590
Author: Remus Rusanu <re...@apache.org>
Authored: Sun Apr 30 09:19:01 2017 -0700
Committer: Remus Rusanu <re...@apache.org>
Committed: Sun Apr 30 09:19:01 2017 -0700
----------------------------------------------------------------------
data/files/e011_01.txt | 4 +
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 18 +-
.../clientpositive/outer_reference_windowed.q | 80 ++
.../outer_reference_windowed.q.out | 847 +++++++++++++++++++
4 files changed, 942 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/dac3786d/data/files/e011_01.txt
----------------------------------------------------------------------
diff --git a/data/files/e011_01.txt b/data/files/e011_01.txt
new file mode 100644
index 0000000..92df12a
--- /dev/null
+++ b/data/files/e011_01.txt
@@ -0,0 +1,4 @@
+12
+34
+56
+78
http://git-wip-us.apache.org/repos/asf/hive/blob/dac3786d/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index d39b8bd..654f3b1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -553,7 +553,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
function.getType() == HiveParser.TOK_SUBQUERY_EXPR) {
function = (ASTNode)function.getChild(0);
}
- doPhase1GetAllAggregations(function, aggregationTrees, wdwFns);
+ doPhase1GetAllAggregations(function, aggregationTrees, wdwFns, null);
}
// window based aggregations are handled differently
@@ -604,7 +604,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
* @throws SemanticException
*/
private void doPhase1GetAllAggregations(ASTNode expressionTree,
- HashMap<String, ASTNode> aggregations, List<ASTNode> wdwFns) throws SemanticException {
+ HashMap<String, ASTNode> aggregations, List<ASTNode> wdwFns,
+ ASTNode wndParent) throws SemanticException {
int exprTokenType = expressionTree.getToken().getType();
if(exprTokenType == HiveParser.TOK_SUBQUERY_EXPR) {
//since now we have scalar subqueries we can get subquery expression in having
@@ -612,6 +613,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
return;
}
+ boolean parentIsWindowSpec = wndParent != null;
+
if (exprTokenType == HiveParser.TOK_FUNCTION
|| exprTokenType == HiveParser.TOK_FUNCTIONDI
|| exprTokenType == HiveParser.TOK_FUNCTIONSTAR) {
@@ -622,8 +625,9 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
// Further, we will examine its children AST nodes to check whether
// there are aggregation functions within
wdwFns.add(expressionTree);
- doPhase1GetAllAggregations((ASTNode) expressionTree.getChild(expressionTree.getChildCount()-1),
- aggregations, wdwFns);
+ for(Node child : expressionTree.getChildren()) {
+ doPhase1GetAllAggregations((ASTNode) child, aggregations, wdwFns, expressionTree);
+ }
return;
}
if (expressionTree.getChild(0).getType() == HiveParser.Identifier) {
@@ -633,11 +637,11 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
if (FunctionRegistry.getFunctionInfo(functionName) == null) {
throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(functionName));
}
- if(FunctionRegistry.impliesOrder(functionName)) {
+ if(FunctionRegistry.impliesOrder(functionName) && !parentIsWindowSpec) {
throw new SemanticException(ErrorMsg.MISSING_OVER_CLAUSE.getMsg(functionName));
}
if (FunctionRegistry.getGenericUDAFResolver(functionName) != null) {
- if(containsLeadLagUDF(expressionTree)) {
+ if(containsLeadLagUDF(expressionTree) && !parentIsWindowSpec) {
throw new SemanticException(ErrorMsg.MISSING_OVER_CLAUSE.getMsg(functionName));
}
aggregations.put(expressionTree.toStringTree(), expressionTree);
@@ -652,7 +656,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
}
for (int i = 0; i < expressionTree.getChildCount(); i++) {
doPhase1GetAllAggregations((ASTNode) expressionTree.getChild(i),
- aggregations, wdwFns);
+ aggregations, wdwFns, wndParent);
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/dac3786d/ql/src/test/queries/clientpositive/outer_reference_windowed.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/outer_reference_windowed.q b/ql/src/test/queries/clientpositive/outer_reference_windowed.q
new file mode 100644
index 0000000..cac6b41
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/outer_reference_windowed.q
@@ -0,0 +1,80 @@
+DROP TABLE IF EXISTS e011_01;
+DROP TABLE IF EXISTS e011_02;
+DROP TABLE IF EXISTS e011_03;
+
+CREATE TABLE e011_01 (
+ c1 decimal(15,2),
+ c2 decimal(15,2))
+ STORED AS TEXTFILE;
+
+CREATE TABLE e011_02 (
+ c1 decimal(15,2),
+ c2 decimal(15,2));
+
+CREATE TABLE e011_03 (
+ c1 decimal(15,2),
+ c2 decimal(15,2));
+
+LOAD DATA
+ LOCAL INPATH '../../data/files/e011_01.txt'
+ OVERWRITE
+ INTO TABLE e011_01;
+
+INSERT INTO TABLE e011_02
+ SELECT c1, c2
+ FROM e011_01;
+
+INSERT INTO TABLE e011_03
+ SELECT c1, c2
+ FROM e011_01;
+
+ANALYZE TABLE e011_01 COMPUTE STATISTICS FOR COLUMNS;
+ANALYZE TABLE e011_02 COMPUTE STATISTICS FOR COLUMNS;
+ANALYZE TABLE e011_03 COMPUTE STATISTICS FOR COLUMNS;
+
+set hive.explain.user=false;
+
+explain select sum(sum(c1)) over() from e011_01;
+select sum(sum(c1)) over() from e011_01;
+
+explain select sum(sum(c1)) over(
+ partition by c2 order by c1)
+ from e011_01
+ group by e011_01.c1, e011_01.c2;
+select sum(sum(c1)) over(
+ partition by c2 order by c1)
+ from e011_01
+ group by e011_01.c1, e011_01.c2;
+
+explain select sum(sum(e011_01.c1)) over(
+ partition by e011_01.c2 order by e011_01.c1)
+ from e011_01
+ join e011_03 on e011_01.c1 = e011_03.c1
+ group by e011_01.c1, e011_01.c2;
+select sum(sum(e011_01.c1)) over(
+ partition by e011_01.c2 order by e011_01.c1)
+ from e011_01
+ join e011_03 on e011_01.c1 = e011_03.c1
+ group by e011_01.c1, e011_01.c2;
+
+explain select sum(sum(e011_01.c1)) over(
+ partition by e011_03.c2 order by e011_03.c1)
+ from e011_01
+ join e011_03 on e011_01.c1 = e011_03.c1
+ group by e011_03.c1, e011_03.c2;
+select sum(sum(e011_01.c1)) over(
+ partition by e011_03.c2 order by e011_03.c1)
+ from e011_01
+ join e011_03 on e011_01.c1 = e011_03.c1
+ group by e011_03.c1, e011_03.c2;
+
+explain select sum(corr(e011_01.c1, e011_03.c1))
+ over(partition by e011_01.c2 order by e011_03.c2)
+ from e011_01
+ join e011_03 on e011_01.c1 = e011_03.c1
+ group by e011_03.c2, e011_01.c2;
+select sum(corr(e011_01.c1, e011_03.c1))
+ over(partition by e011_01.c2 order by e011_03.c2)
+ from e011_01
+ join e011_03 on e011_01.c1 = e011_03.c1
+ group by e011_03.c2, e011_01.c2;
http://git-wip-us.apache.org/repos/asf/hive/blob/dac3786d/ql/src/test/results/clientpositive/outer_reference_windowed.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/outer_reference_windowed.q.out b/ql/src/test/results/clientpositive/outer_reference_windowed.q.out
new file mode 100644
index 0000000..1df6091
--- /dev/null
+++ b/ql/src/test/results/clientpositive/outer_reference_windowed.q.out
@@ -0,0 +1,847 @@
+PREHOOK: query: DROP TABLE IF EXISTS e011_01
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS e011_01
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE IF EXISTS e011_02
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS e011_02
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE IF EXISTS e011_03
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS e011_03
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE e011_01 (
+ c1 decimal(15,2),
+ c2 decimal(15,2))
+ STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@e011_01
+POSTHOOK: query: CREATE TABLE e011_01 (
+ c1 decimal(15,2),
+ c2 decimal(15,2))
+ STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@e011_01
+PREHOOK: query: CREATE TABLE e011_02 (
+ c1 decimal(15,2),
+ c2 decimal(15,2))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@e011_02
+POSTHOOK: query: CREATE TABLE e011_02 (
+ c1 decimal(15,2),
+ c2 decimal(15,2))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@e011_02
+PREHOOK: query: CREATE TABLE e011_03 (
+ c1 decimal(15,2),
+ c2 decimal(15,2))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@e011_03
+POSTHOOK: query: CREATE TABLE e011_03 (
+ c1 decimal(15,2),
+ c2 decimal(15,2))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@e011_03
+PREHOOK: query: LOAD DATA
+ LOCAL INPATH '../../data/files/e011_01.txt'
+ OVERWRITE
+ INTO TABLE e011_01
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@e011_01
+POSTHOOK: query: LOAD DATA
+ LOCAL INPATH '../../data/files/e011_01.txt'
+ OVERWRITE
+ INTO TABLE e011_01
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@e011_01
+PREHOOK: query: INSERT INTO TABLE e011_02
+ SELECT c1, c2
+ FROM e011_01
+PREHOOK: type: QUERY
+PREHOOK: Input: default@e011_01
+PREHOOK: Output: default@e011_02
+POSTHOOK: query: INSERT INTO TABLE e011_02
+ SELECT c1, c2
+ FROM e011_01
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@e011_01
+POSTHOOK: Output: default@e011_02
+POSTHOOK: Lineage: e011_02.c1 SIMPLE [(e011_01)e011_01.FieldSchema(name:c1, type:decimal(15,2), comment:null), ]
+POSTHOOK: Lineage: e011_02.c2 SIMPLE [(e011_01)e011_01.FieldSchema(name:c2, type:decimal(15,2), comment:null), ]
+PREHOOK: query: INSERT INTO TABLE e011_03
+ SELECT c1, c2
+ FROM e011_01
+PREHOOK: type: QUERY
+PREHOOK: Input: default@e011_01
+PREHOOK: Output: default@e011_03
+POSTHOOK: query: INSERT INTO TABLE e011_03
+ SELECT c1, c2
+ FROM e011_01
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@e011_01
+POSTHOOK: Output: default@e011_03
+POSTHOOK: Lineage: e011_03.c1 SIMPLE [(e011_01)e011_01.FieldSchema(name:c1, type:decimal(15,2), comment:null), ]
+POSTHOOK: Lineage: e011_03.c2 SIMPLE [(e011_01)e011_01.FieldSchema(name:c2, type:decimal(15,2), comment:null), ]
+PREHOOK: query: ANALYZE TABLE e011_01 COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@e011_01
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE e011_01 COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@e011_01
+#### A masked pattern was here ####
+PREHOOK: query: ANALYZE TABLE e011_02 COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@e011_02
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE e011_02 COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@e011_02
+#### A masked pattern was here ####
+PREHOOK: query: ANALYZE TABLE e011_03 COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@e011_03
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE e011_03 COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@e011_03
+#### A masked pattern was here ####
+PREHOOK: query: explain select sum(sum(c1)) over() from e011_01
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select sum(sum(c1)) over() from e011_01
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: e011_01
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c1 (type: decimal(15,2))
+ outputColumnNames: c1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(c1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: decimal(25,2))
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: 0 (type: int)
+ sort order: +
+ Map-reduce partition columns: 0 (type: int)
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: decimal(25,2))
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: decimal(25,2))
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: decimal(25,2)
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: 0 ASC NULLS FIRST
+ partition by: 0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: sum_window_0
+ arguments: _col0
+ name: sum
+ window function: GenericUDAFSumHiveDecimal
+ window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: sum_window_0 (type: decimal(35,2))
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select sum(sum(c1)) over() from e011_01
+PREHOOK: type: QUERY
+PREHOOK: Input: default@e011_01
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(sum(c1)) over() from e011_01
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@e011_01
+#### A masked pattern was here ####
+16.00
+PREHOOK: query: explain select sum(sum(c1)) over(
+ partition by c2 order by c1)
+ from e011_01
+ group by e011_01.c1, e011_01.c2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select sum(sum(c1)) over(
+ partition by c2 order by c1)
+ from e011_01
+ group by e011_01.c1, e011_01.c2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: e011_01
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2))
+ outputColumnNames: c1, c2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(c1)
+ keys: c1 (type: decimal(15,2)), c2 (type: decimal(15,2))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2))
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2))
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(25,2))
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2))
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2))
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: decimal(15,2))
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(25,2))
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: decimal(15,2)), KEY.reducesinkkey0 (type: decimal(15,2)), VALUE._col0 (type: decimal(25,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: decimal(25,2)
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col0 ASC NULLS FIRST
+ partition by: _col1
+ raw input shape:
+ window functions:
+ window function definition
+ alias: sum_window_0
+ arguments: _col2
+ name: sum
+ window function: GenericUDAFSumHiveDecimal
+ window frame: PRECEDING(MAX)~CURRENT
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: sum_window_0 (type: decimal(35,2))
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select sum(sum(c1)) over(
+ partition by c2 order by c1)
+ from e011_01
+ group by e011_01.c1, e011_01.c2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@e011_01
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(sum(c1)) over(
+ partition by c2 order by c1)
+ from e011_01
+ group by e011_01.c1, e011_01.c2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@e011_01
+#### A masked pattern was here ####
+1.00
+3.00
+5.00
+7.00
+PREHOOK: query: explain select sum(sum(e011_01.c1)) over(
+ partition by e011_01.c2 order by e011_01.c1)
+ from e011_01
+ join e011_03 on e011_01.c1 = e011_03.c1
+ group by e011_01.c1, e011_01.c2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select sum(sum(e011_01.c1)) over(
+ partition by e011_01.c2 order by e011_01.c1)
+ from e011_01
+ join e011_03 on e011_01.c1 = e011_03.c1
+ group by e011_01.c1, e011_01.c2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-3 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: e011_01
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: c1 is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: decimal(15,2))
+ sort order: +
+ Map-reduce partition columns: _col0 (type: decimal(15,2))
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(15,2))
+ TableScan
+ alias: e011_03
+ Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: c1 is not null (type: boolean)
+ Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c1 (type: decimal(15,2))
+ outputColumnNames: _col0
+ Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: decimal(15,2))
+ sort order: +
+ Map-reduce partition columns: _col0 (type: decimal(15,2))
+ Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: decimal(15,2))
+ 1 _col0 (type: decimal(15,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col0)
+ keys: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2))
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2))
+ Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(25,2))
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2))
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2))
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: decimal(15,2))
+ Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(25,2))
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: decimal(15,2)), KEY.reducesinkkey0 (type: decimal(15,2)), VALUE._col0 (type: decimal(25,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: decimal(25,2)
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col0 ASC NULLS FIRST
+ partition by: _col1
+ raw input shape:
+ window functions:
+ window function definition
+ alias: sum_window_0
+ arguments: _col2
+ name: sum
+ window function: GenericUDAFSumHiveDecimal
+ window frame: PRECEDING(MAX)~CURRENT
+ Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: sum_window_0 (type: decimal(35,2))
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select sum(sum(e011_01.c1)) over(
+ partition by e011_01.c2 order by e011_01.c1)
+ from e011_01
+ join e011_03 on e011_01.c1 = e011_03.c1
+ group by e011_01.c1, e011_01.c2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@e011_01
+PREHOOK: Input: default@e011_03
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(sum(e011_01.c1)) over(
+ partition by e011_01.c2 order by e011_01.c1)
+ from e011_01
+ join e011_03 on e011_01.c1 = e011_03.c1
+ group by e011_01.c1, e011_01.c2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@e011_01
+POSTHOOK: Input: default@e011_03
+#### A masked pattern was here ####
+1.00
+3.00
+5.00
+7.00
+PREHOOK: query: explain select sum(sum(e011_01.c1)) over(
+ partition by e011_03.c2 order by e011_03.c1)
+ from e011_01
+ join e011_03 on e011_01.c1 = e011_03.c1
+ group by e011_03.c1, e011_03.c2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select sum(sum(e011_01.c1)) over(
+ partition by e011_03.c2 order by e011_03.c1)
+ from e011_01
+ join e011_03 on e011_01.c1 = e011_03.c1
+ group by e011_03.c1, e011_03.c2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-3 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: e011_01
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: c1 is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c1 (type: decimal(15,2))
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: decimal(15,2))
+ sort order: +
+ Map-reduce partition columns: _col0 (type: decimal(15,2))
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ alias: e011_03
+ Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: c1 is not null (type: boolean)
+ Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: decimal(15,2))
+ sort order: +
+ Map-reduce partition columns: _col0 (type: decimal(15,2))
+ Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(15,2))
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: decimal(15,2))
+ 1 _col0 (type: decimal(15,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col0)
+ keys: _col1 (type: decimal(15,2)), _col2 (type: decimal(15,2))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2))
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2))
+ Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(25,2))
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2))
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2))
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: decimal(15,2))
+ Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(25,2))
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: decimal(15,2)), KEY.reducesinkkey0 (type: decimal(15,2)), VALUE._col0 (type: decimal(25,2))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: decimal(25,2)
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col0 ASC NULLS FIRST
+ partition by: _col1
+ raw input shape:
+ window functions:
+ window function definition
+ alias: sum_window_0
+ arguments: _col2
+ name: sum
+ window function: GenericUDAFSumHiveDecimal
+ window frame: PRECEDING(MAX)~CURRENT
+ Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: sum_window_0 (type: decimal(35,2))
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select sum(sum(e011_01.c1)) over(
+ partition by e011_03.c2 order by e011_03.c1)
+ from e011_01
+ join e011_03 on e011_01.c1 = e011_03.c1
+ group by e011_03.c1, e011_03.c2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@e011_01
+PREHOOK: Input: default@e011_03
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(sum(e011_01.c1)) over(
+ partition by e011_03.c2 order by e011_03.c1)
+ from e011_01
+ join e011_03 on e011_01.c1 = e011_03.c1
+ group by e011_03.c1, e011_03.c2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@e011_01
+POSTHOOK: Input: default@e011_03
+#### A masked pattern was here ####
+1.00
+3.00
+5.00
+7.00
+PREHOOK: query: explain select sum(corr(e011_01.c1, e011_03.c1))
+ over(partition by e011_01.c2 order by e011_03.c2)
+ from e011_01
+ join e011_03 on e011_01.c1 = e011_03.c1
+ group by e011_03.c2, e011_01.c2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select sum(corr(e011_01.c1, e011_03.c1))
+ over(partition by e011_01.c2 order by e011_03.c2)
+ from e011_01
+ join e011_03 on e011_01.c1 = e011_03.c1
+ group by e011_03.c2, e011_01.c2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: e011_01
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: c1 is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: decimal(15,2))
+ sort order: +
+ Map-reduce partition columns: _col0 (type: decimal(15,2))
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(15,2))
+ TableScan
+ alias: e011_03
+ Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: c1 is not null (type: boolean)
+ Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: decimal(15,2))
+ sort order: +
+ Map-reduce partition columns: _col0 (type: decimal(15,2))
+ Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(15,2))
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: decimal(15,2))
+ 1 _col0 (type: decimal(15,2))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: corr(_col0, _col2)
+ keys: _col1 (type: decimal(15,2)), _col3 (type: decimal(15,2))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2))
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: decimal(15,2))
+ Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: struct<count:bigint,xavg:double,yavg:double,xvar:double,yvar:double,covar:double>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: corr(VALUE._col0)
+ keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2))
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2)), _col2 (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col0 ASC NULLS FIRST
+ partition by: _col1
+ raw input shape:
+ window functions:
+ window function definition
+ alias: sum_window_0
+ arguments: _col2
+ name: sum
+ window function: GenericUDAFSumDouble
+ window frame: PRECEDING(MAX)~CURRENT
+ Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: sum_window_0 (type: double)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select sum(corr(e011_01.c1, e011_03.c1))
+ over(partition by e011_01.c2 order by e011_03.c2)
+ from e011_01
+ join e011_03 on e011_01.c1 = e011_03.c1
+ group by e011_03.c2, e011_01.c2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@e011_01
+PREHOOK: Input: default@e011_03
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(corr(e011_01.c1, e011_03.c1))
+ over(partition by e011_01.c2 order by e011_03.c2)
+ from e011_01
+ join e011_03 on e011_01.c1 = e011_03.c1
+ group by e011_03.c2, e011_01.c2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@e011_01
+POSTHOOK: Input: default@e011_03
+#### A masked pattern was here ####
+NULL
+NULL
+NULL
+NULL