You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2016/10/17 20:42:55 UTC
[63/67] [abbrv] hive git commit: HIVE-14959: Fix DISTINCT with windowing when CBO is enabled/disabled (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
HIVE-14959: Fix DISTINCT with windowing when CBO is enabled/disabled (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/36e810fa
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/36e810fa
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/36e810fa
Branch: refs/heads/hive-14535
Commit: 36e810fa6fd1b353ee1d9907927aa472ae53dd48
Parents: 2cae736
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Thu Oct 13 22:04:37 2016 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Mon Oct 17 20:20:54 2016 +0100
----------------------------------------------------------------------
.../hadoop/hive/ql/parse/CalcitePlanner.java | 6 +
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 37 +-
.../clientpositive/distinct_windowing_no_cbo.q | 63 ++
ql/src/test/queries/clientpositive/windowing.q | 6 +
.../distinct_windowing_no_cbo.q.out | 796 +++++++++++++++++++
.../llap/cbo_rp_windowing_2.q.out | 5 +-
.../results/clientpositive/llap/windowing.q.out | 110 ++-
.../clientpositive/spark/windowing.q.out | 105 ++-
8 files changed, 1103 insertions(+), 25 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/36e810fa/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index e6ab947..cf66ad9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -2366,6 +2366,12 @@ public class CalcitePlanner extends SemanticAnalyzer {
}
}
+ // Select DISTINCT + windowing; GBy handled by genSelectForWindowing
+ if (selExprList.getToken().getType() == HiveParser.TOK_SELECTDI &&
+ !qb.getAllWindowingSpecs().isEmpty()) {
+ return null;
+ }
+
List<ASTNode> grpByAstExprs = getGroupByForClause(qbp, detsClauseName);
HashMap<String, ASTNode> aggregationTrees = qbp.getAggregationExprsForClause(detsClauseName);
boolean hasGrpByAstExprs = (grpByAstExprs != null && !grpByAstExprs.isEmpty()) ? true : false;
http://git-wip-us.apache.org/repos/asf/hive/blob/36e810fa/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 747f387..9d58193 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -3814,24 +3814,12 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
List<ASTNode> result = new ArrayList<ASTNode>(selectExprs == null ? 0
: selectExprs.getChildCount());
if (selectExprs != null) {
- HashMap<String, ASTNode> windowingExprs = parseInfo.getWindowingExprsForClause(dest);
-
for (int i = 0; i < selectExprs.getChildCount(); ++i) {
if (((ASTNode) selectExprs.getChild(i)).getToken().getType() == HiveParser.TOK_HINTLIST) {
continue;
}
// table.column AS alias
ASTNode grpbyExpr = (ASTNode) selectExprs.getChild(i).getChild(0);
- /*
- * If this is handled by Windowing then ignore it.
- */
- if (windowingExprs != null && windowingExprs.containsKey(grpbyExpr.toStringTree())) {
- if (!isCBOExecuted()) {
- throw new SemanticException("SELECT DISTINCT not allowed in the presence of windowing"
- + " functions when CBO is off");
- }
- continue;
- }
result.add(grpbyExpr);
}
}
@@ -9316,8 +9304,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
// Preserve operator before the GBY - we'll use it to resolve '*'
Operator<?> gbySource = curr;
- if (qbp.getAggregationExprsForClause(dest).size() != 0
- || getGroupByForClause(qbp, dest).size() > 0) {
+ if ((qbp.getAggregationExprsForClause(dest).size() != 0
+ || getGroupByForClause(qbp, dest).size() > 0)
+ && (qbp.getSelForClause(dest).getToken().getType() != HiveParser.TOK_SELECTDI
+ || qbp.getWindowingExprsForClause(dest) == null)) {
// multiple distincts is not supported with skew in data
if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW) &&
qbp.getDistinctFuncExprsForClause(dest).size() > 1) {
@@ -9401,12 +9391,29 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
curr = genHavingPlan(dest, qb, curr, aliasToOpInfo);
}
-
if(queryProperties.hasWindowing() && qb.getWindowingSpec(dest) != null) {
curr = genWindowingPlan(qb.getWindowingSpec(dest), curr);
+ // GBy for DISTINCT after windowing
+ if ((qbp.getAggregationExprsForClause(dest).size() != 0
+ || getGroupByForClause(qbp, dest).size() > 0)
+ && qbp.getSelForClause(dest).getToken().getType() == HiveParser.TOK_SELECTDI
+ && qbp.getWindowingExprsForClause(dest) != null) {
+ if (conf.getBoolVar(HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE)) {
+ if (!conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) {
+ curr = genGroupByPlanMapAggrNoSkew(dest, qb, curr);
+ } else {
+ curr = genGroupByPlanMapAggr2MR(dest, qb, curr);
+ }
+ } else if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) {
+ curr = genGroupByPlan2MR(dest, qb, curr);
+ } else {
+ curr = genGroupByPlan1MR(dest, qb, curr);
+ }
+ }
}
curr = genSelectPlan(dest, qb, curr, gbySource);
+
Integer limit = qbp.getDestLimit(dest);
Integer offset = (qbp.getDestLimitOffset(dest) == null) ? 0 : qbp.getDestLimitOffset(dest);
http://git-wip-us.apache.org/repos/asf/hive/blob/36e810fa/ql/src/test/queries/clientpositive/distinct_windowing_no_cbo.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/distinct_windowing_no_cbo.q b/ql/src/test/queries/clientpositive/distinct_windowing_no_cbo.q
new file mode 100644
index 0000000..029974d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/distinct_windowing_no_cbo.q
@@ -0,0 +1,63 @@
+set hive.cbo.enable=false;
+
+drop table over10k;
+
+create table over10k(
+ t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+ row format delimited
+ fields terminated by '|';
+
+load data local inpath '../../data/files/over10k' into table over10k;
+
+explain
+select distinct first_value(t) over ( partition by si order by i ) from over10k limit 10;
+
+select distinct first_value(t) over ( partition by si order by i ) from over10k limit 10;
+
+explain
+select distinct last_value(i) over ( partition by si order by i )
+from over10k limit 10;
+
+select distinct last_value(i) over ( partition by si order by i )
+from over10k limit 10;
+
+explain
+select distinct last_value(i) over ( partition by si order by i ),
+ first_value(t) over ( partition by si order by i )
+from over10k limit 50;
+
+select distinct last_value(i) over ( partition by si order by i ),
+ first_value(t) over ( partition by si order by i )
+from over10k limit 50;
+
+explain
+select si, max(f) mf, rank() over ( partition by si order by mf )
+FROM over10k
+GROUP BY si
+HAVING max(f) > 0
+limit 50;
+
+select si, max(f) mf, rank() over ( partition by si order by mf )
+FROM over10k
+GROUP BY si
+HAVING max(f) > 0
+limit 50;
+
+explain
+select distinct si, rank() over ( partition by si order by i )
+FROM over10k
+limit 50;
+
+select distinct si, rank() over ( partition by si order by i )
+FROM over10k
+limit 50;
http://git-wip-us.apache.org/repos/asf/hive/blob/36e810fa/ql/src/test/queries/clientpositive/windowing.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/windowing.q b/ql/src/test/queries/clientpositive/windowing.q
index 4a8383a..e60a6ef 100644
--- a/ql/src/test/queries/clientpositive/windowing.q
+++ b/ql/src/test/queries/clientpositive/windowing.q
@@ -362,6 +362,12 @@ window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and
w3 as (distribute by p_mfgr sort by p_name range between unbounded preceding and current row);
-- 35. testDistinctWithWindowing
+explain
+select DISTINCT p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following);
+
select DISTINCT p_mfgr, p_name, p_size,
sum(p_size) over w1 as s
from part
http://git-wip-us.apache.org/repos/asf/hive/blob/36e810fa/ql/src/test/results/clientpositive/distinct_windowing_no_cbo.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/distinct_windowing_no_cbo.q.out b/ql/src/test/results/clientpositive/distinct_windowing_no_cbo.q.out
new file mode 100644
index 0000000..0e9091e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/distinct_windowing_no_cbo.q.out
@@ -0,0 +1,796 @@
+PREHOOK: query: drop table over10k
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table over10k
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table over10k(
+ t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+ row format delimited
+ fields terminated by '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@over10k
+POSTHOOK: query: create table over10k(
+ t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+ row format delimited
+ fields terminated by '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@over10k
+PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@over10k
+POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@over10k
+PREHOOK: query: explain
+select distinct first_value(t) over ( partition by si order by i ) from over10k limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select distinct first_value(t) over ( partition by si order by i ) from over10k limit 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: over10k
+ Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: si (type: smallint), i (type: int)
+ sort order: ++
+ Map-reduce partition columns: si (type: smallint)
+ Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ value expressions: t (type: tinyint)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: tinyint, _col1: smallint, _col2: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col2 ASC NULLS FIRST
+ partition by: _col1
+ raw input shape:
+ window functions:
+ window function definition
+ alias: first_value_window_0
+ arguments: _col0
+ name: first_value
+ window function: GenericUDAFFirstValueEvaluator
+ window frame: PRECEDING(MAX)~
+ Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: first_value_window_0 (type: tinyint)
+ outputColumnNames: first_value_window_0
+ Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: first_value_window_0 (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 42397 Data size: 508765 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct first_value(t) over ( partition by si order by i ) from over10k limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct first_value(t) over ( partition by si order by i ) from over10k limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+-2
+-1
+0
+1
+2
+3
+4
+6
+7
+8
+PREHOOK: query: explain
+select distinct last_value(i) over ( partition by si order by i )
+from over10k limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select distinct last_value(i) over ( partition by si order by i )
+from over10k limit 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: over10k
+ Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: si (type: smallint), i (type: int)
+ sort order: ++
+ Map-reduce partition columns: si (type: smallint)
+ Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: smallint, _col2: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col2 ASC NULLS FIRST
+ partition by: _col1
+ raw input shape:
+ window functions:
+ window function definition
+ alias: last_value_window_0
+ arguments: _col2
+ name: last_value
+ window function: GenericUDAFLastValueEvaluator
+ window frame: PRECEDING(MAX)~
+ Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: last_value_window_0 (type: int)
+ outputColumnNames: last_value_window_0
+ Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: last_value_window_0 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 63596 Data size: 508768 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct last_value(i) over ( partition by si order by i )
+from over10k limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct last_value(i) over ( partition by si order by i )
+from over10k limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+65536
+65537
+65538
+65539
+65540
+65541
+65542
+65543
+65544
+65545
+PREHOOK: query: explain
+select distinct last_value(i) over ( partition by si order by i ),
+ first_value(t) over ( partition by si order by i )
+from over10k limit 50
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select distinct last_value(i) over ( partition by si order by i ),
+ first_value(t) over ( partition by si order by i )
+from over10k limit 50
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: over10k
+ Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: si (type: smallint), i (type: int)
+ sort order: ++
+ Map-reduce partition columns: si (type: smallint)
+ Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ value expressions: t (type: tinyint)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: tinyint, _col1: smallint, _col2: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col2 ASC NULLS FIRST
+ partition by: _col1
+ raw input shape:
+ window functions:
+ window function definition
+ alias: last_value_window_0
+ arguments: _col2
+ name: last_value
+ window function: GenericUDAFLastValueEvaluator
+ window frame: PRECEDING(MAX)~
+ window function definition
+ alias: first_value_window_1
+ arguments: _col0
+ name: first_value
+ window function: GenericUDAFFirstValueEvaluator
+ window frame: PRECEDING(MAX)~
+ Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: last_value_window_0 (type: int), first_value_window_1 (type: tinyint)
+ outputColumnNames: last_value_window_0, first_value_window_1
+ Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: last_value_window_0 (type: int), first_value_window_1 (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: tinyint)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: tinyint)
+ Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 42397 Data size: 508765 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 50
+ Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 50
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct last_value(i) over ( partition by si order by i ),
+ first_value(t) over ( partition by si order by i )
+from over10k limit 50
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct last_value(i) over ( partition by si order by i ),
+ first_value(t) over ( partition by si order by i )
+from over10k limit 50
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+65536 -2
+65536 2
+65536 9
+65536 12
+65536 13
+65536 18
+65536 22
+65536 23
+65536 27
+65536 37
+65536 39
+65536 42
+65536 48
+65536 55
+65536 56
+65536 58
+65536 61
+65536 69
+65536 71
+65536 73
+65536 75
+65536 78
+65536 80
+65536 83
+65536 84
+65536 88
+65536 94
+65536 104
+65536 107
+65536 108
+65536 111
+65536 114
+65536 118
+65536 119
+65536 121
+65537 4
+65537 8
+65537 9
+65537 11
+65537 18
+65537 22
+65537 25
+65537 36
+65537 51
+65537 53
+65537 54
+65537 55
+65537 56
+65537 57
+65537 59
+PREHOOK: query: explain
+select si, max(f) mf, rank() over ( partition by si order by mf )
+FROM over10k
+GROUP BY si
+HAVING max(f) > 0
+limit 50
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select si, max(f) mf, rank() over ( partition by si order by mf )
+FROM over10k
+GROUP BY si
+HAVING max(f) > 0
+limit 50
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: over10k
+ Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: si (type: smallint), f (type: float)
+ outputColumnNames: si, f
+ Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: max(f)
+ keys: si (type: smallint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: smallint)
+ Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: float)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0)
+ keys: KEY._col0 (type: smallint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 63596 Data size: 508768 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col1 > 0.0) (type: boolean)
+ Statistics: Num rows: 21198 Data size: 169584 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint), _col1 (type: float)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: smallint)
+ Statistics: Num rows: 21198 Data size: 169584 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: float)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 21198 Data size: 169584 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: smallint, _col1: float
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: _col0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col1
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 21198 Data size: 169584 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: smallint), _col1 (type: float), rank_window_0 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 21198 Data size: 169584 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 50
+ Statistics: Num rows: 50 Data size: 400 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 50 Data size: 400 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 50
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select si, max(f) mf, rank() over ( partition by si order by mf )
+FROM over10k
+GROUP BY si
+HAVING max(f) > 0
+limit 50
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select si, max(f) mf, rank() over ( partition by si order by mf )
+FROM over10k
+GROUP BY si
+HAVING max(f) > 0
+limit 50
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+256 94.87 1
+257 98.0 1
+258 98.19 1
+259 99.71 1
+260 99.78 1
+261 98.09 1
+262 98.41 1
+263 97.32 1
+264 97.65 1
+265 96.18 1
+266 99.41 1
+267 99.8 1
+268 93.34 1
+269 99.24 1
+270 98.57 1
+271 96.02 1
+272 92.82 1
+273 97.51 1
+274 95.43 1
+275 99.68 1
+276 98.94 1
+277 97.26 1
+278 98.56 1
+279 98.09 1
+280 99.21 1
+281 99.32 1
+282 95.49 1
+283 96.46 1
+284 99.34 1
+285 99.34 1
+286 92.77 1
+287 99.29 1
+288 96.71 1
+289 97.13 1
+290 99.88 1
+291 99.18 1
+292 94.99 1
+293 95.36 1
+294 99.34 1
+295 90.67 1
+296 96.85 1
+297 95.62 1
+298 99.98 1
+299 99.36 1
+300 98.76 1
+301 99.08 1
+302 99.84 1
+303 98.57 1
+304 94.68 1
+305 96.1 1
+PREHOOK: query: explain
+select distinct si, rank() over ( partition by si order by i )
+FROM over10k
+limit 50
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select distinct si, rank() over ( partition by si order by i )
+FROM over10k
+limit 50
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: over10k
+ Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: si (type: smallint), i (type: int)
+ sort order: ++
+ Map-reduce partition columns: si (type: smallint)
+ Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: smallint, _col2: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col2 ASC NULLS FIRST
+ partition by: _col1
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col2
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: smallint), rank_window_0 (type: int)
+ outputColumnNames: _col1, rank_window_0
+ Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: smallint), rank_window_0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int)
+ Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: smallint), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 63596 Data size: 508768 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 50
+ Statistics: Num rows: 50 Data size: 400 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 50 Data size: 400 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 50
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct si, rank() over ( partition by si order by i )
+FROM over10k
+limit 50
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct si, rank() over ( partition by si order by i )
+FROM over10k
+limit 50
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+256 1
+256 2
+256 3
+256 4
+256 5
+256 6
+256 7
+256 8
+256 9
+256 10
+256 11
+256 13
+256 14
+256 15
+256 16
+256 17
+256 18
+256 19
+256 20
+256 21
+256 22
+256 23
+256 24
+256 25
+256 26
+256 27
+256 28
+256 29
+256 30
+256 32
+256 33
+256 34
+256 35
+256 37
+257 1
+257 2
+257 3
+257 4
+257 5
+257 6
+257 7
+257 8
+257 9
+257 10
+257 11
+257 12
+257 13
+257 14
+257 16
+257 17
http://git-wip-us.apache.org/repos/asf/hive/blob/36e810fa/ql/src/test/results/clientpositive/llap/cbo_rp_windowing_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/cbo_rp_windowing_2.q.out b/ql/src/test/results/clientpositive/llap/cbo_rp_windowing_2.q.out
index aa34d3d..3434336 100644
--- a/ql/src/test/results/clientpositive/llap/cbo_rp_windowing_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/cbo_rp_windowing_2.q.out
@@ -1887,8 +1887,9 @@ window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part
#### A masked pattern was here ####
-Manufacturer#1 almond antique burnished rose metallic 2 42
-Manufacturer#1 almond antique chartreuse lavender yellow 34 70
+Manufacturer#1 almond antique burnished rose metallic 2 38
+Manufacturer#1 almond antique burnished rose metallic 2 44
+Manufacturer#1 almond antique chartreuse lavender yellow 34 72
Manufacturer#1 almond antique salmon chartreuse burlywood 6 112
Manufacturer#1 almond aquamarine burnished black steel 28 110
Manufacturer#1 almond aquamarine pink moccasin thistle 42 76
http://git-wip-us.apache.org/repos/asf/hive/blob/36e810fa/ql/src/test/results/clientpositive/llap/windowing.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/windowing.q.out b/ql/src/test/results/clientpositive/llap/windowing.q.out
index b17bfc1..190d13b 100644
--- a/ql/src/test/results/clientpositive/llap/windowing.q.out
+++ b/ql/src/test/results/clientpositive/llap/windowing.q.out
@@ -1872,23 +1872,125 @@ Manufacturer#5 almond antique sky peru orange 2 2 39 108
Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 85 77
Manufacturer#5 almond azure blanched chiffon midnight 23 23 108 71
PREHOOK: query: -- 35. testDistinctWithWindowing
+explain
select DISTINCT p_mfgr, p_name, p_size,
sum(p_size) over w1 as s
from part
window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
PREHOOK: type: QUERY
-PREHOOK: Input: default@part
-#### A masked pattern was here ####
POSTHOOK: query: -- 35. testDistinctWithWindowing
+explain
select DISTINCT p_mfgr, p_name, p_size,
sum(p_size) over w1 as s
from part
window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_name (type: string)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: p_size (type: int)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
+ outputColumnNames: _col1, _col2, _col5
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: string, _col2: string, _col5: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: _col2
+ raw input shape:
+ window functions:
+ window function definition
+ alias: sum_window_0
+ arguments: _col5
+ name: sum
+ window function: GenericUDAFSumLong
+ window frame: PRECEDING(2)~FOLLOWING(2)
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), sum_window_0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: bigint)
+ sort order: ++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: bigint)
+ Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select DISTINCT p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select DISTINCT p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
POSTHOOK: Input: default@part
#### A masked pattern was here ####
-Manufacturer#1 almond antique burnished rose metallic 2 42
-Manufacturer#1 almond antique chartreuse lavender yellow 34 70
+Manufacturer#1 almond antique burnished rose metallic 2 38
+Manufacturer#1 almond antique burnished rose metallic 2 44
+Manufacturer#1 almond antique chartreuse lavender yellow 34 72
Manufacturer#1 almond antique salmon chartreuse burlywood 6 112
Manufacturer#1 almond aquamarine burnished black steel 28 110
Manufacturer#1 almond aquamarine pink moccasin thistle 42 76
http://git-wip-us.apache.org/repos/asf/hive/blob/36e810fa/ql/src/test/results/clientpositive/spark/windowing.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/windowing.q.out b/ql/src/test/results/clientpositive/spark/windowing.q.out
index b17bfc1..72b2245 100644
--- a/ql/src/test/results/clientpositive/spark/windowing.q.out
+++ b/ql/src/test/results/clientpositive/spark/windowing.q.out
@@ -1872,23 +1872,120 @@ Manufacturer#5 almond antique sky peru orange 2 2 39 108
Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 85 77
Manufacturer#5 almond azure blanched chiffon midnight 23 23 108 71
PREHOOK: query: -- 35. testDistinctWithWindowing
+explain
select DISTINCT p_mfgr, p_name, p_size,
sum(p_size) over w1 as s
from part
window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
PREHOOK: type: QUERY
-PREHOOK: Input: default@part
-#### A masked pattern was here ####
POSTHOOK: query: -- 35. testDistinctWithWindowing
+explain
select DISTINCT p_mfgr, p_name, p_size,
sum(p_size) over w1 as s
from part
window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4)
+ Reducer 3 <- Reducer 2 (GROUP, 4)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_name (type: string)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: p_size (type: int)
+ Reducer 2
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
+ outputColumnNames: _col1, _col2, _col5
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: string, _col2: string, _col5: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: _col2
+ raw input shape:
+ window functions:
+ window function definition
+ alias: sum_window_0
+ arguments: _col5
+ name: sum
+ window function: GenericUDAFSumLong
+ window frame: PRECEDING(2)~FOLLOWING(2)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), sum_window_0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: bigint)
+ sort order: ++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: bigint)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select DISTINCT p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select DISTINCT p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
POSTHOOK: Input: default@part
#### A masked pattern was here ####
-Manufacturer#1 almond antique burnished rose metallic 2 42
-Manufacturer#1 almond antique chartreuse lavender yellow 34 70
+Manufacturer#1 almond antique burnished rose metallic 2 38
+Manufacturer#1 almond antique burnished rose metallic 2 44
+Manufacturer#1 almond antique chartreuse lavender yellow 34 72
Manufacturer#1 almond antique salmon chartreuse burlywood 6 112
Manufacturer#1 almond aquamarine burnished black steel 28 110
Manufacturer#1 almond aquamarine pink moccasin thistle 42 76