You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2016/10/17 20:42:55 UTC

[63/67] [abbrv] hive git commit: HIVE-14959: Fix DISTINCT with windowing when CBO is enabled/disabled (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

HIVE-14959: Fix DISTINCT with windowing when CBO is enabled/disabled (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/36e810fa
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/36e810fa
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/36e810fa

Branch: refs/heads/hive-14535
Commit: 36e810fa6fd1b353ee1d9907927aa472ae53dd48
Parents: 2cae736
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Thu Oct 13 22:04:37 2016 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Mon Oct 17 20:20:54 2016 +0100

----------------------------------------------------------------------
 .../hadoop/hive/ql/parse/CalcitePlanner.java    |   6 +
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |  37 +-
 .../clientpositive/distinct_windowing_no_cbo.q  |  63 ++
 ql/src/test/queries/clientpositive/windowing.q  |   6 +
 .../distinct_windowing_no_cbo.q.out             | 796 +++++++++++++++++++
 .../llap/cbo_rp_windowing_2.q.out               |   5 +-
 .../results/clientpositive/llap/windowing.q.out | 110 ++-
 .../clientpositive/spark/windowing.q.out        | 105 ++-
 8 files changed, 1103 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/36e810fa/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index e6ab947..cf66ad9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -2366,6 +2366,12 @@ public class CalcitePlanner extends SemanticAnalyzer {
         }
       }
 
+      // Select DISTINCT + windowing; GBy handled by genSelectForWindowing
+      if (selExprList.getToken().getType() == HiveParser.TOK_SELECTDI &&
+              !qb.getAllWindowingSpecs().isEmpty()) {
+        return null;
+      }
+
       List<ASTNode> grpByAstExprs = getGroupByForClause(qbp, detsClauseName);
       HashMap<String, ASTNode> aggregationTrees = qbp.getAggregationExprsForClause(detsClauseName);
       boolean hasGrpByAstExprs = (grpByAstExprs != null && !grpByAstExprs.isEmpty()) ? true : false;

http://git-wip-us.apache.org/repos/asf/hive/blob/36e810fa/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 747f387..9d58193 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -3814,24 +3814,12 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       List<ASTNode> result = new ArrayList<ASTNode>(selectExprs == null ? 0
           : selectExprs.getChildCount());
       if (selectExprs != null) {
-        HashMap<String, ASTNode> windowingExprs = parseInfo.getWindowingExprsForClause(dest);
-
         for (int i = 0; i < selectExprs.getChildCount(); ++i) {
           if (((ASTNode) selectExprs.getChild(i)).getToken().getType() == HiveParser.TOK_HINTLIST) {
             continue;
           }
           // table.column AS alias
           ASTNode grpbyExpr = (ASTNode) selectExprs.getChild(i).getChild(0);
-          /*
-           * If this is handled by Windowing then ignore it.
-           */
-          if (windowingExprs != null && windowingExprs.containsKey(grpbyExpr.toStringTree())) {
-            if (!isCBOExecuted()) {
-              throw new SemanticException("SELECT DISTINCT not allowed in the presence of windowing"
-                      + " functions when CBO is off");
-            }
-            continue;
-          }
           result.add(grpbyExpr);
         }
       }
@@ -9316,8 +9304,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
             // Preserve operator before the GBY - we'll use it to resolve '*'
             Operator<?> gbySource = curr;
 
-            if (qbp.getAggregationExprsForClause(dest).size() != 0
-                || getGroupByForClause(qbp, dest).size() > 0) {
+            if ((qbp.getAggregationExprsForClause(dest).size() != 0
+                || getGroupByForClause(qbp, dest).size() > 0)
+                    && (qbp.getSelForClause(dest).getToken().getType() != HiveParser.TOK_SELECTDI
+                      || qbp.getWindowingExprsForClause(dest) == null)) {
               // multiple distincts is not supported with skew in data
               if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW) &&
                   qbp.getDistinctFuncExprsForClause(dest).size() > 1) {
@@ -9401,12 +9391,29 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       curr = genHavingPlan(dest, qb, curr, aliasToOpInfo);
     }
 
-
     if(queryProperties.hasWindowing() && qb.getWindowingSpec(dest) != null) {
       curr = genWindowingPlan(qb.getWindowingSpec(dest), curr);
+      // GBy for DISTINCT after windowing
+      if ((qbp.getAggregationExprsForClause(dest).size() != 0
+              || getGroupByForClause(qbp, dest).size() > 0)
+                  && qbp.getSelForClause(dest).getToken().getType() == HiveParser.TOK_SELECTDI
+                  && qbp.getWindowingExprsForClause(dest) != null) {
+        if (conf.getBoolVar(HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE)) {
+          if (!conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) {
+            curr = genGroupByPlanMapAggrNoSkew(dest, qb, curr);
+          } else {
+            curr = genGroupByPlanMapAggr2MR(dest, qb, curr);
+          }
+        } else if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) {
+          curr = genGroupByPlan2MR(dest, qb, curr);
+        } else {
+          curr = genGroupByPlan1MR(dest, qb, curr);
+        }
+      }
     }
 
     curr = genSelectPlan(dest, qb, curr, gbySource);
+
     Integer limit = qbp.getDestLimit(dest);
     Integer offset = (qbp.getDestLimitOffset(dest) == null) ? 0 : qbp.getDestLimitOffset(dest);
 

http://git-wip-us.apache.org/repos/asf/hive/blob/36e810fa/ql/src/test/queries/clientpositive/distinct_windowing_no_cbo.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/distinct_windowing_no_cbo.q b/ql/src/test/queries/clientpositive/distinct_windowing_no_cbo.q
new file mode 100644
index 0000000..029974d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/distinct_windowing_no_cbo.q
@@ -0,0 +1,63 @@
+set hive.cbo.enable=false;
+
+drop table over10k;
+
+create table over10k(
+           t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+       row format delimited
+       fields terminated by '|';
+
+load data local inpath '../../data/files/over10k' into table over10k;
+
+explain
+select distinct first_value(t) over ( partition by si order by i ) from over10k limit 10;
+
+select distinct first_value(t) over ( partition by si order by i ) from over10k limit 10;
+
+explain
+select distinct last_value(i) over ( partition by si order by i )
+from over10k limit 10;
+
+select distinct last_value(i) over ( partition by si order by i )
+from over10k limit 10;
+
+explain
+select distinct last_value(i) over ( partition by si order by i ),
+                first_value(t)  over ( partition by si order by i )
+from over10k limit 50;
+
+select distinct last_value(i) over ( partition by si order by i ),
+                first_value(t)  over ( partition by si order by i )
+from over10k limit 50;
+
+explain
+select si, max(f) mf, rank() over ( partition by si order by mf )
+FROM over10k
+GROUP BY si
+HAVING max(f) > 0
+limit 50;
+
+select si, max(f) mf, rank() over ( partition by si order by mf )
+FROM over10k
+GROUP BY si
+HAVING max(f) > 0
+limit 50;
+
+explain
+select distinct si, rank() over ( partition by si order by i )
+FROM over10k
+limit 50;
+
+select distinct si, rank() over ( partition by si order by i )
+FROM over10k
+limit 50;

http://git-wip-us.apache.org/repos/asf/hive/blob/36e810fa/ql/src/test/queries/clientpositive/windowing.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/windowing.q b/ql/src/test/queries/clientpositive/windowing.q
index 4a8383a..e60a6ef 100644
--- a/ql/src/test/queries/clientpositive/windowing.q
+++ b/ql/src/test/queries/clientpositive/windowing.q
@@ -362,6 +362,12 @@ window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and
        w3 as (distribute by p_mfgr sort by p_name range between unbounded preceding and current row); 
 
 -- 35. testDistinctWithWindowing
+explain
+select DISTINCT p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following);
+
 select DISTINCT p_mfgr, p_name, p_size,
 sum(p_size) over w1 as s
 from part

http://git-wip-us.apache.org/repos/asf/hive/blob/36e810fa/ql/src/test/results/clientpositive/distinct_windowing_no_cbo.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/distinct_windowing_no_cbo.q.out b/ql/src/test/results/clientpositive/distinct_windowing_no_cbo.q.out
new file mode 100644
index 0000000..0e9091e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/distinct_windowing_no_cbo.q.out
@@ -0,0 +1,796 @@
+PREHOOK: query: drop table over10k
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table over10k
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table over10k(
+           t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+       row format delimited
+       fields terminated by '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@over10k
+POSTHOOK: query: create table over10k(
+           t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+       row format delimited
+       fields terminated by '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@over10k
+PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@over10k
+POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@over10k
+PREHOOK: query: explain
+select distinct first_value(t) over ( partition by si order by i ) from over10k limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select distinct first_value(t) over ( partition by si order by i ) from over10k limit 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: over10k
+            Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+            Reduce Output Operator
+              key expressions: si (type: smallint), i (type: int)
+              sort order: ++
+              Map-reduce partition columns: si (type: smallint)
+              Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+              value expressions: t (type: tinyint)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: tinyint, _col1: smallint, _col2: int
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col2 ASC NULLS FIRST
+                  partition by: _col1
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: first_value_window_0
+                        arguments: _col0
+                        name: first_value
+                        window function: GenericUDAFFirstValueEvaluator
+                        window frame: PRECEDING(MAX)~
+            Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: first_value_window_0 (type: tinyint)
+              outputColumnNames: first_value_window_0
+              Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: first_value_window_0 (type: tinyint)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: tinyint)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: tinyint)
+              Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+              TopN Hash Memory Usage: 0.1
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: tinyint)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 42397 Data size: 508765 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 10
+            Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct first_value(t) over ( partition by si order by i ) from over10k limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct first_value(t) over ( partition by si order by i ) from over10k limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+-2
+-1
+0
+1
+2
+3
+4
+6
+7
+8
+PREHOOK: query: explain
+select distinct last_value(i) over ( partition by si order by i )
+from over10k limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select distinct last_value(i) over ( partition by si order by i )
+from over10k limit 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: over10k
+            Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+            Reduce Output Operator
+              key expressions: si (type: smallint), i (type: int)
+              sort order: ++
+              Map-reduce partition columns: si (type: smallint)
+              Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int)
+          outputColumnNames: _col1, _col2
+          Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col1: smallint, _col2: int
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col2 ASC NULLS FIRST
+                  partition by: _col1
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: last_value_window_0
+                        arguments: _col2
+                        name: last_value
+                        window function: GenericUDAFLastValueEvaluator
+                        window frame: PRECEDING(MAX)~
+            Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: last_value_window_0 (type: int)
+              outputColumnNames: last_value_window_0
+              Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: last_value_window_0 (type: int)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: int)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: int)
+              Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+              TopN Hash Memory Usage: 0.1
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 63596 Data size: 508768 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 10
+            Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct last_value(i) over ( partition by si order by i )
+from over10k limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct last_value(i) over ( partition by si order by i )
+from over10k limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+65536
+65537
+65538
+65539
+65540
+65541
+65542
+65543
+65544
+65545
+PREHOOK: query: explain
+select distinct last_value(i) over ( partition by si order by i ),
+                first_value(t)  over ( partition by si order by i )
+from over10k limit 50
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select distinct last_value(i) over ( partition by si order by i ),
+                first_value(t)  over ( partition by si order by i )
+from over10k limit 50
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: over10k
+            Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+            Reduce Output Operator
+              key expressions: si (type: smallint), i (type: int)
+              sort order: ++
+              Map-reduce partition columns: si (type: smallint)
+              Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+              value expressions: t (type: tinyint)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: tinyint, _col1: smallint, _col2: int
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col2 ASC NULLS FIRST
+                  partition by: _col1
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: last_value_window_0
+                        arguments: _col2
+                        name: last_value
+                        window function: GenericUDAFLastValueEvaluator
+                        window frame: PRECEDING(MAX)~
+                      window function definition
+                        alias: first_value_window_1
+                        arguments: _col0
+                        name: first_value
+                        window function: GenericUDAFFirstValueEvaluator
+                        window frame: PRECEDING(MAX)~
+            Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: last_value_window_0 (type: int), first_value_window_1 (type: tinyint)
+              outputColumnNames: last_value_window_0, first_value_window_1
+              Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: last_value_window_0 (type: int), first_value_window_1 (type: tinyint)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: int), _col1 (type: tinyint)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: int), _col1 (type: tinyint)
+              Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+              TopN Hash Memory Usage: 0.1
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: int), KEY._col1 (type: tinyint)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 42397 Data size: 508765 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 50
+            Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 50
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct last_value(i) over ( partition by si order by i ),
+                first_value(t)  over ( partition by si order by i )
+from over10k limit 50
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct last_value(i) over ( partition by si order by i ),
+                first_value(t)  over ( partition by si order by i )
+from over10k limit 50
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+65536	-2
+65536	2
+65536	9
+65536	12
+65536	13
+65536	18
+65536	22
+65536	23
+65536	27
+65536	37
+65536	39
+65536	42
+65536	48
+65536	55
+65536	56
+65536	58
+65536	61
+65536	69
+65536	71
+65536	73
+65536	75
+65536	78
+65536	80
+65536	83
+65536	84
+65536	88
+65536	94
+65536	104
+65536	107
+65536	108
+65536	111
+65536	114
+65536	118
+65536	119
+65536	121
+65537	4
+65537	8
+65537	9
+65537	11
+65537	18
+65537	22
+65537	25
+65537	36
+65537	51
+65537	53
+65537	54
+65537	55
+65537	56
+65537	57
+65537	59
+PREHOOK: query: explain
+select si, max(f) mf, rank() over ( partition by si order by mf )
+FROM over10k
+GROUP BY si
+HAVING max(f) > 0
+limit 50
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select si, max(f) mf, rank() over ( partition by si order by mf )
+FROM over10k
+GROUP BY si
+HAVING max(f) > 0
+limit 50
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: over10k
+            Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: si (type: smallint), f (type: float)
+              outputColumnNames: si, f
+              Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: max(f)
+                keys: si (type: smallint)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: smallint)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: smallint)
+                  Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: float)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: max(VALUE._col0)
+          keys: KEY._col0 (type: smallint)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 63596 Data size: 508768 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            predicate: (_col1 > 0.0) (type: boolean)
+            Statistics: Num rows: 21198 Data size: 169584 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: smallint), _col1 (type: float)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: smallint)
+              Statistics: Num rows: 21198 Data size: 169584 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: float)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 21198 Data size: 169584 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: smallint, _col1: float
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col1 ASC NULLS FIRST
+                  partition by: _col0
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: rank_window_0
+                        arguments: _col1
+                        name: rank
+                        window function: GenericUDAFRankEvaluator
+                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        isPivotResult: true
+            Statistics: Num rows: 21198 Data size: 169584 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: smallint), _col1 (type: float), rank_window_0 (type: int)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 21198 Data size: 169584 Basic stats: COMPLETE Column stats: NONE
+              Limit
+                Number of rows: 50
+                Statistics: Num rows: 50 Data size: 400 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 50 Data size: 400 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 50
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select si, max(f) mf, rank() over ( partition by si order by mf )
+FROM over10k
+GROUP BY si
+HAVING max(f) > 0
+limit 50
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select si, max(f) mf, rank() over ( partition by si order by mf )
+FROM over10k
+GROUP BY si
+HAVING max(f) > 0
+limit 50
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+256	94.87	1
+257	98.0	1
+258	98.19	1
+259	99.71	1
+260	99.78	1
+261	98.09	1
+262	98.41	1
+263	97.32	1
+264	97.65	1
+265	96.18	1
+266	99.41	1
+267	99.8	1
+268	93.34	1
+269	99.24	1
+270	98.57	1
+271	96.02	1
+272	92.82	1
+273	97.51	1
+274	95.43	1
+275	99.68	1
+276	98.94	1
+277	97.26	1
+278	98.56	1
+279	98.09	1
+280	99.21	1
+281	99.32	1
+282	95.49	1
+283	96.46	1
+284	99.34	1
+285	99.34	1
+286	92.77	1
+287	99.29	1
+288	96.71	1
+289	97.13	1
+290	99.88	1
+291	99.18	1
+292	94.99	1
+293	95.36	1
+294	99.34	1
+295	90.67	1
+296	96.85	1
+297	95.62	1
+298	99.98	1
+299	99.36	1
+300	98.76	1
+301	99.08	1
+302	99.84	1
+303	98.57	1
+304	94.68	1
+305	96.1	1
+PREHOOK: query: explain
+select distinct si, rank() over ( partition by si order by i )
+FROM over10k
+limit 50
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select distinct si, rank() over ( partition by si order by i )
+FROM over10k
+limit 50
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: over10k
+            Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+            Reduce Output Operator
+              key expressions: si (type: smallint), i (type: int)
+              sort order: ++
+              Map-reduce partition columns: si (type: smallint)
+              Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int)
+          outputColumnNames: _col1, _col2
+          Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col1: smallint, _col2: int
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col2 ASC NULLS FIRST
+                  partition by: _col1
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: rank_window_0
+                        arguments: _col2
+                        name: rank
+                        window function: GenericUDAFRankEvaluator
+                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        isPivotResult: true
+            Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col1 (type: smallint), rank_window_0 (type: int)
+              outputColumnNames: _col1, rank_window_0
+              Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: _col1 (type: smallint), rank_window_0 (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: smallint), _col1 (type: int)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int)
+              Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+              TopN Hash Memory Usage: 0.1
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: smallint), KEY._col1 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 63596 Data size: 508768 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 50
+            Statistics: Num rows: 50 Data size: 400 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 50 Data size: 400 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 50
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct si, rank() over ( partition by si order by i )
+FROM over10k
+limit 50
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct si, rank() over ( partition by si order by i )
+FROM over10k
+limit 50
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+256	1
+256	2
+256	3
+256	4
+256	5
+256	6
+256	7
+256	8
+256	9
+256	10
+256	11
+256	13
+256	14
+256	15
+256	16
+256	17
+256	18
+256	19
+256	20
+256	21
+256	22
+256	23
+256	24
+256	25
+256	26
+256	27
+256	28
+256	29
+256	30
+256	32
+256	33
+256	34
+256	35
+256	37
+257	1
+257	2
+257	3
+257	4
+257	5
+257	6
+257	7
+257	8
+257	9
+257	10
+257	11
+257	12
+257	13
+257	14
+257	16
+257	17

http://git-wip-us.apache.org/repos/asf/hive/blob/36e810fa/ql/src/test/results/clientpositive/llap/cbo_rp_windowing_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/cbo_rp_windowing_2.q.out b/ql/src/test/results/clientpositive/llap/cbo_rp_windowing_2.q.out
index aa34d3d..3434336 100644
--- a/ql/src/test/results/clientpositive/llap/cbo_rp_windowing_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/cbo_rp_windowing_2.q.out
@@ -1887,8 +1887,9 @@ window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@part
 #### A masked pattern was here ####
-Manufacturer#1	almond antique burnished rose metallic	2	42
-Manufacturer#1	almond antique chartreuse lavender yellow	34	70
+Manufacturer#1	almond antique burnished rose metallic	2	38
+Manufacturer#1	almond antique burnished rose metallic	2	44
+Manufacturer#1	almond antique chartreuse lavender yellow	34	72
 Manufacturer#1	almond antique salmon chartreuse burlywood	6	112
 Manufacturer#1	almond aquamarine burnished black steel	28	110
 Manufacturer#1	almond aquamarine pink moccasin thistle	42	76

http://git-wip-us.apache.org/repos/asf/hive/blob/36e810fa/ql/src/test/results/clientpositive/llap/windowing.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/windowing.q.out b/ql/src/test/results/clientpositive/llap/windowing.q.out
index b17bfc1..190d13b 100644
--- a/ql/src/test/results/clientpositive/llap/windowing.q.out
+++ b/ql/src/test/results/clientpositive/llap/windowing.q.out
@@ -1872,23 +1872,125 @@ Manufacturer#5	almond antique sky peru orange	2	2	39	108
 Manufacturer#5	almond aquamarine dodger light gainsboro	46	46	85	77
 Manufacturer#5	almond azure blanched chiffon midnight	23	23	108	71
 PREHOOK: query: -- 35. testDistinctWithWindowing
+explain
 select DISTINCT p_mfgr, p_name, p_size,
 sum(p_size) over w1 as s
 from part
 window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
 PREHOOK: type: QUERY
-PREHOOK: Input: default@part
-#### A masked pattern was here ####
 POSTHOOK: query: -- 35. testDistinctWithWindowing
+explain
 select DISTINCT p_mfgr, p_name, p_size,
 sum(p_size) over w1 as s
 from part
 window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
 POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: part
+                  Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: p_mfgr (type: string), p_name (type: string)
+                    sort order: ++
+                    Map-reduce partition columns: p_mfgr (type: string)
+                    Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: p_size (type: int)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
+                outputColumnNames: _col1, _col2, _col5
+                Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+                PTF Operator
+                  Function definitions:
+                      Input definition
+                        input alias: ptf_0
+                        output shape: _col1: string, _col2: string, _col5: int
+                        type: WINDOWING
+                      Windowing table definition
+                        input alias: ptf_1
+                        name: windowingtablefunction
+                        order by: _col1 ASC NULLS FIRST
+                        partition by: _col2
+                        raw input shape:
+                        window functions:
+                            window function definition
+                              alias: sum_window_0
+                              arguments: _col5
+                              name: sum
+                              window function: GenericUDAFSumLong
+                              window frame: PRECEDING(2)~FOLLOWING(2)
+                  Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), sum_window_0 (type: bigint)
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: bigint)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: bigint)
+                        sort order: ++++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: bigint)
+                        Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: bigint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select DISTINCT p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select DISTINCT p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
 POSTHOOK: Input: default@part
 #### A masked pattern was here ####
-Manufacturer#1	almond antique burnished rose metallic	2	42
-Manufacturer#1	almond antique chartreuse lavender yellow	34	70
+Manufacturer#1	almond antique burnished rose metallic	2	38
+Manufacturer#1	almond antique burnished rose metallic	2	44
+Manufacturer#1	almond antique chartreuse lavender yellow	34	72
 Manufacturer#1	almond antique salmon chartreuse burlywood	6	112
 Manufacturer#1	almond aquamarine burnished black steel	28	110
 Manufacturer#1	almond aquamarine pink moccasin thistle	42	76

http://git-wip-us.apache.org/repos/asf/hive/blob/36e810fa/ql/src/test/results/clientpositive/spark/windowing.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/windowing.q.out b/ql/src/test/results/clientpositive/spark/windowing.q.out
index b17bfc1..72b2245 100644
--- a/ql/src/test/results/clientpositive/spark/windowing.q.out
+++ b/ql/src/test/results/clientpositive/spark/windowing.q.out
@@ -1872,23 +1872,120 @@ Manufacturer#5	almond antique sky peru orange	2	2	39	108
 Manufacturer#5	almond aquamarine dodger light gainsboro	46	46	85	77
 Manufacturer#5	almond azure blanched chiffon midnight	23	23	108	71
 PREHOOK: query: -- 35. testDistinctWithWindowing
+explain
 select DISTINCT p_mfgr, p_name, p_size,
 sum(p_size) over w1 as s
 from part
 window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
 PREHOOK: type: QUERY
-PREHOOK: Input: default@part
-#### A masked pattern was here ####
 POSTHOOK: query: -- 35. testDistinctWithWindowing
+explain
 select DISTINCT p_mfgr, p_name, p_size,
 sum(p_size) over w1 as s
 from part
 window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
 POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4)
+        Reducer 3 <- Reducer 2 (GROUP, 4)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: part
+                  Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: p_mfgr (type: string), p_name (type: string)
+                    sort order: ++
+                    Map-reduce partition columns: p_mfgr (type: string)
+                    Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: p_size (type: int)
+        Reducer 2 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
+                outputColumnNames: _col1, _col2, _col5
+                Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+                PTF Operator
+                  Function definitions:
+                      Input definition
+                        input alias: ptf_0
+                        output shape: _col1: string, _col2: string, _col5: int
+                        type: WINDOWING
+                      Windowing table definition
+                        input alias: ptf_1
+                        name: windowingtablefunction
+                        order by: _col1 ASC NULLS FIRST
+                        partition by: _col2
+                        raw input shape:
+                        window functions:
+                            window function definition
+                              alias: sum_window_0
+                              arguments: _col5
+                              name: sum
+                              window function: GenericUDAFSumLong
+                              window frame: PRECEDING(2)~FOLLOWING(2)
+                  Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), sum_window_0 (type: bigint)
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: bigint)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: bigint)
+                        sort order: ++++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: bigint)
+                        Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: bigint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select DISTINCT p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select DISTINCT p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
 POSTHOOK: Input: default@part
 #### A masked pattern was here ####
-Manufacturer#1	almond antique burnished rose metallic	2	42
-Manufacturer#1	almond antique chartreuse lavender yellow	34	70
+Manufacturer#1	almond antique burnished rose metallic	2	38
+Manufacturer#1	almond antique burnished rose metallic	2	44
+Manufacturer#1	almond antique chartreuse lavender yellow	34	72
 Manufacturer#1	almond antique salmon chartreuse burlywood	6	112
 Manufacturer#1	almond aquamarine burnished black steel	28	110
 Manufacturer#1	almond aquamarine pink moccasin thistle	42	76