You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2016/04/05 20:26:15 UTC
[2/2] hive git commit: HIVE-13242: DISTINCT keyword is dropped by the parser for windowing (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
HIVE-13242: DISTINCT keyword is dropped by the parser for windowing (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f819fa8e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f819fa8e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f819fa8e
Branch: refs/heads/branch-2.0
Commit: f819fa8e62cb6ee47133ed827f619eba6ba3a408
Parents: 59491df
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Thu Mar 17 10:32:08 2016 +0100
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Tue Apr 5 11:25:58 2016 -0700
----------------------------------------------------------------------
.../ql/optimizer/calcite/HiveRelFactories.java | 5 -
.../calcite/reloperators/HiveAggregate.java | 9 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 31 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 9 +-
.../distinct_windowing_failure1.q | 20 +
.../distinct_windowing_failure2.q | 22 +
.../queries/clientpositive/distinct_windowing.q | 39 ++
.../distinct_windowing_failure1.q.out | 47 ++
.../distinct_windowing_failure2.q.out | 47 ++
.../clientpositive/distinct_windowing.q.out | 451 +++++++++++++++++++
10 files changed, 655 insertions(+), 25 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f819fa8e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java
index eeec44e..d62b16b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java
@@ -24,7 +24,6 @@ import java.util.Set;
import org.apache.calcite.plan.Contexts;
import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.plan.RelTraitSet;
-import org.apache.calcite.rel.InvalidRelException;
import org.apache.calcite.rel.RelCollation;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.AggregateCall;
@@ -184,12 +183,8 @@ public class HiveRelFactories {
public RelNode createAggregate(RelNode child, boolean indicator,
ImmutableBitSet groupSet, ImmutableList<ImmutableBitSet> groupSets,
List<AggregateCall> aggCalls) {
- try {
return new HiveAggregate(child.getCluster(), child.getTraitSet(), child, indicator,
groupSet, groupSets, aggCalls);
- } catch (InvalidRelException e) {
- throw new RuntimeException(e);
- }
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/f819fa8e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java
index aae57e6..2e79fa8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java
@@ -25,7 +25,6 @@ import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.plan.RelOptCost;
import org.apache.calcite.plan.RelOptPlanner;
import org.apache.calcite.plan.RelTraitSet;
-import org.apache.calcite.rel.InvalidRelException;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Aggregate;
import org.apache.calcite.rel.core.AggregateCall;
@@ -44,7 +43,7 @@ public class HiveAggregate extends Aggregate implements HiveRelNode {
public HiveAggregate(RelOptCluster cluster, RelTraitSet traitSet, RelNode child,
boolean indicator, ImmutableBitSet groupSet, List<ImmutableBitSet> groupSets,
- List<AggregateCall> aggCalls) throws InvalidRelException {
+ List<AggregateCall> aggCalls) {
super(cluster, TraitsUtil.getDefaultTraitSet(cluster), child, indicator, groupSet,
groupSets, aggCalls);
}
@@ -53,14 +52,8 @@ public class HiveAggregate extends Aggregate implements HiveRelNode {
public Aggregate copy(RelTraitSet traitSet, RelNode input,
boolean indicator, ImmutableBitSet groupSet,
List<ImmutableBitSet> groupSets, List<AggregateCall> aggCalls) {
- try {
return new HiveAggregate(getCluster(), traitSet, input, indicator, groupSet,
groupSets, aggCalls);
- } catch (InvalidRelException e) {
- // Semantic error not possible. Must be a bug. Convert to
- // internal error.
- throw new AssertionError(e);
- }
}
@Override
http://git-wip-us.apache.org/repos/asf/hive/blob/f819fa8e/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 411c6d1..13953da 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -49,7 +49,6 @@ import org.apache.calcite.plan.hep.HepMatchOrder;
import org.apache.calcite.plan.hep.HepPlanner;
import org.apache.calcite.plan.hep.HepProgram;
import org.apache.calcite.plan.hep.HepProgramBuilder;
-import org.apache.calcite.rel.InvalidRelException;
import org.apache.calcite.rel.RelCollation;
import org.apache.calcite.rel.RelCollationImpl;
import org.apache.calcite.rel.RelCollations;
@@ -1998,14 +1997,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
}
RelNode gbInputRel = HiveProject.create(srcRel, gbChildProjLst, null);
- HiveRelNode aggregateRel = null;
- try {
- aggregateRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
+ HiveRelNode aggregateRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
gbInputRel, (transformedGroupSets!=null ? true:false), groupSet,
transformedGroupSets, aggregateCalls);
- } catch (InvalidRelException e) {
- throw new SemanticException(e);
- }
return aggregateRel;
}
@@ -2182,7 +2176,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
}
}
- List<ASTNode> grpByAstExprs = SemanticAnalyzer.getGroupByForClause(qbp, detsClauseName);
+ List<ASTNode> grpByAstExprs = getGroupByForClause(qbp, detsClauseName);
HashMap<String, ASTNode> aggregationTrees = qbp.getAggregationExprsForClause(detsClauseName);
boolean hasGrpByAstExprs = (grpByAstExprs != null && !grpByAstExprs.isEmpty()) ? true : false;
boolean hasAggregationTrees = (aggregationTrees != null && !aggregationTrees.isEmpty()) ? true
@@ -2944,9 +2938,26 @@ public class CalcitePlanner extends SemanticAnalyzer {
}
// 8. Build Calcite Rel
- RelNode selRel = genSelectRelNode(calciteColLst, out_rwsch, srcRel);
+ RelNode outputRel = genSelectRelNode(calciteColLst, out_rwsch, srcRel);
- return selRel;
+ // 9. Handle select distinct as GBY if there exist windowing functions
+ if (selForWindow != null && selExprList.getToken().getType() == HiveParser.TOK_SELECTDI) {
+ ImmutableBitSet groupSet = ImmutableBitSet.range(outputRel.getRowType().getFieldList().size());
+ outputRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
+ outputRel, false, groupSet, null, new ArrayList<AggregateCall>());
+ RowResolver groupByOutputRowResolver = new RowResolver();
+ for (int i = 0; i < out_rwsch.getColumnInfos().size(); i++) {
+ ColumnInfo colInfo = out_rwsch.getColumnInfos().get(i);
+ ColumnInfo newColInfo = new ColumnInfo(colInfo.getInternalName(),
+ colInfo.getType(), colInfo.getTabAlias(), colInfo.getIsVirtualCol());
+ groupByOutputRowResolver.put(colInfo.getTabAlias(), colInfo.getAlias(), newColInfo);
+ }
+ relToHiveColNameCalcitePosMap.put(outputRel,
+ buildHiveToCalciteColumnMap(groupByOutputRowResolver, outputRel));
+ this.relToHiveRR.put(outputRel, groupByOutputRowResolver);
+ }
+
+ return outputRel;
}
private RelNode genLogicalPlan(QBExpr qbexpr) throws SemanticException {
http://git-wip-us.apache.org/repos/asf/hive/blob/f819fa8e/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 86d0bee..5cddae6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -3481,7 +3481,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
* automatically translates SELECT DISTINCT a,b,c to SELECT a,b,c GROUP BY
* a,b,c.
*/
- static List<ASTNode> getGroupByForClause(QBParseInfo parseInfo, String dest) {
+ List<ASTNode> getGroupByForClause(QBParseInfo parseInfo, String dest) throws SemanticException {
if (parseInfo.getSelForClause(dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
ASTNode selectExprs = parseInfo.getSelForClause(dest);
List<ASTNode> result = new ArrayList<ASTNode>(selectExprs == null ? 0
@@ -3499,6 +3499,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
* If this is handled by Windowing then ignore it.
*/
if (windowingExprs != null && windowingExprs.containsKey(grpbyExpr.toStringTree())) {
+ if (!isCBOExecuted()) {
+ throw new SemanticException("SELECT DISTINCT not allowed in the presence of windowing"
+ + " functions when CBO is off");
+ }
continue;
}
result.add(grpbyExpr);
@@ -3886,6 +3890,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
output = genUDTFPlan(genericUDTF, udtfTableAlias, udtfColAliases, qb,
output, outerLV);
}
+
if (LOG.isDebugEnabled()) {
LOG.debug("Created Select Plan row schema: " + out_rwsch.toString());
}
@@ -5577,7 +5582,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
return groupByOperatorInfo2;
}
- private boolean optimizeMapAggrGroupBy(String dest, QB qb) {
+ private boolean optimizeMapAggrGroupBy(String dest, QB qb) throws SemanticException {
List<ASTNode> grpByExprs = getGroupByForClause(qb.getParseInfo(), dest);
if ((grpByExprs != null) && !grpByExprs.isEmpty()) {
return false;
http://git-wip-us.apache.org/repos/asf/hive/blob/f819fa8e/ql/src/test/queries/clientnegative/distinct_windowing_failure1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/distinct_windowing_failure1.q b/ql/src/test/queries/clientnegative/distinct_windowing_failure1.q
new file mode 100644
index 0000000..39fe474
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/distinct_windowing_failure1.q
@@ -0,0 +1,20 @@
+drop table over10k;
+
+create table over10k(
+ t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+ row format delimited
+ fields terminated by '|';
+
+load data local inpath '../../data/files/over10k' into table over10k;
+
+select count(distinct last_value(i) over ( partition by si order by i )) from over10k;
http://git-wip-us.apache.org/repos/asf/hive/blob/f819fa8e/ql/src/test/queries/clientnegative/distinct_windowing_failure2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/distinct_windowing_failure2.q b/ql/src/test/queries/clientnegative/distinct_windowing_failure2.q
new file mode 100644
index 0000000..f07dc18
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/distinct_windowing_failure2.q
@@ -0,0 +1,22 @@
+drop table over10k;
+
+create table over10k(
+ t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+ row format delimited
+ fields terminated by '|';
+
+load data local inpath '../../data/files/over10k' into table over10k;
+
+select distinct last_value(i) over ( partition by si order by i ),
+ distinct first_value(t) over ( partition by si order by i )
+from over10k ;
http://git-wip-us.apache.org/repos/asf/hive/blob/f819fa8e/ql/src/test/queries/clientpositive/distinct_windowing.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/distinct_windowing.q b/ql/src/test/queries/clientpositive/distinct_windowing.q
new file mode 100644
index 0000000..f6a00f0
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/distinct_windowing.q
@@ -0,0 +1,39 @@
+drop table over10k;
+
+create table over10k(
+ t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+ row format delimited
+ fields terminated by '|';
+
+load data local inpath '../../data/files/over10k' into table over10k;
+
+explain
+select distinct first_value(t) over ( partition by si order by i ) from over10k limit 10;
+
+select distinct first_value(t) over ( partition by si order by i ) from over10k limit 10;
+
+explain
+select distinct last_value(i) over ( partition by si order by i )
+from over10k limit 10;
+
+select distinct last_value(i) over ( partition by si order by i )
+from over10k limit 10;
+
+explain
+select distinct last_value(i) over ( partition by si order by i ),
+ first_value(t) over ( partition by si order by i )
+from over10k limit 50;
+
+select distinct last_value(i) over ( partition by si order by i ),
+ first_value(t) over ( partition by si order by i )
+from over10k limit 50;
http://git-wip-us.apache.org/repos/asf/hive/blob/f819fa8e/ql/src/test/results/clientnegative/distinct_windowing_failure1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/distinct_windowing_failure1.q.out b/ql/src/test/results/clientnegative/distinct_windowing_failure1.q.out
new file mode 100644
index 0000000..18cf0c9
--- /dev/null
+++ b/ql/src/test/results/clientnegative/distinct_windowing_failure1.q.out
@@ -0,0 +1,47 @@
+PREHOOK: query: drop table over10k
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table over10k
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table over10k(
+ t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+ row format delimited
+ fields terminated by '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@over10k
+POSTHOOK: query: create table over10k(
+ t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+ row format delimited
+ fields terminated by '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@over10k
+PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@over10k
+POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@over10k
+FAILED: SemanticException [Error 10002]: Line 3:68 Invalid column reference 'i': (possible column names are: t, si, i, b, f, d, bo, s, ts, dec, bin)
http://git-wip-us.apache.org/repos/asf/hive/blob/f819fa8e/ql/src/test/results/clientnegative/distinct_windowing_failure2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/distinct_windowing_failure2.q.out b/ql/src/test/results/clientnegative/distinct_windowing_failure2.q.out
new file mode 100644
index 0000000..e370819
--- /dev/null
+++ b/ql/src/test/results/clientnegative/distinct_windowing_failure2.q.out
@@ -0,0 +1,47 @@
+PREHOOK: query: drop table over10k
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table over10k
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table over10k(
+ t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+ row format delimited
+ fields terminated by '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@over10k
+POSTHOOK: query: create table over10k(
+ t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+ row format delimited
+ fields terminated by '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@over10k
+PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@over10k
+POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@over10k
+FAILED: ParseException line 4:7 cannot recognize input near 'distinct' 'first_value' '(' in selection target
http://git-wip-us.apache.org/repos/asf/hive/blob/f819fa8e/ql/src/test/results/clientpositive/distinct_windowing.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/distinct_windowing.q.out b/ql/src/test/results/clientpositive/distinct_windowing.q.out
new file mode 100644
index 0000000..e6cde90
--- /dev/null
+++ b/ql/src/test/results/clientpositive/distinct_windowing.q.out
@@ -0,0 +1,451 @@
+PREHOOK: query: drop table over10k
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table over10k
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table over10k(
+ t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+ row format delimited
+ fields terminated by '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@over10k
+POSTHOOK: query: create table over10k(
+ t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ dec decimal(4,2),
+ bin binary)
+ row format delimited
+ fields terminated by '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@over10k
+PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@over10k
+POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@over10k
+PREHOOK: query: explain
+select distinct first_value(t) over ( partition by si order by i ) from over10k limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select distinct first_value(t) over ( partition by si order by i ) from over10k limit 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: over10k
+ Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: si (type: smallint), i (type: int)
+ sort order: ++
+ Map-reduce partition columns: si (type: smallint)
+ Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ value expressions: t (type: tinyint)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: tinyint, _col1: smallint, _col2: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col2 ASC NULLS FIRST
+ partition by: _col1
+ raw input shape:
+ window functions:
+ window function definition
+ alias: first_value_window_0
+ arguments: _col0
+ name: first_value
+ window function: GenericUDAFFirstValueEvaluator
+ window frame: PRECEDING(MAX)~
+ Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: first_value_window_0 (type: tinyint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 42397 Data size: 508765 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct first_value(t) over ( partition by si order by i ) from over10k limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct first_value(t) over ( partition by si order by i ) from over10k limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+-2
+-1
+0
+1
+2
+3
+4
+6
+7
+8
+PREHOOK: query: explain
+select distinct last_value(i) over ( partition by si order by i )
+from over10k limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select distinct last_value(i) over ( partition by si order by i )
+from over10k limit 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: over10k
+ Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: si (type: smallint), i (type: int)
+ sort order: ++
+ Map-reduce partition columns: si (type: smallint)
+ Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: smallint, _col2: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col2 ASC NULLS FIRST
+ partition by: _col1
+ raw input shape:
+ window functions:
+ window function definition
+ alias: last_value_window_0
+ arguments: _col2
+ name: last_value
+ window function: GenericUDAFLastValueEvaluator
+ window frame: PRECEDING(MAX)~
+ Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: last_value_window_0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 63596 Data size: 508768 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct last_value(i) over ( partition by si order by i )
+from over10k limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct last_value(i) over ( partition by si order by i )
+from over10k limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+65536
+65537
+65538
+65539
+65540
+65541
+65542
+65543
+65544
+65545
+PREHOOK: query: explain
+select distinct last_value(i) over ( partition by si order by i ),
+ first_value(t) over ( partition by si order by i )
+from over10k limit 50
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select distinct last_value(i) over ( partition by si order by i ),
+ first_value(t) over ( partition by si order by i )
+from over10k limit 50
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: over10k
+ Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: si (type: smallint), i (type: int)
+ sort order: ++
+ Map-reduce partition columns: si (type: smallint)
+ Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ value expressions: t (type: tinyint)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: tinyint, _col1: smallint, _col2: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col2 ASC NULLS FIRST
+ partition by: _col1
+ raw input shape:
+ window functions:
+ window function definition
+ alias: last_value_window_0
+ arguments: _col2
+ name: last_value
+ window function: GenericUDAFLastValueEvaluator
+ window frame: PRECEDING(MAX)~
+ window function definition
+ alias: first_value_window_1
+ arguments: _col0
+ name: first_value
+ window function: GenericUDAFFirstValueEvaluator
+ window frame: PRECEDING(MAX)~
+ Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: last_value_window_0 (type: int), first_value_window_1 (type: tinyint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int), _col1 (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: tinyint)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: tinyint)
+ Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 42397 Data size: 508765 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 50
+ Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 50
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct last_value(i) over ( partition by si order by i ),
+ first_value(t) over ( partition by si order by i )
+from over10k limit 50
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct last_value(i) over ( partition by si order by i ),
+ first_value(t) over ( partition by si order by i )
+from over10k limit 50
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+65536 -2
+65536 2
+65536 9
+65536 12
+65536 13
+65536 18
+65536 22
+65536 23
+65536 27
+65536 37
+65536 39
+65536 42
+65536 48
+65536 55
+65536 56
+65536 58
+65536 61
+65536 69
+65536 71
+65536 73
+65536 75
+65536 78
+65536 80
+65536 83
+65536 84
+65536 88
+65536 94
+65536 104
+65536 107
+65536 108
+65536 111
+65536 114
+65536 118
+65536 119
+65536 121
+65537 4
+65537 8
+65537 9
+65537 11
+65537 18
+65537 22
+65537 25
+65537 36
+65537 51
+65537 53
+65537 54
+65537 55
+65537 56
+65537 57
+65537 59