You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2017/01/06 09:29:14 UTC
hive git commit: HIVE-15474: Extend limit propagation for chain of
RS-GB-RS operators (Jesus Camacho Rodriguez, reviewed by Rui Li)
Repository: hive
Updated Branches:
refs/heads/master 4e3a62071 -> aab5dd185
HIVE-15474: Extend limit propagation for chain of RS-GB-RS operators (Jesus Camacho Rodriguez, reviewed by Rui Li)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/aab5dd18
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/aab5dd18
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/aab5dd18
Branch: refs/heads/master
Commit: aab5dd185dbf68daff57b5f774a6157f61c0874e
Parents: 4e3a620
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Tue Dec 20 20:37:14 2016 +0000
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Fri Jan 6 09:29:10 2017 +0000
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 1 +
.../ql/optimizer/LimitPushdownOptimizer.java | 67 +-
.../hadoop/hive/ql/plan/ExprNodeDescUtils.java | 69 ++
.../queries/clientpositive/limit_pushdown2.q | 18 +
.../clientpositive/limit_pushdown2.q.out | 232 ++++
.../clientpositive/spark/limit_pushdown2.q.out | 1141 ++++++++++++++++++
6 files changed, 1494 insertions(+), 34 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/aab5dd18/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 1cebc70..70e7197 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -1156,6 +1156,7 @@ spark.query.files=add_part_multiple.q, \
leftsemijoin_mr.q, \
limit_partition_metadataonly.q, \
limit_pushdown.q, \
+ limit_pushdown2.q, \
list_bucket_dml_2.q, \
load_dyn_part1.q, \
load_dyn_part10.q, \
http://git-wip-us.apache.org/repos/asf/hive/blob/aab5dd18/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java
index f68d0ad..9bf197b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java
@@ -179,15 +179,41 @@ public class LimitPushdownOptimizer extends Transform {
// Not safe to continue for RS-GBY-GBY-LIM kind of pipelines. See HIVE-10607 for more.
return false;
}
- if (!checkKeys(cRS.getConf().getKeyCols(), pRS.getConf().getKeyCols(), cRS, pRS)) {
- // Keys are not the same; bail out
- return false;
+ List<ExprNodeDesc> cKeys = cRS.getConf().getKeyCols();
+ List<ExprNodeDesc> pKeys = pRS.getConf().getKeyCols();
+ if (pRS.getChildren().get(0) instanceof GroupByOperator &&
+ pRS.getChildren().get(0).getChildren().get(0) == cRS) {
+ // RS-GB-RS
+ GroupByOperator gBy = (GroupByOperator) pRS.getChildren().get(0);
+ List<ExprNodeDesc> gKeys = gBy.getConf().getKeys();
+ if (!ExprNodeDescUtils.checkPrefixKeysUpstream(cKeys, pKeys, cRS, pRS)) {
+ // We might still be able to push the limit
+ if (!ExprNodeDescUtils.checkPrefixKeys(cKeys, gKeys, cRS, gBy) ||
+ !ExprNodeDescUtils.checkPrefixKeys(gKeys, pKeys, gBy, pRS)) {
+ // We cannot push limit; bail out
+ return false;
+ }
+ }
+ } else {
+ if (!ExprNodeDescUtils.checkPrefixKeysUpstream(cKeys, pKeys, cRS, pRS)) {
+ // We cannot push limit; bail out
+ return false;
+ }
}
// Copy order
- StringBuilder order = new StringBuilder(cRS.getConf().getOrder());
- StringBuilder orderNull = new StringBuilder(cRS.getConf().getNullOrder());
- order.append(pRS.getConf().getOrder().substring(order.length()));
- orderNull.append(pRS.getConf().getNullOrder().substring(orderNull.length()));
+ StringBuilder order;
+ StringBuilder orderNull;
+ if (pRS.getConf().getOrder().length() > cRS.getConf().getOrder().length()) {
+ order = new StringBuilder(cRS.getConf().getOrder());
+ orderNull = new StringBuilder(cRS.getConf().getNullOrder());
+ order.append(pRS.getConf().getOrder().substring(order.length()));
+ orderNull.append(pRS.getConf().getNullOrder().substring(orderNull.length()));
+ } else {
+ order = new StringBuilder(cRS.getConf().getOrder().substring(
+ 0, pRS.getConf().getOrder().length()));
+ orderNull = new StringBuilder(cRS.getConf().getNullOrder().substring(
+ 0, pRS.getConf().getNullOrder().length()));
+ }
pRS.getConf().setOrder(order.toString());
pRS.getConf().setNullOrder(orderNull.toString());
// Copy limit
@@ -201,33 +227,6 @@ public class LimitPushdownOptimizer extends Transform {
}
}
- private static boolean checkKeys(List<ExprNodeDesc> cKeys, List<ExprNodeDesc> pKeys,
- ReduceSinkOperator cRS, ReduceSinkOperator pRS) throws SemanticException {
- if (cKeys == null || cKeys.isEmpty()) {
- if (pKeys != null && !pKeys.isEmpty()) {
- return false;
- }
- return true;
- }
- if (pKeys == null || pKeys.isEmpty()) {
- return false;
- }
- if (cKeys.size() > pKeys.size()) {
- return false;
- }
- for (int i = 0; i < cKeys.size(); i++) {
- ExprNodeDesc expr = ExprNodeDescUtils.backtrack(cKeys.get(i), cRS, pRS);
- if (expr == null) {
- // cKey is not present in parent
- return false;
- }
- if (!expr.isSame(pKeys.get(i))) {
- return false;
- }
- }
- return true;
- }
-
private static class LimitPushdownContext implements NodeProcessorCtx {
private final float threshold;
http://git-wip-us.apache.org/repos/asf/hive/blob/aab5dd18/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
index 4c699a2..6c10704 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
@@ -713,4 +713,73 @@ public class ExprNodeDescUtils {
// Otherwise, we return the expression
return foldedExpr;
}
+
+ /**
+ * Checks whether the keys of a parent operator are a prefix of the keys of a
+ * child operator; each child key goes through backtrack(key, childOp, parentOp) before being compared.
+ * @param childKeys keys of the child operator
+ * @param parentKeys keys of the parent operator
+ * @param childOp child operator, passed to backtrack together with each child key
+ * @param parentOp parent operator, passed to backtrack as the operator to trace back to
+ * @return true if the parent keys are a prefix of the child keys or both lists are null/empty; false otherwise, including when exactly one list is null/empty
+ * @throws SemanticException propagated from the shared prefix check this method delegates to
+ */
+ public static boolean checkPrefixKeys(List<ExprNodeDesc> childKeys, List<ExprNodeDesc> parentKeys,
+ Operator<? extends OperatorDesc> childOp, Operator<? extends OperatorDesc> parentOp)
+ throws SemanticException {
+ return checkPrefixKeys(childKeys, parentKeys, childOp, parentOp, false); // false: parent keys must be the (shorter or equal) prefix
+ }
+
+ /**
+ * Checks whether the keys of a child operator are a prefix of the keys of a
+ * parent operator; each child key goes through backtrack(key, childOp, parentOp) before being compared.
+ * @param childKeys keys of the child operator
+ * @param parentKeys keys of the parent operator
+ * @param childOp child operator, passed to backtrack together with each child key
+ * @param parentOp parent operator, passed to backtrack as the operator to trace back to
+ * @return true if the child keys are a prefix of the parent keys or both lists are null/empty; false otherwise, including when exactly one list is null/empty
+ * @throws SemanticException propagated from the shared prefix check this method delegates to
+ */
+ public static boolean checkPrefixKeysUpstream(List<ExprNodeDesc> childKeys, List<ExprNodeDesc> parentKeys,
+ Operator<? extends OperatorDesc> childOp, Operator<? extends OperatorDesc> parentOp)
+ throws SemanticException {
+ return checkPrefixKeys(childKeys, parentKeys, childOp, parentOp, true); // true: child keys must be the (shorter or equal) prefix
+ }
+
+ private static boolean checkPrefixKeys(List<ExprNodeDesc> childKeys, List<ExprNodeDesc> parentKeys, // shared implementation behind both public prefix checks
+ Operator<? extends OperatorDesc> childOp, Operator<? extends OperatorDesc> parentOp,
+ boolean upstream) throws SemanticException { // upstream selects which key list must be the prefix
+ if (childKeys == null || childKeys.isEmpty()) { // no child keys: only an equally empty parent matches
+ if (parentKeys != null && !parentKeys.isEmpty()) {
+ return false;
+ }
+ return true; // both key lists are null/empty
+ }
+ if (parentKeys == null || parentKeys.isEmpty()) { // child has keys but parent has none, in either direction
+ return false;
+ }
+ int size; // number of leading positions to compare, i.e. the length of the candidate prefix
+ if (upstream) { // child keys must be a prefix of the parent keys
+ if (childKeys.size() > parentKeys.size()) { // a longer list cannot be a prefix of a shorter one
+ return false;
+ }
+ size = childKeys.size();
+ } else { // parent keys must be a prefix of the child keys
+ if (parentKeys.size() > childKeys.size()) { // a longer list cannot be a prefix of a shorter one
+ return false;
+ }
+ size = parentKeys.size();
+ }
+ for (int i = 0; i < size; i++) { // positional comparison: key i of the child against key i of the parent
+ ExprNodeDesc expr = ExprNodeDescUtils.backtrack(childKeys.get(i), childOp, parentOp);
+ if (expr == null) {
+ // backtrack returned null for this child key, so it has no counterpart to compare in the parent
+ return false;
+ }
+ if (!expr.isSame(parentKeys.get(i))) { // backtracked child key differs from the parent key at the same position
+ return false;
+ }
+ }
+ return true; // every compared position matched
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/aab5dd18/ql/src/test/queries/clientpositive/limit_pushdown2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/limit_pushdown2.q b/ql/src/test/queries/clientpositive/limit_pushdown2.q
index e222763..1f00182 100644
--- a/ql/src/test/queries/clientpositive/limit_pushdown2.q
+++ b/ql/src/test/queries/clientpositive/limit_pushdown2.q
@@ -57,6 +57,24 @@ select key, value, avg(key + 1) from src
group by value, key
order by key desc limit 20;
+explain
+select key, value, count(key + 1) as agg1 from src
+group by key, value
+order by key, value, agg1 limit 20;
+
+select key, value, count(key + 1) as agg1 from src
+group by key, value
+order by key, value, agg1 limit 20;
+
+explain
+select key, value, count(key + 1) as agg1 from src
+group by key, value
+order by key desc, value, agg1 limit 20;
+
+select key, value, count(key + 1) as agg1 from src
+group by key, value
+order by key desc, value, agg1 limit 20;
+
-- NOT APPLICABLE
explain
select value, avg(key + 1) myavg from src
http://git-wip-us.apache.org/repos/asf/hive/blob/aab5dd18/ql/src/test/results/clientpositive/limit_pushdown2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/limit_pushdown2.q.out b/ql/src/test/results/clientpositive/limit_pushdown2.q.out
index b44b529..fac6164 100644
--- a/ql/src/test/results/clientpositive/limit_pushdown2.q.out
+++ b/ql/src/test/results/clientpositive/limit_pushdown2.q.out
@@ -562,6 +562,238 @@ POSTHOOK: Input: default@src
76 val_76 77.0
74 val_74 75.0
72 val_72 73.0
+PREHOOK: query: explain
+select key, value, count(key + 1) as agg1 from src
+group by key, value
+order by key, value, agg1 limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, count(key + 1) as agg1 from src
+group by key, value
+order by key, value, agg1 limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col2)
+ keys: _col0 (type: string), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ value expressions: _col2 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ sort order: +++
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key, value, count(key + 1) as agg1 from src
+group by key, value
+order by key, value, agg1 limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, count(key + 1) as agg1 from src
+group by key, value
+order by key, value, agg1 limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 val_0 3
+10 val_10 1
+100 val_100 2
+103 val_103 2
+104 val_104 2
+105 val_105 1
+11 val_11 1
+111 val_111 1
+113 val_113 2
+114 val_114 1
+116 val_116 1
+118 val_118 2
+119 val_119 3
+12 val_12 2
+120 val_120 2
+125 val_125 2
+126 val_126 1
+128 val_128 3
+129 val_129 2
+131 val_131 1
+PREHOOK: query: explain
+select key, value, count(key + 1) as agg1 from src
+group by key, value
+order by key desc, value, agg1 limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, count(key + 1) as agg1 from src
+group by key, value
+order by key desc, value, agg1 limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col2)
+ keys: _col0 (type: string), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: -+
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ value expressions: _col2 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ sort order: -++
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key, value, count(key + 1) as agg1 from src
+group by key, value
+order by key desc, value, agg1 limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, count(key + 1) as agg1 from src
+group by key, value
+order by key desc, value, agg1 limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+98 val_98 2
+97 val_97 2
+96 val_96 1
+95 val_95 2
+92 val_92 1
+90 val_90 3
+9 val_9 1
+87 val_87 1
+86 val_86 1
+85 val_85 1
+84 val_84 2
+83 val_83 2
+82 val_82 1
+80 val_80 1
+8 val_8 1
+78 val_78 1
+77 val_77 1
+76 val_76 2
+74 val_74 1
+72 val_72 2
PREHOOK: query: -- NOT APPLICABLE
explain
select value, avg(key + 1) myavg from src
http://git-wip-us.apache.org/repos/asf/hive/blob/aab5dd18/ql/src/test/results/clientpositive/spark/limit_pushdown2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/limit_pushdown2.q.out b/ql/src/test/results/clientpositive/spark/limit_pushdown2.q.out
new file mode 100644
index 0000000..8b7c96d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/limit_pushdown2.q.out
@@ -0,0 +1,1141 @@
+PREHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by key, value
+order by key, value limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by key, value
+order by key, value limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col2)
+ keys: _col0 (type: string), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ value expressions: _col2 (type: struct<count:bigint,sum:double,input:double>)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key, value, avg(key + 1) from src
+group by key, value
+order by key, value limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, avg(key + 1) from src
+group by key, value
+order by key, value limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 val_0 1.0
+10 val_10 11.0
+100 val_100 101.0
+103 val_103 104.0
+104 val_104 105.0
+105 val_105 106.0
+11 val_11 12.0
+111 val_111 112.0
+113 val_113 114.0
+114 val_114 115.0
+116 val_116 117.0
+118 val_118 119.0
+119 val_119 120.0
+12 val_12 13.0
+120 val_120 121.0
+125 val_125 126.0
+126 val_126 127.0
+128 val_128 129.0
+129 val_129 130.0
+131 val_131 132.0
+PREHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by key, value
+order by key, value desc limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by key, value
+order by key, value desc limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col2)
+ keys: _col0 (type: string), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: +-
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ value expressions: _col2 (type: struct<count:bigint,sum:double,input:double>)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key, value, avg(key + 1) from src
+group by key, value
+order by key, value desc limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, avg(key + 1) from src
+group by key, value
+order by key, value desc limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 val_0 1.0
+10 val_10 11.0
+100 val_100 101.0
+103 val_103 104.0
+104 val_104 105.0
+105 val_105 106.0
+11 val_11 12.0
+111 val_111 112.0
+113 val_113 114.0
+114 val_114 115.0
+116 val_116 117.0
+118 val_118 119.0
+119 val_119 120.0
+12 val_12 13.0
+120 val_120 121.0
+125 val_125 126.0
+126 val_126 127.0
+128 val_128 129.0
+129 val_129 130.0
+131 val_131 132.0
+PREHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by key, value
+order by key desc, value limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by key, value
+order by key desc, value limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col2)
+ keys: _col0 (type: string), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: -+
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ value expressions: _col2 (type: struct<count:bigint,sum:double,input:double>)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key, value, avg(key + 1) from src
+group by key, value
+order by key desc, value limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, avg(key + 1) from src
+group by key, value
+order by key desc, value limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+98 val_98 99.0
+97 val_97 98.0
+96 val_96 97.0
+95 val_95 96.0
+92 val_92 93.0
+90 val_90 91.0
+9 val_9 10.0
+87 val_87 88.0
+86 val_86 87.0
+85 val_85 86.0
+84 val_84 85.0
+83 val_83 84.0
+82 val_82 83.0
+80 val_80 81.0
+8 val_8 9.0
+78 val_78 79.0
+77 val_77 78.0
+76 val_76 77.0
+74 val_74 75.0
+72 val_72 73.0
+PREHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by value, key
+order by key, value limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by value, key
+order by key, value limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string), key (type: string), (UDFToDouble(key) + 1.0) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col2)
+ keys: _col1 (type: string), _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ value expressions: _col2 (type: struct<count:bigint,sum:double,input:double>)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key, value, avg(key + 1) from src
+group by value, key
+order by key, value limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, avg(key + 1) from src
+group by value, key
+order by key, value limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 val_0 1.0
+10 val_10 11.0
+100 val_100 101.0
+103 val_103 104.0
+104 val_104 105.0
+105 val_105 106.0
+11 val_11 12.0
+111 val_111 112.0
+113 val_113 114.0
+114 val_114 115.0
+116 val_116 117.0
+118 val_118 119.0
+119 val_119 120.0
+12 val_12 13.0
+120 val_120 121.0
+125 val_125 126.0
+126 val_126 127.0
+128 val_128 129.0
+129 val_129 130.0
+131 val_131 132.0
+PREHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by value, key
+order by key desc, value limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by value, key
+order by key desc, value limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string), key (type: string), (UDFToDouble(key) + 1.0) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col2)
+ keys: _col1 (type: string), _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: -+
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ value expressions: _col2 (type: struct<count:bigint,sum:double,input:double>)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key, value, avg(key + 1) from src
+group by value, key
+order by key desc, value limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, avg(key + 1) from src
+group by value, key
+order by key desc, value limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+98 val_98 99.0
+97 val_97 98.0
+96 val_96 97.0
+95 val_95 96.0
+92 val_92 93.0
+90 val_90 91.0
+9 val_9 10.0
+87 val_87 88.0
+86 val_86 87.0
+85 val_85 86.0
+84 val_84 85.0
+83 val_83 84.0
+82 val_82 83.0
+80 val_80 81.0
+8 val_8 9.0
+78 val_78 79.0
+77 val_77 78.0
+76 val_76 77.0
+74 val_74 75.0
+72 val_72 73.0
+PREHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by value, key
+order by key desc limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by value, key
+order by key desc limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string), key (type: string), (UDFToDouble(key) + 1.0) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col2)
+ keys: _col1 (type: string), _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: -+
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ value expressions: _col2 (type: struct<count:bigint,sum:double,input:double>)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key, value, avg(key + 1) from src
+group by value, key
+order by key desc limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, avg(key + 1) from src
+group by value, key
+order by key desc limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+98 val_98 99.0
+97 val_97 98.0
+96 val_96 97.0
+95 val_95 96.0
+92 val_92 93.0
+90 val_90 91.0
+9 val_9 10.0
+87 val_87 88.0
+86 val_86 87.0
+85 val_85 86.0
+84 val_84 85.0
+83 val_83 84.0
+82 val_82 83.0
+80 val_80 81.0
+8 val_8 9.0
+78 val_78 79.0
+77 val_77 78.0
+76 val_76 77.0
+74 val_74 75.0
+72 val_72 73.0
+PREHOOK: query: explain
+select key, value, count(key + 1) as agg1 from src
+group by key, value
+order by key, value, agg1 limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, count(key + 1) as agg1 from src
+group by key, value
+order by key, value, agg1 limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 2)
+ Reducer 3 <- Reducer 2 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col2)
+ keys: _col0 (type: string), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ value expressions: _col2 (type: bigint)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ sort order: +++
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key, value, count(key + 1) as agg1 from src
+group by key, value
+order by key, value, agg1 limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, count(key + 1) as agg1 from src
+group by key, value
+order by key, value, agg1 limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 val_0 3
+10 val_10 1
+100 val_100 2
+103 val_103 2
+104 val_104 2
+105 val_105 1
+11 val_11 1
+111 val_111 1
+113 val_113 2
+114 val_114 1
+116 val_116 1
+118 val_118 2
+119 val_119 3
+12 val_12 2
+120 val_120 2
+125 val_125 2
+126 val_126 1
+128 val_128 3
+129 val_129 2
+131 val_131 1
+PREHOOK: query: explain
+select key, value, count(key + 1) as agg1 from src
+group by key, value
+order by key desc, value, agg1 limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, count(key + 1) as agg1 from src
+group by key, value
+order by key desc, value, agg1 limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 2)
+ Reducer 3 <- Reducer 2 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col2)
+ keys: _col0 (type: string), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: -+
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ value expressions: _col2 (type: bigint)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ sort order: -++
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key, value, count(key + 1) as agg1 from src
+group by key, value
+order by key desc, value, agg1 limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, count(key + 1) as agg1 from src
+group by key, value
+order by key desc, value, agg1 limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+98 val_98 2
+97 val_97 2
+96 val_96 1
+95 val_95 2
+92 val_92 1
+90 val_90 3
+9 val_9 1
+87 val_87 1
+86 val_86 1
+85 val_85 1
+84 val_84 2
+83 val_83 2
+82 val_82 1
+80 val_80 1
+8 val_8 1
+78 val_78 1
+77 val_77 1
+76 val_76 2
+74 val_74 1
+72 val_72 2
+PREHOOK: query: -- NOT APPLICABLE
+explain
+select value, avg(key + 1) myavg from src
+group by value
+order by myavg, value desc limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: -- NOT APPLICABLE
+explain
+select value, avg(key + 1) myavg from src
+group by value
+order by myavg, value desc limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 2)
+ Reducer 3 <- Reducer 2 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string), (UDFToDouble(key) + 1.0) (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: struct<count:bigint,sum:double,input:double>)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: double), _col0 (type: string)
+ sort order: +-
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select value, avg(key + 1) myavg from src
+group by value
+order by myavg, value desc limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select value, avg(key + 1) myavg from src
+group by value
+order by myavg, value desc limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+val_0 1.0
+val_2 3.0
+val_4 5.0
+val_5 6.0
+val_8 9.0
+val_9 10.0
+val_10 11.0
+val_11 12.0
+val_12 13.0
+val_15 16.0
+val_17 18.0
+val_18 19.0
+val_19 20.0
+val_20 21.0
+val_24 25.0
+val_26 27.0
+val_27 28.0
+val_28 29.0
+val_30 31.0
+val_33 34.0
+PREHOOK: query: -- NOT APPLICABLE
+explain
+select key, value, avg(key + 1) from src
+group by value, key with rollup
+order by key, value limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: -- NOT APPLICABLE
+explain
+select key, value, avg(key + 1) from src
+group by value, key with rollup
+order by key, value limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 2)
+ Reducer 3 <- Reducer 2 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string), key (type: string), (UDFToDouble(key) + 1.0) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col2)
+ keys: _col0 (type: string), _col1 (type: string), 0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: struct<count:bigint,sum:double,input:double>)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE
+ pruneGroupingSetId: true
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string), _col3 (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ value expressions: _col2 (type: double)
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by rollup(value, key)
+order by key, value limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, avg(key + 1) from src
+group by rollup(value, key)
+order by key, value limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 2)
+ Reducer 3 <- Reducer 2 (SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string), key (type: string), (UDFToDouble(key) + 1.0) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col2)
+ keys: _col0 (type: string), _col1 (type: string), 0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: struct<count:bigint,sum:double,input:double>)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE
+ pruneGroupingSetId: true
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string), _col3 (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.3
+ value expressions: _col2 (type: double)
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key, value, avg(key + 1) from src
+group by value, key with rollup
+order by key, value limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, avg(key + 1) from src
+group by value, key with rollup
+order by key, value limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+NULL NULL 261.182
+NULL val_0 1.0
+NULL val_10 11.0
+NULL val_100 101.0
+NULL val_103 104.0
+NULL val_104 105.0
+NULL val_105 106.0
+NULL val_11 12.0
+NULL val_111 112.0
+NULL val_113 114.0
+NULL val_114 115.0
+NULL val_116 117.0
+NULL val_118 119.0
+NULL val_119 120.0
+NULL val_12 13.0
+NULL val_120 121.0
+NULL val_125 126.0
+NULL val_126 127.0
+NULL val_128 129.0
+NULL val_129 130.0