You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by om...@apache.org on 2015/11/17 21:19:15 UTC
[04/43] hive git commit: HIVE-12325 : Turn hive.map.groupby.sorted on by default (Chetna Chaudhari via Ashutosh Chauhan)
HIVE-12325 : Turn hive.map.groupby.sorted on by default (Chetna Chaudhari via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8a5040c2
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8a5040c2
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8a5040c2
Branch: refs/heads/master-fixed
Commit: 8a5040c2a57242bc1926b3c2dabe7b30e59003a5
Parents: 96c45a3
Author: Chetna Chaudhari <ch...@gmail.com>
Authored: Thu Nov 5 20:44:00 2015 -0800
Committer: Owen O'Malley <om...@apache.org>
Committed: Tue Nov 17 12:18:29 2015 -0800
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 6 +-
.../hive/ql/optimizer/GroupByOptimizer.java | 8 --
.../queries/clientpositive/groupby_sort_8.q | 6 --
.../clientpositive/groupby_sort_test_1.q | 1 -
.../clientpositive/auto_sortmerge_join_10.q.out | 100 +++++++------------
.../results/clientpositive/bucket_groupby.q.out | 46 +++------
.../results/clientpositive/groupby_sort_8.q.out | 64 ------------
.../clientpositive/groupby_sort_test_1.q.out | 87 ++++++++++------
.../spark/auto_sortmerge_join_10.q.out | 45 +++------
.../tez/auto_sortmerge_join_10.q.out | 71 ++++++-------
10 files changed, 155 insertions(+), 279 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/8a5040c2/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 7272ea4..7a8517b 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -862,14 +862,10 @@ public class HiveConf extends Configuration {
HIVEMULTIGROUPBYSINGLEREDUCER("hive.multigroupby.singlereducer", true,
"Whether to optimize multi group by query to generate single M/R job plan. If the multi group by query has \n" +
"common group by keys, it will be optimized to generate single M/R job."),
- HIVE_MAP_GROUPBY_SORT("hive.map.groupby.sorted", false,
+ HIVE_MAP_GROUPBY_SORT("hive.map.groupby.sorted", true,
"If the bucketing/sorting properties of the table exactly match the grouping key, whether to perform \n" +
"the group by in the mapper by using BucketizedHiveInputFormat. The only downside to this\n" +
"is that it limits the number of mappers to the number of files."),
- HIVE_MAP_GROUPBY_SORT_TESTMODE("hive.map.groupby.sorted.testmode", false,
- "If the bucketing/sorting properties of the table exactly match the grouping key, whether to perform \n" +
- "the group by in the mapper by using BucketizedHiveInputFormat. If the test mode is set, the plan\n" +
- "is not converted, but a query property is set to denote the same."),
HIVE_GROUPBY_ORDERBY_POSITION_ALIAS("hive.groupby.orderby.position.alias", false,
"Whether to enable using Column Position Alias in Group By or Order By"),
HIVE_NEW_JOB_GROUPING_SET_CARDINALITY("hive.new.job.grouping.set.cardinality", 30,
http://git-wip-us.apache.org/repos/asf/hive/blob/8a5040c2/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java
index f758776..fe459f7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java
@@ -212,11 +212,7 @@ public class GroupByOptimizer implements Transform {
convertGroupByMapSideSortedGroupBy(hiveConf, groupByOp, depth);
}
else if (optimizeDistincts && !HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
- // In test mode, dont change the query plan. However, setup a query property
pGraphContext.getQueryProperties().setHasMapGroupBy(true);
- if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_MAP_GROUPBY_SORT_TESTMODE)) {
- return;
- }
ReduceSinkOperator reduceSinkOp =
(ReduceSinkOperator)groupByOp.getChildOperators().get(0);
GroupByDesc childGroupByDesc =
@@ -518,11 +514,7 @@ public class GroupByOptimizer implements Transform {
// The operators specified by depth and removed from the tree.
protected void convertGroupByMapSideSortedGroupBy(
HiveConf conf, GroupByOperator groupByOp, int depth) {
- // In test mode, dont change the query plan. However, setup a query property
pGraphContext.getQueryProperties().setHasMapGroupBy(true);
- if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_MAP_GROUPBY_SORT_TESTMODE)) {
- return;
- }
if (removeChildren(groupByOp, depth)) {
// Use bucketized hive input format - that makes sure that one mapper reads the entire file
http://git-wip-us.apache.org/repos/asf/hive/blob/8a5040c2/ql/src/test/queries/clientpositive/groupby_sort_8.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_sort_8.q b/ql/src/test/queries/clientpositive/groupby_sort_8.q
index f53295e..f0d3a59 100644
--- a/ql/src/test/queries/clientpositive/groupby_sort_8.q
+++ b/ql/src/test/queries/clientpositive/groupby_sort_8.q
@@ -18,10 +18,4 @@ EXPLAIN
select count(distinct key) from T1;
select count(distinct key) from T1;
-set hive.map.groupby.sorted.testmode=true;
--- In testmode, the plan is not changed
-EXPLAIN
-select count(distinct key) from T1;
-select count(distinct key) from T1;
-
DROP TABLE T1;
http://git-wip-us.apache.org/repos/asf/hive/blob/8a5040c2/ql/src/test/queries/clientpositive/groupby_sort_test_1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_sort_test_1.q b/ql/src/test/queries/clientpositive/groupby_sort_test_1.q
index 4ec138e..70eef33 100644
--- a/ql/src/test/queries/clientpositive/groupby_sort_test_1.q
+++ b/ql/src/test/queries/clientpositive/groupby_sort_test_1.q
@@ -2,7 +2,6 @@ set hive.enforce.bucketing = true;
set hive.enforce.sorting = true;
set hive.exec.reducers.max = 10;
set hive.map.groupby.sorted=true;
-set hive.map.groupby.sorted.testmode=true;
CREATE TABLE T1(key STRING, val STRING)
CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
http://git-wip-us.apache.org/repos/asf/hive/blob/8a5040c2/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out
index e7f6de3..fb1e656 100644
--- a/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out
+++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out
@@ -242,15 +242,19 @@ select count(*) from
on subq1.key = subq2.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-6 depends on stages: Stage-1
- Stage-3 depends on stages: Stage-6
- Stage-0 depends on stages: Stage-3
+ Stage-5 is a root stage
+ Stage-2 depends on stages: Stage-5
+ Stage-0 depends on stages: Stage-2
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
+ Stage: Stage-5
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ subq1:a
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ subq1:a
TableScan
alias: a
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
@@ -259,43 +263,22 @@ STAGE PLANS:
Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
- bucketGroup: true
keys: key (type: int)
- mode: hash
+ mode: final
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: int)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
- Stage: Stage-6
- Map Reduce Local Work
- Alias -> Map Local Tables:
- subq2:a
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- subq2:a
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
TableScan
alias: a
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
@@ -306,31 +289,22 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- HashTable Sink Operator
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
-
- Stage: Stage-3
- Map Reduce
- Map Operator Tree:
- TableScan
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
+ Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
Local Work:
Map Reduce Local Work
Reduce Operator Tree:
http://git-wip-us.apache.org/repos/asf/hive/blob/8a5040c2/ql/src/test/results/clientpositive/bucket_groupby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/bucket_groupby.q.out b/ql/src/test/results/clientpositive/bucket_groupby.q.out
index 1b48d3a..1ac5287 100644
--- a/ql/src/test/results/clientpositive/bucket_groupby.q.out
+++ b/ql/src/test/results/clientpositive/bucket_groupby.q.out
@@ -1191,38 +1191,24 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- bucketGroup: true
keys: _col0 (type: string), _col1 (type: string)
- mode: hash
+ mode: final
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: bigint)
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: string), KEY._col1 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col2 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 10
- Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/8a5040c2/ql/src/test/results/clientpositive/groupby_sort_8.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_sort_8.q.out b/ql/src/test/results/clientpositive/groupby_sort_8.q.out
index 5152385..5d8f513 100644
--- a/ql/src/test/results/clientpositive/groupby_sort_8.q.out
+++ b/ql/src/test/results/clientpositive/groupby_sort_8.q.out
@@ -101,70 +101,6 @@ POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t1@ds=1
#### A masked pattern was here ####
5
-PREHOOK: query: -- In testmode, the plan is not changed
-EXPLAIN
-select count(distinct key) from T1
-PREHOOK: type: QUERY
-POSTHOOK: query: -- In testmode, the plan is not changed
-EXPLAIN
-select count(distinct key) from T1
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: t1
- Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: key
- Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(DISTINCT key)
- keys: key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(DISTINCT KEY._col0:0._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select count(distinct key) from T1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-PREHOOK: Input: default@t1@ds=1
-#### A masked pattern was here ####
-POSTHOOK: query: select count(distinct key) from T1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-POSTHOOK: Input: default@t1@ds=1
-#### A masked pattern was here ####
-5
PREHOOK: query: DROP TABLE T1
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@t1
http://git-wip-us.apache.org/repos/asf/hive/blob/8a5040c2/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out b/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out
index 8c1765d..dfe0ff1 100644
--- a/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out
+++ b/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out
@@ -50,8 +50,13 @@ SELECT key, count(1) FROM T1 GROUP BY key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
Stage-2 depends on stages: Stage-0
+ Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
STAGE PLANS:
Stage: Stage-1
@@ -67,34 +72,30 @@ STAGE PLANS:
Group By Operator
aggregations: count(1)
keys: _col0 (type: string)
- mode: hash
+ mode: final
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
Stage: Stage-0
Move Operator
@@ -109,3 +110,33 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
http://git-wip-us.apache.org/repos/asf/hive/blob/8a5040c2/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out
index ee9f448..17d20cb 100644
--- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out
@@ -206,8 +206,6 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-2
Spark
- Edges:
- Reducer 2 <- Map 1 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -220,43 +218,28 @@ STAGE PLANS:
Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
- bucketGroup: true
keys: key (type: int)
- mode: hash
+ mode: final
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
- Reducer 2
+ Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
Local Work:
Map Reduce Local Work
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: int)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
Stage: Stage-1
Spark
Edges:
- Reducer 4 <- Map 3 (GROUP, 1)
+ Reducer 3 <- Map 2 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 3
+ Map 2
Map Operator Tree:
TableScan
alias: a
@@ -275,7 +258,7 @@ STAGE PLANS:
0 _col0 (type: int)
1 _col0 (type: int)
input vertices:
- 0 Reducer 2
+ 0 Map 1
Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -288,7 +271,7 @@ STAGE PLANS:
value expressions: _col0 (type: bigint)
Local Work:
Map Reduce Local Work
- Reducer 4
+ Reducer 3
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
http://git-wip-us.apache.org/repos/asf/hive/blob/8a5040c2/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out
index 0d22ea7..98e099c 100644
--- a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out
+++ b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out
@@ -245,8 +245,8 @@ STAGE PLANS:
Stage: Stage-1
Tez
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE)
- Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Map 1 <- Map 3 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -259,18 +259,34 @@ STAGE PLANS:
Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
- bucketGroup: true
keys: key (type: int)
- mode: hash
+ mode: final
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
- Map 4
+ Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Map 3
Map Operator Tree:
TableScan
alias: a
@@ -291,37 +307,6 @@ STAGE PLANS:
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: int)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- input vertices:
- 1 Map 4
- Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE
- HybridGraceHashJoin: true
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Reducer 3
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE