You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jd...@apache.org on 2016/05/01 03:31:02 UTC
[08/11] hive git commit: HIVE-12963 : LIMIT statement with SORT BY
creates additional MR job with hardcoded only one reducer (Alina Abramova,
reviewed by Sergey Shelukhin)
HIVE-12963 : LIMIT statement with SORT BY creates additional MR job with hardcoded only one reducer (Alina Abramova, reviewed by Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/324a2c6e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/324a2c6e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/324a2c6e
Branch: refs/heads/llap
Commit: 324a2c6ec86aba7c1baf74caf52b615b400825c9
Parents: 6460529
Author: Sergey Shelukhin <se...@apache.org>
Authored: Fri Apr 29 12:13:18 2016 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Fri Apr 29 12:13:18 2016 -0700
----------------------------------------------------------------------
common/src/java/org/apache/hadoop/hive/conf/HiveConf.java | 2 ++
itests/src/test/resources/testconfiguration.properties | 1 +
ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java | 2 +-
ql/src/test/results/clientpositive/groupby1_limit.q.out | 2 +-
4 files changed, 5 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/324a2c6e/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 46a3b96..b13de92 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1009,6 +1009,8 @@ public class HiveConf extends Configuration {
"This parameter decides if Hive should add an additional map-reduce job. If the grouping set\n" +
"cardinality (4 in the example above), is more than this value, a new MR job is added under the\n" +
"assumption that the original group by will reduce the data size."),
+ HIVE_GROUPBY_LIMIT_EXTRASTEP("hive.groupby.limit.extrastep", true, "This parameter decides if Hive should \n" +
+ "create new MR job for sorting final output"),
// Max filesize used to do a single copy (after that, distcp is used)
HIVE_EXEC_COPYFILE_MAXSIZE("hive.exec.copyfile.maxsize", 32L * 1024 * 1024 /*32M*/,
http://git-wip-us.apache.org/repos/asf/hive/blob/324a2c6e/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index e46e6ce..0ef3161 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -874,6 +874,7 @@ spark.query.files=add_part_multiple.q, \
groupby_sort_1_23.q, \
groupby_sort_skew_1.q, \
groupby_sort_skew_1_23.q, \
+ groupby_limit_extrastep.q, \
having.q, \
identity_project_remove_skip.q, \
index_auto_self_join.q, \
http://git-wip-us.apache.org/repos/asf/hive/blob/324a2c6e/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index cfe4497..06db7f9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -7300,7 +7300,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
Operator curr = genLimitPlan(dest, qb, input, offset, limit);
// the client requested that an extra map-reduce step be performed
- if (!extraMRStep) {
+ if (!extraMRStep || !HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_GROUPBY_LIMIT_EXTRASTEP)){
return curr;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/324a2c6e/ql/src/test/results/clientpositive/groupby1_limit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby1_limit.q.out b/ql/src/test/results/clientpositive/groupby1_limit.q.out
index aacd23c..8d7fbfa 100644
--- a/ql/src/test/results/clientpositive/groupby1_limit.q.out
+++ b/ql/src/test/results/clientpositive/groupby1_limit.q.out
@@ -68,7 +68,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
Reduce Output Operator
- sort order:
+ sort order:
Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
value expressions: _col0 (type: string), _col1 (type: double)