You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jd...@apache.org on 2016/05/01 03:31:02 UTC
[08/11] hive git commit: HIVE-12963 : LIMIT statement with SORT BY creates additional MR job with hardcoded only one reducer (Alina Abramova, reviewed by Sergey Shelukhin)

HIVE-12963 : LIMIT statement with SORT BY creates additional MR job with hardcoded only one reducer (Alina Abramova, reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/324a2c6e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/324a2c6e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/324a2c6e

Branch: refs/heads/llap
Commit: 324a2c6ec86aba7c1baf74caf52b615b400825c9
Parents: 6460529
Author: Sergey Shelukhin <se...@apache.org>
Authored: Fri Apr 29 12:13:18 2016 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Fri Apr 29 12:13:18 2016 -0700

----------------------------------------------------------------------
 common/src/java/org/apache/hadoop/hive/conf/HiveConf.java         | 2 ++
 itests/src/test/resources/testconfiguration.properties            | 1 +
 ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java | 2 +-
 ql/src/test/results/clientpositive/groupby1_limit.q.out           | 2 +-
 4 files changed, 5 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/324a2c6e/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 46a3b96..b13de92 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1009,6 +1009,8 @@ public class HiveConf extends Configuration {
         "This parameter decides if Hive should add an additional map-reduce job. If the grouping set\n" +
         "cardinality (4 in the example above), is more than this value, a new MR job is added under the\n" +
         "assumption that the original group by will reduce the data size."),
+    HIVE_GROUPBY_LIMIT_EXTRASTEP("hive.groupby.limit.extrastep", true, "This parameter decides if Hive should \n" +
+        "create new MR job for sorting final output"),
 
     // Max filesize used to do a single copy (after that, distcp is used)
     HIVE_EXEC_COPYFILE_MAXSIZE("hive.exec.copyfile.maxsize", 32L * 1024 * 1024 /*32M*/,

http://git-wip-us.apache.org/repos/asf/hive/blob/324a2c6e/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index e46e6ce..0ef3161 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -874,6 +874,7 @@ spark.query.files=add_part_multiple.q, \
   groupby_sort_1_23.q, \
   groupby_sort_skew_1.q, \
   groupby_sort_skew_1_23.q, \
+  groupby_limit_extrastep.q, \
   having.q, \
   identity_project_remove_skip.q, \
   index_auto_self_join.q, \

http://git-wip-us.apache.org/repos/asf/hive/blob/324a2c6e/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index cfe4497..06db7f9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -7300,7 +7300,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     Operator curr = genLimitPlan(dest, qb, input, offset, limit);
 
     // the client requested that an extra map-reduce step be performed
-    if (!extraMRStep) {
+    if (!extraMRStep  || !HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_GROUPBY_LIMIT_EXTRASTEP)){
       return curr;
     }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/324a2c6e/ql/src/test/results/clientpositive/groupby1_limit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby1_limit.q.out b/ql/src/test/results/clientpositive/groupby1_limit.q.out
index aacd23c..8d7fbfa 100644
--- a/ql/src/test/results/clientpositive/groupby1_limit.q.out
+++ b/ql/src/test/results/clientpositive/groupby1_limit.q.out
@@ -68,7 +68,7 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             Reduce Output Operator
-              sort order: 
+              sort order:
               Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE
               TopN Hash Memory Usage: 0.1
               value expressions: _col0 (type: string), _col1 (type: double)