Posted to commits@hive.apache.org by su...@apache.org on 2017/04/01 17:44:08 UTC

hive git commit: HIVE-16336: Rename hive.spark.use.file.size.for.mapjoin to hive.spark.use.ts.stats.for.mapjoin (Chao Sun, reviewed by Rui Li and Lefty Leverenz)

Repository: hive
Updated Branches:
  refs/heads/master 3a2d31778 -> 149e4fad4


HIVE-16336: Rename hive.spark.use.file.size.for.mapjoin to hive.spark.use.ts.stats.for.mapjoin (Chao Sun, reviewed by Rui Li and Lefty Leverenz)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/149e4fad
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/149e4fad
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/149e4fad

Branch: refs/heads/master
Commit: 149e4fad4e78508da8793ac293a161aaf3106e01
Parents: 3a2d317
Author: Chao Sun <su...@apache.org>
Authored: Fri Mar 31 17:35:13 2017 -0700
Committer: Chao Sun <su...@apache.org>
Committed: Sat Apr 1 10:43:44 2017 -0700

----------------------------------------------------------------------
 .../java/org/apache/hadoop/hive/conf/HiveConf.java   | 15 ++++++++-------
 .../ql/optimizer/spark/SparkMapJoinOptimizer.java    |  2 +-
 2 files changed, 9 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/149e4fad/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index e12fea1..b9a72d1 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3246,13 +3246,14 @@ public class HiveConf extends Configuration {
         "logger used for llap-daemons."),
 
     SPARK_USE_OP_STATS("hive.spark.use.op.stats", true,
-        "Whether to use operator stats to determine reducer parallelism for Hive on Spark. "
-            + "If this is false, Hive will use source table stats to determine reducer "
-            + "parallelism for all first level reduce tasks, and the maximum reducer parallelism "
-            + "from all parents for all the rest (second level and onward) reducer tasks."),
-    SPARK_USE_FILE_SIZE_FOR_MAPJOIN("hive.spark.use.file.size.for.mapjoin", false,
-        "If this is set to true, mapjoin optimization in Hive/Spark will use source file sizes associated "
-            + "with TableScan operator on the root of operator tree, instead of using operator statistics."),
+        "Whether to use operator stats to determine reducer parallelism for Hive on Spark.\n" +
+        "If this is false, Hive will use source table stats to determine reducer\n" +
+        "parallelism for all first level reduce tasks, and the maximum reducer parallelism\n" +
+        "from all parents for all the rest (second level and onward) reducer tasks."),
+    SPARK_USE_TS_STATS_FOR_MAPJOIN("hive.spark.use.ts.stats.for.mapjoin", false,
+        "If this is set to true, mapjoin optimization in Hive/Spark will use statistics from\n" +
+        "TableScan operators at the root of operator tree, instead of parent ReduceSink\n" +
+        "operators of the Join operator."),
     SPARK_CLIENT_FUTURE_TIMEOUT("hive.spark.client.future.timeout",
       "60s", new TimeValidator(TimeUnit.SECONDS),
       "Timeout for requests from Hive client to remote Spark driver."),

http://git-wip-us.apache.org/repos/asf/hive/blob/149e4fad/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
index d8f37ae..207f7b3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
@@ -195,7 +195,7 @@ public class SparkMapJoinOptimizer implements NodeProcessor {
     // bigTableFound means we've encountered a table that's bigger than the
     // max. This table is either the big table or we cannot convert.
     boolean bigTableFound = false;
-    boolean useTsStats = context.getConf().getBoolean(HiveConf.ConfVars.SPARK_USE_FILE_SIZE_FOR_MAPJOIN.varname, false);
+    boolean useTsStats = context.getConf().getBoolean(HiveConf.ConfVars.SPARK_USE_TS_STATS_FOR_MAPJOIN.varname, false);
     boolean hasUpstreamSinks = false;
 
     // Check whether there's any upstream RS.
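
As a usage note, a minimal sketch (illustrative, not from this commit) of how a consumer reads the renamed flag from a plain Hadoop Configuration; it mirrors the getBoolean(varname, default) pattern SparkMapJoinOptimizer uses above, and any other code still referencing the old SPARK_USE_FILE_SIZE_FOR_MAPJOIN constant needs the same rename. The class and method names here are hypothetical:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;

public class ReadTsStatsFlag {
  // Reads the renamed flag, defaulting to false, exactly as the optimizer hunk above does.
  static boolean useTsStats(Configuration conf) {
    return conf.getBoolean(HiveConf.ConfVars.SPARK_USE_TS_STATS_FOR_MAPJOIN.varname, false);
  }
}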