You are viewing a plain text version of this content; the canonical hyperlink was omitted in this plain-text rendering.
Posted to commits@hive.apache.org by su...@apache.org on 2017/04/01 17:44:08 UTC
hive git commit: HIVE-16336: Rename
hive.spark.use.file.size.for.mapjoin to hive.spark.use.ts.stats.for.mapjoin
(Chao Sun, reviewed by Rui Li and Lefty Leverenz)
Repository: hive
Updated Branches:
refs/heads/master 3a2d31778 -> 149e4fad4
HIVE-16336: Rename hive.spark.use.file.size.for.mapjoin to hive.spark.use.ts.stats.for.mapjoin (Chao Sun, reviewed by Rui Li and Lefty Leverenz)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/149e4fad
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/149e4fad
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/149e4fad
Branch: refs/heads/master
Commit: 149e4fad4e78508da8793ac293a161aaf3106e01
Parents: 3a2d317
Author: Chao Sun <su...@apache.org>
Authored: Fri Mar 31 17:35:13 2017 -0700
Committer: Chao Sun <su...@apache.org>
Committed: Sat Apr 1 10:43:44 2017 -0700
----------------------------------------------------------------------
.../java/org/apache/hadoop/hive/conf/HiveConf.java | 15 ++++++++-------
.../ql/optimizer/spark/SparkMapJoinOptimizer.java | 2 +-
2 files changed, 9 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/149e4fad/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index e12fea1..b9a72d1 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3246,13 +3246,14 @@ public class HiveConf extends Configuration {
"logger used for llap-daemons."),
SPARK_USE_OP_STATS("hive.spark.use.op.stats", true,
- "Whether to use operator stats to determine reducer parallelism for Hive on Spark. "
- + "If this is false, Hive will use source table stats to determine reducer "
- + "parallelism for all first level reduce tasks, and the maximum reducer parallelism "
- + "from all parents for all the rest (second level and onward) reducer tasks."),
- SPARK_USE_FILE_SIZE_FOR_MAPJOIN("hive.spark.use.file.size.for.mapjoin", false,
- "If this is set to true, mapjoin optimization in Hive/Spark will use source file sizes associated "
- + "with TableScan operator on the root of operator tree, instead of using operator statistics."),
+ "Whether to use operator stats to determine reducer parallelism for Hive on Spark.\n" +
+ "If this is false, Hive will use source table stats to determine reducer\n" +
+ "parallelism for all first level reduce tasks, and the maximum reducer parallelism\n" +
+ "from all parents for all the rest (second level and onward) reducer tasks."),
+ SPARK_USE_TS_STATS_FOR_MAPJOIN("hive.spark.use.ts.stats.for.mapjoin", false,
+ "If this is set to true, mapjoin optimization in Hive/Spark will use statistics from\n" +
+ "TableScan operators at the root of operator tree, instead of parent ReduceSink\n" +
+ "operators of the Join operator."),
SPARK_CLIENT_FUTURE_TIMEOUT("hive.spark.client.future.timeout",
"60s", new TimeValidator(TimeUnit.SECONDS),
"Timeout for requests from Hive client to remote Spark driver."),
http://git-wip-us.apache.org/repos/asf/hive/blob/149e4fad/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
index d8f37ae..207f7b3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
@@ -195,7 +195,7 @@ public class SparkMapJoinOptimizer implements NodeProcessor {
// bigTableFound means we've encountered a table that's bigger than the
// max. This table is either the big table or we cannot convert.
boolean bigTableFound = false;
- boolean useTsStats = context.getConf().getBoolean(HiveConf.ConfVars.SPARK_USE_FILE_SIZE_FOR_MAPJOIN.varname, false);
+ boolean useTsStats = context.getConf().getBoolean(HiveConf.ConfVars.SPARK_USE_TS_STATS_FOR_MAPJOIN.varname, false);
boolean hasUpstreamSinks = false;
// Check whether there's any upstream RS.