You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/12/11 09:53:52 UTC
[spark] branch branch-3.0 updated: [SPARK-33740][SQL][3.0] hadoop
configs in hive-site.xml can override pre-existing hadoop ones
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 9439e11 [SPARK-33740][SQL][3.0] hadoop configs in hive-site.xml can override pre-existing hadoop ones
9439e11 is described below
commit 9439e11e65ed808b2dd990a27cacd44b2a6f0009
Author: Kent Yao <ya...@hotmail.com>
AuthorDate: Fri Dec 11 01:49:13 2020 -0800
[SPARK-33740][SQL][3.0] hadoop configs in hive-site.xml can override pre-existing hadoop ones
Backport #30709 to 3.0
### What changes were proposed in this pull request?
org.apache.hadoop.conf.Configuration#setIfUnset will ignore those with defaults too
### Why are the changes needed?
fix a regression
### Does this PR introduce _any_ user-facing change?
no
### How was this patch tested?
new tests
Closes #30720 from yaooqinn/SPARK-33740-30.
Authored-by: Kent Yao <ya...@hotmail.com>
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
.../scala/org/apache/spark/sql/internal/SharedState.scala | 12 +++++++++---
sql/core/src/test/resources/hive-site.xml | 5 +++++
.../org/apache/spark/sql/internal/SharedStateSuite.scala | 11 +++++++++++
3 files changed, 25 insertions(+), 3 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
index ce4385d..1922a58 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
@@ -228,14 +228,20 @@ object SharedState extends Logging {
def loadHiveConfFile(
sparkConf: SparkConf,
hadoopConf: Configuration): Unit = {
+ def containsInSparkConf(key: String): Boolean = {
+ sparkConf.contains(key) || sparkConf.contains("spark.hadoop." + key) ||
+ (key.startsWith("hive") && sparkConf.contains("spark." + key))
+ }
+
val hiveWarehouseKey = "hive.metastore.warehouse.dir"
- val configFile = Utils.getContextOrSparkClassLoader.getResource("hive-site.xml")
+ val configFile = Utils.getContextOrSparkClassLoader.getResourceAsStream("hive-site.xml")
if (configFile != null) {
logInfo(s"loading hive config file: $configFile")
val hadoopConfTemp = new Configuration()
+ hadoopConfTemp.clear()
hadoopConfTemp.addResource(configFile)
- hadoopConfTemp.asScala.foreach { entry =>
- hadoopConf.setIfUnset(entry.getKey, entry.getValue)
+ for (entry <- hadoopConfTemp.asScala if !containsInSparkConf(entry.getKey)) {
+ hadoopConf.set(entry.getKey, entry.getValue)
}
}
// hive.metastore.warehouse.dir only stay in hadoopConf
diff --git a/sql/core/src/test/resources/hive-site.xml b/sql/core/src/test/resources/hive-site.xml
index 17297b3..4bf6189 100644
--- a/sql/core/src/test/resources/hive-site.xml
+++ b/sql/core/src/test/resources/hive-site.xml
@@ -23,4 +23,9 @@
<value>true</value>
<description>Internal marker for test.</description>
</property>
+ <property>
+ <name>hadoop.tmp.dir</name>
+ <value>/tmp/hive_one</value>
+ <description>The default is /tmp/hadoop-${user.name}; this value will be overridden by Spark.</description>
+ </property>
</configuration>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/SharedStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/SharedStateSuite.scala
index 81bf153..4d33fc1 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/internal/SharedStateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/SharedStateSuite.scala
@@ -52,4 +52,15 @@ class SharedStateSuite extends SharedSparkSession {
assert(conf.isInstanceOf[Configuration])
assert(conf.asInstanceOf[Configuration].get("fs.defaultFS") == "file:///")
}
+
+ test("SPARK-33740: hadoop configs in hive-site.xml can overrides pre-existing hadoop ones") {
+ val conf = new SparkConf()
+ val hadoopConf = new Configuration()
+ SharedState.loadHiveConfFile(conf, hadoopConf)
+ assert(hadoopConf.get("hadoop.tmp.dir") === "/tmp/hive_one")
+ hadoopConf.clear()
+ SharedState.loadHiveConfFile(
+ conf.set("spark.hadoop.hadoop.tmp.dir", "noop"), hadoopConf)
+ assert(hadoopConf.get("hadoop.tmp.dir") === null)
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org