You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/12/11 09:53:52 UTC

[spark] branch branch-3.0 updated: [SPARK-33740][SQL][3.0] hadoop configs in hive-site.xml can overrides pre-existing hadoop ones

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 9439e11  [SPARK-33740][SQL][3.0] hadoop configs in hive-site.xml can overrides pre-existing hadoop ones
9439e11 is described below

commit 9439e11e65ed808b2dd990a27cacd44b2a6f0009
Author: Kent Yao <ya...@hotmail.com>
AuthorDate: Fri Dec 11 01:49:13 2020 -0800

    [SPARK-33740][SQL][3.0] hadoop configs in hive-site.xml can overrides pre-existing hadoop ones
    
    Backport #30709 to branch-3.0.
    
    ### What changes were proposed in this pull request?
    
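    `org.apache.hadoop.conf.Configuration#setIfUnset` leaves a key untouched whenever it
    already has a value, including a value that comes only from Hadoop's defaults, so
    Hadoop settings in hive-site.xml for such keys were never applied. This change sets
    every hive-site.xml entry on the Hadoop configuration with `set` instead, skipping
    only keys that are already present in SparkConf (as-is, with the `spark.hadoop.`
    prefix, or, for keys starting with `hive`, with the `spark.` prefix).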
    
    ### Why are the changes needed?
    
    Fix a regression in which Hadoop configs in hive-site.xml could not override
    pre-existing Hadoop ones.
    
    ### Does this PR introduce _any_ user-facing change?
    
    no
    
    ### How was this patch tested?
    
    New test in SharedStateSuite, backed by a `hadoop.tmp.dir` entry added to the test
    hive-site.xml.
    
    Closes #30720 from yaooqinn/SPARK-33740-30.
    
    Authored-by: Kent Yao <ya...@hotmail.com>
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 .../scala/org/apache/spark/sql/internal/SharedState.scala    | 12 +++++++++---
 sql/core/src/test/resources/hive-site.xml                    |  5 +++++
 .../org/apache/spark/sql/internal/SharedStateSuite.scala     | 11 +++++++++++
 3 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
index ce4385d..1922a58 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
@@ -228,14 +228,20 @@ object SharedState extends Logging {
   def loadHiveConfFile(
       sparkConf: SparkConf,
       hadoopConf: Configuration): Unit = {
+    def containsInSparkConf(key: String): Boolean = {
+      sparkConf.contains(key) || sparkConf.contains("spark.hadoop." + key) ||
+        (key.startsWith("hive") && sparkConf.contains("spark." + key))
+    }
+
     val hiveWarehouseKey = "hive.metastore.warehouse.dir"
-    val configFile = Utils.getContextOrSparkClassLoader.getResource("hive-site.xml")
+    val configFile = Utils.getContextOrSparkClassLoader.getResourceAsStream("hive-site.xml")
     if (configFile != null) {
       logInfo(s"loading hive config file: $configFile")
       val hadoopConfTemp = new Configuration()
+      hadoopConfTemp.clear()
       hadoopConfTemp.addResource(configFile)
-      hadoopConfTemp.asScala.foreach { entry =>
-        hadoopConf.setIfUnset(entry.getKey, entry.getValue)
+      for (entry <- hadoopConfTemp.asScala if !containsInSparkConf(entry.getKey)) {
+        hadoopConf.set(entry.getKey, entry.getValue)
       }
     }
     // hive.metastore.warehouse.dir only stay in hadoopConf
diff --git a/sql/core/src/test/resources/hive-site.xml b/sql/core/src/test/resources/hive-site.xml
index 17297b3..4bf6189 100644
--- a/sql/core/src/test/resources/hive-site.xml
+++ b/sql/core/src/test/resources/hive-site.xml
@@ -23,4 +23,9 @@
       <value>true</value>
       <description>Internal marker for test.</description>
   </property>
+  <property>
+    <name>hadoop.tmp.dir</name>
+    <value>/tmp/hive_one</value>
+    <description>default is /tmp/hadoop-${user.name} and will be overridden </description>
+  </property>
 </configuration>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/SharedStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/SharedStateSuite.scala
index 81bf153..4d33fc1 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/internal/SharedStateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/SharedStateSuite.scala
@@ -52,4 +52,15 @@ class SharedStateSuite extends SharedSparkSession {
     assert(conf.isInstanceOf[Configuration])
     assert(conf.asInstanceOf[Configuration].get("fs.defaultFS") == "file:///")
   }
+
+  test("SPARK-33740: hadoop configs in hive-site.xml can overrides pre-existing hadoop ones") {
+    val conf = new SparkConf()
+    val hadoopConf = new Configuration()
+    SharedState.loadHiveConfFile(conf, hadoopConf)
+    assert(hadoopConf.get("hadoop.tmp.dir") === "/tmp/hive_one")
+    hadoopConf.clear()
+    SharedState.loadHiveConfFile(
+      conf.set("spark.hadoop.hadoop.tmp.dir", "noop"), hadoopConf)
+    assert(hadoopConf.get("hadoop.tmp.dir") === null)
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org