You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2021/02/15 17:33:41 UTC

[spark] branch branch-3.0 updated: [SPARK-34431][CORE] Only load `hive-site.xml` once

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new d326016  [SPARK-34431][CORE] Only load `hive-site.xml` once
d326016 is described below

commit d326016cb152045fd378867b10101a9bbfb73c5f
Author: herman <he...@databricks.com>
AuthorDate: Mon Feb 15 09:31:51 2021 -0800

    [SPARK-34431][CORE] Only load `hive-site.xml` once
    
    ### What changes were proposed in this pull request?
    Lazily load Hive's configuration properties from `hive-site.xml` only once.
    
    ### Why are the changes needed?
    It is expensive to parse the same file over and over.
    
    ### Does this PR introduce _any_ user-facing change?
    No, it should not. The changes can improve performance slightly.
    
    ### How was this patch tested?
    By existing test suites such as `SparkContextSuite`.
    
    Closes #31556 from MaxGekk/load-hive-site-once.
    
    Authored-by: herman <he...@databricks.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
    (cherry picked from commit 4fd3247bca400f31b0175813df811352b906acbf)
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 .../scala/org/apache/spark/deploy/SparkHadoopUtil.scala | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
index d872c3b..c719aef 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
@@ -450,7 +450,7 @@ private[spark] object SparkHadoopUtil extends Logging {
           hadoopConf.set("fs.s3a.session.token", sessionToken)
         }
       }
-      loadHiveConfFile(conf, hadoopConf)
+      appendHiveConfigs(hadoopConf)
       appendSparkHadoopConfigs(conf, hadoopConf)
       appendSparkHiveConfigs(conf, hadoopConf)
       val bufferSize = conf.get(BUFFER_SIZE).toString
@@ -458,11 +458,20 @@ private[spark] object SparkHadoopUtil extends Logging {
     }
   }
 
-  private def loadHiveConfFile(conf: SparkConf, hadoopConf: Configuration): Unit = {
+  private lazy val hiveConfKeys = {
     val configFile = Utils.getContextOrSparkClassLoader.getResource("hive-site.xml")
     if (configFile != null) {
-      logInfo(s"Loading hive config file: $configFile")
-      hadoopConf.addResource(configFile)
+      val conf = new Configuration(false)
+      conf.addResource(configFile)
+      conf.iterator().asScala.toSeq
+    } else {
+      Nil
+    }
+  }
+
+  private def appendHiveConfigs(hadoopConf: Configuration): Unit = {
+    hiveConfKeys.foreach { kv =>
+      hadoopConf.set(kv.getKey, kv.getValue)
     }
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org