Posted to commits@spark.apache.org by do...@apache.org on 2021/02/15 17:33:22 UTC
[spark] branch branch-3.1 updated: [SPARK-34431][CORE] Only load `hive-site.xml` once
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 21754a1 [SPARK-34431][CORE] Only load `hive-site.xml` once
21754a1 is described below
commit 21754a1186731e3f8cb32bf183ffbb8d3db6f607
Author: herman <he...@databricks.com>
AuthorDate: Mon Feb 15 09:31:51 2021 -0800
[SPARK-34431][CORE] Only load `hive-site.xml` once
### What changes were proposed in this pull request?
Lazily load Hive's configuration properties from `hive-site.xml` only once.
### Why are the changes needed?
It is expensive to parse the same file over and over.
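For illustration, the parse-once mechanism here is Scala's `lazy val`: its initializer runs at most once, on first access, and the result is memoized. A minimal standalone sketch of the pattern (the `HiveSiteCache` object and `appendTo` method are illustrative names, not part of the patch):

```scala
import scala.collection.JavaConverters._
import org.apache.hadoop.conf.Configuration

object HiveSiteCache {
  // Parsed at most once, on first access; later accesses reuse the cached entries.
  private lazy val hiveConfEntries: Seq[(String, String)] = {
    val url = Thread.currentThread().getContextClassLoader.getResource("hive-site.xml")
    if (url != null) {
      val conf = new Configuration(false) // false: skip Hadoop's default resources
      conf.addResource(url)
      conf.iterator().asScala.map(e => e.getKey -> e.getValue).toSeq
    } else {
      Nil
    }
  }

  // Replaying cached entries with set() is cheap; re-parsing the XML is not.
  def appendTo(hadoopConf: Configuration): Unit =
    hiveConfEntries.foreach { case (k, v) => hadoopConf.set(k, v) }
}
```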
### Does this PR introduce _any_ user-facing change?
It should not. The change can slightly improve performance.
### How was this patch tested?
By existing test suites such as `SparkContextSuite`.
Closes #31556 from MaxGekk/load-hive-site-once.
Authored-by: herman <he...@databricks.com>
Signed-off-by: Dongjoon Hyun <dh...@apple.com>
(cherry picked from commit 4fd3247bca400f31b0175813df811352b906acbf)
Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
.../scala/org/apache/spark/deploy/SparkHadoopUtil.scala | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
index d872c3b..c719aef 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
@@ -450,7 +450,7 @@ private[spark] object SparkHadoopUtil extends Logging {
           hadoopConf.set("fs.s3a.session.token", sessionToken)
         }
       }
-      loadHiveConfFile(conf, hadoopConf)
+      appendHiveConfigs(hadoopConf)
       appendSparkHadoopConfigs(conf, hadoopConf)
       appendSparkHiveConfigs(conf, hadoopConf)
       val bufferSize = conf.get(BUFFER_SIZE).toString
@@ -458,11 +458,20 @@ private[spark] object SparkHadoopUtil extends Logging {
     }
   }
 
-  private def loadHiveConfFile(conf: SparkConf, hadoopConf: Configuration): Unit = {
+  private lazy val hiveConfKeys = {
     val configFile = Utils.getContextOrSparkClassLoader.getResource("hive-site.xml")
     if (configFile != null) {
-      logInfo(s"Loading hive config file: $configFile")
-      hadoopConf.addResource(configFile)
+      val conf = new Configuration(false)
+      conf.addResource(configFile)
+      conf.iterator().asScala.toSeq
+    } else {
+      Nil
+    }
+  }
+
+  private def appendHiveConfigs(hadoopConf: Configuration): Unit = {
+    hiveConfKeys.foreach { kv =>
+      hadoopConf.set(kv.getKey, kv.getValue)
     }
   }
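One consequence of the new shape is worth noting: the old `loadHiveConfFile` called `hadoopConf.addResource(configFile)`, so each fresh `Configuration` re-parsed the XML file; the new code parses it once into `hiveConfKeys` and replays the cached entries with `set`. A hypothetical caller, reusing the `HiveSiteCache` sketch above, shows the effect:

```scala
import org.apache.hadoop.conf.Configuration

object HiveSiteCacheDemo extends App {
  // The XML is read and parsed during the first appendTo call only;
  // the second call just replays the cached entries into a fresh Configuration.
  val confA = new Configuration(false)
  HiveSiteCache.appendTo(confA)

  val confB = new Configuration(false)
  HiveSiteCache.appendTo(confB) // no file I/O or XML parsing happens here
}
```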