You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/12/31 21:21:01 UTC

[spark] branch master updated: [SPARK-33944][SQL] Incorrect logging for warehouse keys in SharedState options

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new ed9f728  [SPARK-33944][SQL] Incorrect logging for warehouse keys in SharedState options
ed9f728 is described below

commit ed9f7288019be4803cdb1ee570ca21ad76af371a
Author: Kent Yao <ya...@apache.org>
AuthorDate: Thu Dec 31 13:20:31 2020 -0800

    [SPARK-33944][SQL] Incorrect logging for warehouse keys in SharedState options
    
    ### What changes were proposed in this pull request?
    
    While using SparkSession's initial options to generate the sharable Spark conf and Hadoop conf in ShardState, we shall put the log in the codeblock that the warehouse keys being handled.
    
    ### Why are the changes needed?
    
    bugfix, rm ambiguous log when setting spark.sql.warehouse.dir in SparkSession.builder.config, but only warn setting hive.metastore.warehouse.dir
    ### Does this PR introduce _any_ user-facing change?
    
    no
    ### How was this patch tested?
    
     new tests
    
    Closes #30978 from yaooqinn/SPARK-33944.
    
    Authored-by: Kent Yao <ya...@apache.org>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 .../apache/spark/sql/internal/SharedState.scala    | 16 ++++++++-----
 .../spark/sql/SparkSessionBuilderSuite.scala       | 26 ++++++++++++++++++++++
 2 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
index 6018afb..cc21def 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
@@ -56,17 +56,15 @@ private[sql] class SharedState(
   private[sql] val (conf, hadoopConf) = {
     // Load hive-site.xml into hadoopConf and determine the warehouse path which will be set into
     // both spark conf and hadoop conf avoiding be affected by any SparkSession level options
-    SharedState.loadHiveConfFile(
+    val initialConfigsWithoutWarehouse = SharedState.loadHiveConfFile(
       sparkContext.conf, sparkContext.hadoopConfiguration, initialConfigs)
+
     val confClone = sparkContext.conf.clone()
     val hadoopConfClone = new Configuration(sparkContext.hadoopConfiguration)
     // If `SparkSession` is instantiated using an existing `SparkContext` instance and no existing
     // `SharedState`, all `SparkSession` level configurations have higher priority to generate a
     // `SharedState` instance. This will be done only once then shared across `SparkSession`s
-    initialConfigs.foreach {
-      case (k, _)  if k == "hive.metastore.warehouse.dir" || k == WAREHOUSE_PATH.key =>
-        logWarning(s"Not allowing to set ${WAREHOUSE_PATH.key} or hive.metastore.warehouse.dir " +
-          s"in SparkSession's options, it should be set statically for cross-session usages")
+    initialConfigsWithoutWarehouse.foreach {
       case (k, v) if SQLConf.staticConfKeys.contains(k) =>
         logDebug(s"Applying static initial session options to SparkConf: $k -> $v")
         confClone.set(k, v)
@@ -228,7 +226,8 @@ object SharedState extends Logging {
   def loadHiveConfFile(
       sparkConf: SparkConf,
       hadoopConf: Configuration,
-      initialConfigs: scala.collection.Map[String, String] = Map.empty): Unit = {
+      initialConfigs: scala.collection.Map[String, String] = Map.empty)
+    : scala.collection.Map[String, String] = {
 
     def containsInSparkConf(key: String): Boolean = {
       sparkConf.contains(key) || sparkConf.contains("spark.hadoop." + key) ||
@@ -248,6 +247,10 @@ object SharedState extends Logging {
     }
     val sparkWarehouseOption =
       initialConfigs.get(WAREHOUSE_PATH.key).orElse(sparkConf.getOption(WAREHOUSE_PATH.key))
+    if (initialConfigs.contains(hiveWarehouseKey)) {
+      logWarning(s"Not allowing to set $hiveWarehouseKey in SparkSession's options, please use " +
+        s"${WAREHOUSE_PATH.key} to set statically for cross-session usages")
+    }
     // hive.metastore.warehouse.dir only stay in hadoopConf
     sparkConf.remove(hiveWarehouseKey)
     // Set the Hive metastore warehouse path to the one we use
@@ -272,5 +275,6 @@ object SharedState extends Logging {
       sparkWarehouseDir
     }
     logInfo(s"Warehouse path is '$warehousePath'.")
+    initialConfigs -- Seq(WAREHOUSE_PATH.key, hiveWarehouseKey)
   }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala
index e539768..1f16bb6 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala
@@ -386,4 +386,30 @@ class SparkSessionBuilderSuite extends SparkFunSuite with BeforeAndAfterEach {
     assert(spark2.conf.get(custom) === "c2")
 
   }
+
+  test("SPARK-33944: warning setting hive.metastore.warehouse.dir using session options") {
+    val msg = "Not allowing to set hive.metastore.warehouse.dir in SparkSession's options"
+    val logAppender = new LogAppender(msg)
+    withLogAppender(logAppender) {
+      SparkSession.builder()
+        .master("local")
+        .config("hive.metastore.warehouse.dir", "any")
+        .getOrCreate()
+        .sharedState
+    }
+    assert(logAppender.loggingEvents.exists(_.getRenderedMessage.contains(msg)))
+  }
+
+  test("SPARK-33944: no warning setting spark.sql.warehouse.dir using session options") {
+    val msg = "Not allowing to set hive.metastore.warehouse.dir in SparkSession's options"
+    val logAppender = new LogAppender(msg)
+    withLogAppender(logAppender) {
+      SparkSession.builder()
+        .master("local")
+        .config("spark.sql.warehouse.dir", "any")
+        .getOrCreate()
+        .sharedState
+    }
+    assert(!logAppender.loggingEvents.exists(_.getRenderedMessage.contains(msg)))
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org