Posted to commits@spark.apache.org by gu...@apache.org on 2021/02/08 13:51:45 UTC

[spark] branch branch-3.0 updated: [SPARK-33438][SQL] Eagerly init objects with defined SQL Confs for command `set -v`

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new f611788  [SPARK-33438][SQL] Eagerly init objects with defined SQL Confs for command `set -v`
f611788 is described below

commit f611788e5a18e4bf56df7d3e3b0263384ec10bff
Author: Linhong Liu <li...@databricks.com>
AuthorDate: Mon Feb 8 22:48:28 2021 +0900

    [SPARK-33438][SQL] Eagerly init objects with defined SQL Confs for command `set -v`
    
    ### What changes were proposed in this pull request?
    In Spark, `set -v` is defined as "Queries all properties that are defined in the SQLConf of the sparkSession".
    But other external modules also define properties and register them to SQLConf. In this case, those properties
    can't be displayed by `set -v` until the defining conf object is initialized (i.e. the object is referenced at least once).
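    
    For illustration only (not part of this patch), here is a minimal sketch of how such an external module might register a conf. The object name and key are hypothetical; the entry only becomes visible to `set -v` once the object's initializer has actually run:
    
    ```scala
    import org.apache.spark.sql.internal.SQLConf
    
    // Hypothetical conf-defining object, similar in spirit to HiveUtils.
    // Its vals (and hence the registration side effect) run only when the
    // object is first referenced, because Scala objects are lazily initialized.
    object MyModuleConf {
      val SOME_FLAG = SQLConf.buildConf("spark.sql.myModule.someFlag")
        .doc("Hypothetical flag, used here only to illustrate registration.")
        .version("3.0.0")
        .booleanConf
        .createWithDefault(false)
    }
    ```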
    
    In this PR, I propose to eagerly initialize all the objects registered to SQLConf, so that `set -v` always outputs
    the complete set of properties.
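    
    As a rough sketch of the eager-initialization idea (adapted from the change below, not an exact copy), forcing a module through runtime reflection runs its initializer and thereby registers the confs it defines:
    
    ```scala
    import scala.util.control.NonFatal
    import org.apache.spark.sql.catalyst.ScalaReflection
    
    // Referencing a Scala object through the runtime mirror forces its
    // initializer to run, which registers any SQL confs defined in it.
    def forceInit(moduleName: String): Unit = {
      try {
        val symbol = ScalaReflection.mirror.staticModule(moduleName)
        ScalaReflection.mirror.reflectModule(symbol).instance
      } catch {
        case NonFatal(_) => // module not on the classpath (e.g. no Hive); ignore
      }
    }
    
    // The Hive module is the concrete case handled by this patch.
    forceInit("org.apache.spark.sql.hive.HiveUtils")
    ```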
    
    ### Why are the changes needed?
    Improve the `set -v` command to produce complete and deterministic results.
    
    ### Does this PR introduce _any_ user-facing change?
    The `set -v` command will dump more configs.
    
    ### How was this patch tested?
    Existing tests.
    
    Closes #30363 from linhongliu-db/set-v.
    
    Authored-by: Linhong Liu <li...@databricks.com>
    Signed-off-by: HyukjinKwon <gu...@apache.org>
    (cherry picked from commit 037bfb2dbcb73cfbd73f0fd9abe0b38789a182a2)
    Signed-off-by: HyukjinKwon <gu...@apache.org>
---
 .../org/apache/spark/sql/internal/SQLConf.scala    | 24 ++++++++++++++++++++++
 .../spark/sql/api/python/PythonSQLUtils.scala      | 15 +-------------
 2 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index f55546f..a2aa6f8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -25,6 +25,7 @@ import java.util.zip.Deflater
 import scala.collection.JavaConverters._
 import scala.collection.immutable
 import scala.util.Try
+import scala.util.control.NonFatal
 import scala.util.matching.Regex
 
 import org.apache.hadoop.fs.Path
@@ -35,6 +36,7 @@ import org.apache.spark.internal.config._
 import org.apache.spark.internal.config.{IGNORE_MISSING_FILES => SPARK_IGNORE_MISSING_FILES}
 import org.apache.spark.network.util.ByteUnit
 import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.ScalaReflection
 import org.apache.spark.sql.catalyst.analysis.{HintErrorLogger, Resolver}
 import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode
 import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator
@@ -3292,6 +3294,27 @@ class SQLConf extends Serializable with Logging {
     }
   }
 
+  private var definedConfsLoaded = false
+  /**
+   * Init [[StaticSQLConf]] and [[org.apache.spark.sql.hive.HiveUtils]] so that all the defined
+   * SQL Configurations will be registered to SQLConf
+   */
+  private def loadDefinedConfs(): Unit = {
+    if (!definedConfsLoaded) {
+      definedConfsLoaded = true
+      // Force to register static SQL configurations
+      StaticSQLConf
+      try {
+        // Force to register SQL configurations from Hive module
+        val symbol = ScalaReflection.mirror.staticModule("org.apache.spark.sql.hive.HiveUtils")
+        ScalaReflection.mirror.reflectModule(symbol).instance
+      } catch {
+        case NonFatal(e) =>
+          logWarning("SQL configurations from Hive module is not loaded", e)
+      }
+    }
+  }
+
   /**
    * Return all the configuration properties that have been set (i.e. not the default).
    * This creates a new copy of the config properties in the form of a Map.
@@ -3304,6 +3327,7 @@ class SQLConf extends Serializable with Logging {
    * definition contains key, defaultValue and doc.
    */
   def getAllDefinedConfs: Seq[(String, String, String, String)] = sqlConfEntries.synchronized {
+    loadDefinedConfs()
     sqlConfEntries.values.asScala.filter(_.isPublic).map { entry =>
       val displayValue = Option(getConfString(entry.key, null)).getOrElse(entry.defaultValueString)
       (entry.key, displayValue, entry.doc, entry.version)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala
index 3825460..2cd26e2 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala
@@ -20,20 +20,17 @@ package org.apache.spark.sql.api.python
 import java.io.InputStream
 import java.nio.channels.Channels
 
-import scala.util.control.NonFatal
-
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.api.python.PythonRDDServer
 import org.apache.spark.internal.Logging
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.{DataFrame, SQLContext}
-import org.apache.spark.sql.catalyst.ScalaReflection
 import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
 import org.apache.spark.sql.catalyst.expressions.ExpressionInfo
 import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
 import org.apache.spark.sql.execution.{ExplainMode, QueryExecution}
 import org.apache.spark.sql.execution.arrow.ArrowConverters
-import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.DataType
 
 private[sql] object PythonSQLUtils extends Logging {
@@ -46,16 +43,6 @@ private[sql] object PythonSQLUtils extends Logging {
 
   private def listAllSQLConfigs(): Seq[(String, String, String, String)] = {
     val conf = new SQLConf()
-    // Force to build static SQL configurations
-    StaticSQLConf
-    // Force to build SQL configurations from Hive module
-    try {
-      val symbol = ScalaReflection.mirror.staticModule("org.apache.spark.sql.hive.HiveUtils")
-      ScalaReflection.mirror.reflectModule(symbol).instance
-    } catch {
-      case NonFatal(e) =>
-        logWarning("Cannot generated sql configurations from hive module", e)
-    }
     conf.getAllDefinedConfs
   }
 

