Posted to commits@spark.apache.org by do...@apache.org on 2023/03/17 03:29:31 UTC

[spark] branch master updated: [SPARK-42823][SQL] `spark-sql` shell supports multipart namespaces for initialization

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 2000d5f8db8 [SPARK-42823][SQL] `spark-sql` shell supports multipart namespaces for initialization
2000d5f8db8 is described below

commit 2000d5f8db838db62967a45d574728a8bf2aaf6b
Author: Kent Yao <ya...@apache.org>
AuthorDate: Thu Mar 16 20:29:16 2023 -0700

    [SPARK-42823][SQL] `spark-sql` shell supports multipart namespaces for initialization
    
    ### What changes were proposed in this pull request?
    
    Currently, the spark-sql shell can only be initialized with a single-part schema, which must also belong to the session catalog.
    
    #### case 1, specifying catalog field for v1sessioncatalog
    ```sql
    bin/spark-sql --database spark_catalog.default
    
    Exception in thread "main" org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException: Database 'spark_catalog.default' not found
    ```
    
    #### case 2, setting the default catalog to another one
    
    ```sql
    bin/spark-sql -c spark.sql.defaultCatalog=testcat -c spark.sql.catalog.testcat=org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog -c spark.sql.catalog.testcat.url='jdbc:derby:memory:testcat;create=true' -c spark.sql.catalog.testcat.driver=org.apache.derby.jdbc.AutoloadedDriver -c spark.sql.catalogImplementation=in-memory  --database SYS
    23/03/16 18:40:49 WARN ObjectStore: Failed to get database sys, returning NoSuchObjectException
    Exception in thread "main" org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException: Database 'sys' not found
    
    ```
    In this PR, we switch to a USE statement to support multipart namespaces, which lets us resolve the catalog correctly.
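
    With this change, both failing invocations above are expected to succeed. A sketch of the new behavior, reusing the `testcat` configuration from case 2 (the prompt and output formatting are illustrative, based on the new `CliSuite` test below):

    ```sql
    bin/spark-sql -c spark.sql.catalog.testcat=org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog \
      -c spark.sql.catalog.testcat.url='jdbc:derby:memory:testcat;create=true' \
      -c spark.sql.catalog.testcat.driver=org.apache.derby.jdbc.AutoloadedDriver \
      --database testcat.SYS

    spark-sql (SYS)> SELECT CURRENT_CATALOG();
    testcat
    spark-sql (SYS)> SELECT CURRENT_SCHEMA();
    SYS
    ```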
    
    ### Why are the changes needed?
    
    Make the spark-sql shell work better with the v2 catalog framework.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, the `--database` option now supports multipart namespaces and works for v2 catalogs. This behavior is also visible on the Spark web UI.
    
    ### How was this patch tested?
    
    New unit test in `CliSuite`.
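
    To run just this test locally, the usual Spark SBT filter should work (this command assumes the standard dev setup; `-z` is ScalaTest's substring test filter):

    ```
    build/sbt "hive-thriftserver/testOnly *CliSuite -- -z SPARK-42823"
    ```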
    
    Closes #40457 from yaooqinn/SPARK-42823.
    
    Authored-by: Kent Yao <ya...@apache.org>
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 .../sql/hive/thriftserver/SparkSQLCLIDriver.scala  | 15 ++++++-------
 .../spark/sql/hive/thriftserver/CliSuite.scala     | 26 ++++++++++++++++++++++
 2 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
index 51b314ad2c1..22df4e67440 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
@@ -201,14 +201,6 @@ private[hive] object SparkSQLCLIDriver extends Logging {
       case e: UnsupportedEncodingException => exit(ERROR_PATH_NOT_FOUND)
     }
 
-    if (sessionState.database != null) {
-      SparkSQLEnv.sqlContext.sessionState.catalog.setCurrentDatabase(
-        s"${sessionState.database}")
-    }
-
-    // Execute -i init files (always in silent mode)
-    cli.processInitFiles(sessionState)
-
     // We don't propagate hive.metastore.warehouse.dir, because it might has been adjusted in
     // [[SharedState.loadHiveConfFile]] based on the user specified or default values of
     // spark.sql.warehouse.dir and hive.metastore.warehouse.dir.
@@ -216,6 +208,13 @@ private[hive] object SparkSQLCLIDriver extends Logging {
       SparkSQLEnv.sqlContext.setConf(k, v)
     }
 
+    if (sessionState.database != null) {
+      SparkSQLEnv.sqlContext.sql(s"USE ${sessionState.database}")
+    }
+
+    // Execute -i init files (always in silent mode)
+    cli.processInitFiles(sessionState)
+
     cli.printMasterAndAppId
 
     if (sessionState.execString != null) {
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
index 5413635ba47..651c6b7aafb 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.session.SessionState
 import org.apache.spark.{ErrorMessageFormat, SparkConf, SparkContext, SparkFunSuite}
 import org.apache.spark.ProcessTestUtils.ProcessOutputCapturer
 import org.apache.spark.deploy.SparkHadoopUtil
+import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog
 import org.apache.spark.sql.hive.HiveUtils
 import org.apache.spark.sql.hive.HiveUtils._
 import org.apache.spark.sql.hive.client.HiveClientImpl
@@ -806,4 +807,29 @@ class CliSuite extends SparkFunSuite {
       prompt = "spark-sql (spark_42448)>")(
       "select current_database();" -> "spark_42448")
   }
+
+  test("SPARK-42823: multipart identifier support for specify database by --database option") {
+    val catalogName = "testcat"
+    val catalogImpl = s"spark.sql.catalog.$catalogName=${classOf[JDBCTableCatalog].getName}"
+    val catalogUrl =
+      s"spark.sql.catalog.$catalogName.url=jdbc:derby:memory:$catalogName;create=true"
+    val catalogDriver =
+      s"spark.sql.catalog.$catalogName.driver=org.apache.derby.jdbc.AutoloadedDriver"
+    val database = s"-database $catalogName.SYS"
+    val catalogConfigs =
+      Seq(catalogImpl, catalogDriver, catalogUrl, "spark.sql.catalogImplementation=in-memory")
+        .flatMap(Seq("--conf", _))
+    runCliWithin(
+      2.minute,
+      catalogConfigs ++ Seq("--database", s"$catalogName.SYS"))(
+      "SELECT CURRENT_CATALOG();" -> catalogName,
+      "SELECT CURRENT_SCHEMA();" -> "SYS")
+
+    runCliWithin(
+      2.minute,
+      catalogConfigs ++
+        Seq("--conf", s"spark.sql.defaultCatalog=$catalogName", "--database", "SYS"))(
+      "SELECT CURRENT_CATALOG();" -> catalogName,
+      "SELECT CURRENT_SCHEMA();" -> "SYS")
+  }
 }
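
For readers who want to try the new resolution path outside the CLI: below is a minimal, self-contained Scala sketch (not part of this commit) that mirrors the `testcat` setup from the new `CliSuite` test and shows a multipart USE statement selecting both catalog and namespace. The local master, object name, and `show()` call are illustrative assumptions; the Derby driver must be on the classpath.

```scala
import org.apache.spark.sql.SparkSession

object MultipartUseExample {
  def main(args: Array[String]): Unit = {
    // Catalog name, JDBC URL, and driver mirror the new CliSuite test above.
    val spark = SparkSession.builder()
      .master("local[1]")
      .config("spark.sql.catalog.testcat",
        "org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog")
      .config("spark.sql.catalog.testcat.url", "jdbc:derby:memory:testcat;create=true")
      .config("spark.sql.catalog.testcat.driver", "org.apache.derby.jdbc.AutoloadedDriver")
      .getOrCreate()

    // Equivalent to what `--database testcat.SYS` now does in the CLI:
    // the first identifier part picks the catalog, the rest the namespace.
    spark.sql("USE testcat.SYS")
    spark.sql("SELECT CURRENT_CATALOG(), CURRENT_SCHEMA()").show()

    spark.stop()
  }
}
```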

