You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2023/03/17 03:29:31 UTC
[spark] branch master updated: [SPARK-42823][SQL] `spark-sql` shell supports multipart namespaces for initialization
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 2000d5f8db8 [SPARK-42823][SQL] `spark-sql` shell supports multipart namespaces for initialization
2000d5f8db8 is described below
commit 2000d5f8db838db62967a45d574728a8bf2aaf6b
Author: Kent Yao <ya...@apache.org>
AuthorDate: Thu Mar 16 20:29:16 2023 -0700
[SPARK-42823][SQL] `spark-sql` shell supports multipart namespaces for initialization
### What changes were proposed in this pull request?
Currently, we only support initializing spark-sql shell with a single-part schema, which also must be forced to the session catalog.
#### case 1, specifying catalog field for v1sessioncatalog
```sql
bin/spark-sql --database spark_catalog.default
Exception in thread "main" org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException: Database 'spark_catalog.default' not found
```
#### case 2, setting the default catalog to another one
```sql
bin/spark-sql -c spark.sql.defaultCatalog=testcat -c spark.sql.catalog.testcat=org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog -c spark.sql.catalog.testcat.url='jdbc:derby:memory:testcat;create=true' -c spark.sql.catalog.testcat.driver=org.apache.derby.jdbc.AutoloadedDriver -c spark.sql.catalogImplementation=in-memory --database SYS
23/03/16 18:40:49 WARN ObjectStore: Failed to get database sys, returning NoSuchObjectException
Exception in thread "main" org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException: Database 'sys' not found
```
In this PR, we switch to a `USE` statement to support multipart namespaces, which helps us resolve
the catalog correctly.
### Why are the changes needed?
Make spark-sql shell better support the v2 catalog framework.
### Does this PR introduce _any_ user-facing change?
Yes, the `--database` option now supports multipart namespaces and works for v2 catalogs. You will also see this behavior in the Spark web UI.
### How was this patch tested?
New unit test added to `CliSuite`.
Closes #40457 from yaooqinn/SPARK-42823.
Authored-by: Kent Yao <ya...@apache.org>
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
.../sql/hive/thriftserver/SparkSQLCLIDriver.scala | 15 ++++++-------
.../spark/sql/hive/thriftserver/CliSuite.scala | 26 ++++++++++++++++++++++
2 files changed, 33 insertions(+), 8 deletions(-)
diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
index 51b314ad2c1..22df4e67440 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
@@ -201,14 +201,6 @@ private[hive] object SparkSQLCLIDriver extends Logging {
case e: UnsupportedEncodingException => exit(ERROR_PATH_NOT_FOUND)
}
- if (sessionState.database != null) {
- SparkSQLEnv.sqlContext.sessionState.catalog.setCurrentDatabase(
- s"${sessionState.database}")
- }
-
- // Execute -i init files (always in silent mode)
- cli.processInitFiles(sessionState)
-
// We don't propagate hive.metastore.warehouse.dir, because it might has been adjusted in
// [[SharedState.loadHiveConfFile]] based on the user specified or default values of
// spark.sql.warehouse.dir and hive.metastore.warehouse.dir.
@@ -216,6 +208,13 @@ private[hive] object SparkSQLCLIDriver extends Logging {
SparkSQLEnv.sqlContext.setConf(k, v)
}
+ if (sessionState.database != null) {
+ SparkSQLEnv.sqlContext.sql(s"USE ${sessionState.database}")
+ }
+
+ // Execute -i init files (always in silent mode)
+ cli.processInitFiles(sessionState)
+
cli.printMasterAndAppId
if (sessionState.execString != null) {
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
index 5413635ba47..651c6b7aafb 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.session.SessionState
import org.apache.spark.{ErrorMessageFormat, SparkConf, SparkContext, SparkFunSuite}
import org.apache.spark.ProcessTestUtils.ProcessOutputCapturer
import org.apache.spark.deploy.SparkHadoopUtil
+import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog
import org.apache.spark.sql.hive.HiveUtils
import org.apache.spark.sql.hive.HiveUtils._
import org.apache.spark.sql.hive.client.HiveClientImpl
@@ -806,4 +807,29 @@ class CliSuite extends SparkFunSuite {
prompt = "spark-sql (spark_42448)>")(
"select current_database();" -> "spark_42448")
}
+
+ test("SPARK-42823: multipart identifier support for specify database by --database option") {
+ val catalogName = "testcat" // name used in every spark.sql.catalog.<name> setting below
+ val catalogImpl = s"spark.sql.catalog.$catalogName=${classOf[JDBCTableCatalog].getName}"
+ val catalogUrl =
+ s"spark.sql.catalog.$catalogName.url=jdbc:derby:memory:$catalogName;create=true"
+ val catalogDriver =
+ s"spark.sql.catalog.$catalogName.driver=org.apache.derby.jdbc.AutoloadedDriver"
+ val database = s"-database $catalogName.SYS" // NOTE(review): never referenced below; args are built inline
+ val catalogConfigs =
+ Seq(catalogImpl, catalogDriver, catalogUrl, "spark.sql.catalogImplementation=in-memory")
+ .flatMap(Seq("--conf", _)) // each setting becomes a ("--conf", "<key>=<value>") argument pair
+ runCliWithin( // case 1: catalog-qualified namespace "testcat.SYS" passed via --database
+ 2.minute,
+ catalogConfigs ++ Seq("--database", s"$catalogName.SYS"))(
+ "SELECT CURRENT_CATALOG();" -> catalogName, // presumably statement -> expected output; confirm in runCliWithin
+ "SELECT CURRENT_SCHEMA();" -> "SYS")
+
+ runCliWithin( // case 2: testcat set as spark.sql.defaultCatalog, single-part "SYS" via --database
+ 2.minute,
+ catalogConfigs ++
+ Seq("--conf", s"spark.sql.defaultCatalog=$catalogName", "--database", "SYS"))(
+ "SELECT CURRENT_CATALOG();" -> catalogName, // same expectations as case 1
+ "SELECT CURRENT_SCHEMA();" -> "SYS")
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org