You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2022/06/28 06:18:48 UTC

[GitHub] [spark] amaliujia commented on a diff in pull request #36969: [SPARK-39235][SQL] Make setCurrentDatabase compatible with 3 layer namespace

amaliujia commented on code in PR #36969:
URL: https://github.com/apache/spark/pull/36969#discussion_r908074187


##########
sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala:
##########
@@ -43,12 +43,6 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
 
   private def sessionCatalog: SessionCatalog = sparkSession.sessionState.catalog
 
-  private def requireDatabaseExists(dbName: String): Unit = {

Review Comment:
   why inline this function?



##########
sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala:
##########
@@ -58,15 +52,24 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
   /**
    * Returns the current default database in this session.
    */
-  override def currentDatabase: String = sessionCatalog.getCurrentDatabase
+  override def currentDatabase: String =
+    sparkSession.sessionState.catalogManager.currentNamespace.mkString(".")
 
   /**
    * Sets the current default database in this session.
    */
   @throws[AnalysisException]("database does not exist")
   override def setCurrentDatabase(dbName: String): Unit = {
-    requireDatabaseExists(dbName)
-    sessionCatalog.setCurrentDatabase(dbName)
+    val ident = sparkSession.sessionState.sqlParser.parseMultipartIdentifier(dbName)

Review Comment:
   No need to parse. Just use the input as the DB name directly.
   
   Basically, if we say this function should respect `currentCatalog`, then we don't expect users to pass in a catalog name. What users should provide are only DB names, and we don't treat the input specially. 



##########
sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala:
##########
@@ -58,15 +52,24 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
   /**
    * Returns the current default database in this session.
    */
-  override def currentDatabase: String = sessionCatalog.getCurrentDatabase
+  override def currentDatabase: String =
+    sparkSession.sessionState.catalogManager.currentNamespace.mkString(".")
 
   /**
    * Sets the current default database in this session.
    */
   @throws[AnalysisException]("database does not exist")
   override def setCurrentDatabase(dbName: String): Unit = {
-    requireDatabaseExists(dbName)
-    sessionCatalog.setCurrentDatabase(dbName)
+    val ident = sparkSession.sessionState.sqlParser.parseMultipartIdentifier(dbName)
+    // ident may or may not include catalog prefix
+    // if it includes the current catalog, strip it. otherwise consider it all to be namespace
+    val catalog = currentCatalog()
+    val ns = ident.headOption match {
+      // TODO case-sensitivity?

Review Comment:
   I think we can be case insensitive here, since that is how the Hive metastore behaves?
   
   Though I guess we don't need to check the catalog name at all.



##########
sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala:
##########
@@ -749,4 +749,51 @@ class CatalogSuite extends SharedSparkSession with AnalysisTest with BeforeAndAf
     assert(spark.catalog.currentCatalog().equals("spark_catalog"))
     assert(spark.catalog.listCatalogs().collect().map(c => c.name).toSet == Set("testcat"))
   }
+
+  test("three layer namespace compatibility - current database") {
+    sql("CREATE NAMESPACE testcat.my_db")
+    // current catalog is spark_catalog, setting current database to one in testcat will fail
+    val hiveErr = intercept[AnalysisException] {
+      spark.catalog.setCurrentDatabase("testcat.my_db")
+    }.getMessage
+    assert(hiveErr.contains("Namespace 'testcat.my_db' not found"))
+
+    // now switch catalogs and try again
+    // TODO? sql("USE CATALOG testcat")

Review Comment:
   Remove this TODO comment?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org