Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2022/08/08 05:56:43 UTC

[GitHub] [spark] zhengruifeng commented on a diff in pull request #37287: [SPARK-39912][SPARK-39828][SQL] Refine CatalogImpl

zhengruifeng commented on code in PR #37287:
URL: https://github.com/apache/spark/pull/37287#discussion_r939847966


##########
sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala:
##########
@@ -197,48 +212,37 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
   override def listFunctions(dbName: String): Dataset[Function] = {
     // `dbName` could be either a single database name (behavior in Spark 3.3 and prior) or
     // a qualified namespace with catalog name. We assume it's a single database name
-    // and check if we can find the dbName in sessionCatalog. If so we listFunctions under
-    // that database. Otherwise we try 3-part name parsing and locate the database.
-    if (sessionCatalog.databaseExists(dbName)) {
-      val functions = sessionCatalog.listFunctions(dbName)
-        .map { case (functIdent, _) => makeFunction(functIdent) }
-      CatalogImpl.makeDataset(functions, sparkSession)
+    // and check if we can find it in the sessionCatalog. If so we list functions under
+    // that database. Otherwise we will resolve the catalog/namespace and list functions there.
+    val namespace = if (sessionCatalog.databaseExists(dbName)) {
+      Seq(CatalogManager.SESSION_CATALOG_NAME, dbName)
     } else {
-      val ident = sparkSession.sessionState.sqlParser.parseMultipartIdentifier(dbName)
-      val functions = collection.mutable.ArrayBuilder.make[Function]
-
-      // built-in functions
-      val plan0 = ShowFunctions(UnresolvedNamespace(ident),
-        userScope = false, systemScope = true, None)
-      sparkSession.sessionState.executePlan(plan0).toRdd.collect().foreach { row =>
-        // `lookupBuiltinOrTempFunction` and `lookupBuiltinOrTempTableFunction` in Analyzer
-        // require the input identifier only contains the function name, otherwise, built-in
-        // functions will be skipped.
-        val name = row.getString(0)
-        functions += makeFunction(Seq(name))
-      }
-
-      // user functions
-      val plan1 = ShowFunctions(UnresolvedNamespace(ident),
-        userScope = true, systemScope = false, None)
-      sparkSession.sessionState.executePlan(plan1).toRdd.collect().foreach { row =>
-        // `row.getString(0)` may contain dbName like `db.function`, so extract the function name.
-        val name = row.getString(0).split("\\.").last
-        functions += makeFunction(ident :+ name)
-      }
+      parseIdent(dbName)
+    }
+    val functions = collection.mutable.ArrayBuilder.make[Function]
+
+    // TODO: SHOW FUNCTIONS should tell us the function type (built-in, temp, persistent) so that
+    //       we can simplify the code below quite a bit. For now we need to list built-in functions
+    //       separately as several built-in function names are not parsable, such as `!=`.
+
+    // List built-in functions. We don't need to specify the namespace here as SHOW FUNCTIONS with
+    // only system scope does not need to know the catalog and namespace.
+    val plan0 = ShowFunctions(UnresolvedNamespace(Nil), userScope = false, systemScope = true, None)
+    sparkSession.sessionState.executePlan(plan0).toRdd.collect().foreach { row =>
+      // Built-in functions do not belong to any catalog or namespace. We can only look them up
+      // with a single-part name.
+      val name = row.getString(0)
+      functions += makeFunction(Seq(name))
+    }
 
-      CatalogImpl.makeDataset(functions.result(), sparkSession)
+    // List user functions.
+    val plan1 = ShowFunctions(UnresolvedNamespace(namespace),
+      userScope = true, systemScope = false, None)
+    sparkSession.sessionState.executePlan(plan1).toRdd.collect().foreach { row =>

Review Comment:
   sounds good
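
   For context, a minimal usage sketch of the two `dbName` forms the refactored `listFunctions` handles; the catalog and database names below are hypothetical, and the qualified form assumes a catalog plugin has been configured:

       import org.apache.spark.sql.SparkSession

       // Minimal sketch; `my_catalog` and `my_db` are hypothetical names.
       object ListFunctionsSketch {
         def main(args: Array[String]): Unit = {
           val spark = SparkSession.builder()
             .master("local[*]")
             .appName("list-functions-sketch")
             .getOrCreate()

           // Form 1: a bare database name. `databaseExists` succeeds, so it resolves
           // to (spark_catalog, default), preserving the Spark 3.3-and-prior
           // behavior. The result contains built-in functions plus user functions
           // in that database.
           spark.catalog.listFunctions("default").show(truncate = false)

           // Form 2: a qualified multi-part name. `databaseExists` fails, the
           // string is parsed as a multi-part identifier, and user functions are
           // listed from that catalog/namespace. This assumes a catalog plugin is
           // registered, e.g. via spark.sql.catalog.my_catalog=<CatalogPlugin impl>.
           // spark.catalog.listFunctions("my_catalog.my_db").show(truncate = false)

           spark.stop()
         }
       }

   At the SQL level, the two plans built in the diff correspond to SHOW SYSTEM FUNCTIONS (which needs no namespace) and SHOW USER FUNCTIONS IN <namespace>, which is why built-in functions can be listed without resolving a catalog.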



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

