You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2021/10/13 19:14:25 UTC

[spark] branch master updated: [SPARK-36982] Migrate SHOW NAMESPACES to use V2 command by default

This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 5ac76d9  [SPARK-36982] Migrate SHOW NAMESPACES to use V2 command by default
5ac76d9 is described below

commit 5ac76d9cb45d58eeb4253d50e90060a68c3e87cb
Author: Terry Kim <yu...@gmail.com>
AuthorDate: Wed Oct 13 22:12:56 2021 +0300

    [SPARK-36982] Migrate SHOW NAMESPACES to use V2 command by default
    
    ### What changes were proposed in this pull request?
    
    This PR proposes to use V2 commands as default as outlined in [SPARK-36588](https://issues.apache.org/jira/browse/SPARK-36588), and this PR migrates `SHOW NAMESPACES` to use v2 command by default. (Technically speaking, there is no v1 command for `SHOW NAMESPACES/DATABASES`, but this PR removes an extra check in `ResolveSessionCatalog` to handle session catalog.)
    
    ### Why are the changes needed?
    
It's been a while since we introduced the v2 commands, and it seems reasonable to use v2 commands by default even for the session catalog, with a legacy config to fall back to the v1 commands.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No, the user can use v1 command by setting `spark.sql.legacy.useV1Command` to `true`.
    
    ### How was this patch tested?
    
    Added unit tests.
    
    Closes #34255 from imback82/migrate_show_namespaces.
    
    Authored-by: Terry Kim <yu...@gmail.com>
    Signed-off-by: Max Gekk <ma...@gmail.com>
---
 .../sql/catalyst/analysis/KeepLegacyOutputs.scala  |  5 +++-
 .../org/apache/spark/sql/internal/SQLConf.scala    |  2 +-
 .../catalyst/analysis/ResolveSessionCatalog.scala  | 16 +---------
 .../execution/command/DDLCommandTestUtils.scala    |  7 +++--
 .../command/ShowNamespacesSuiteBase.scala          |  6 ++++
 .../execution/command/TestsV1AndV2Commands.scala}  | 35 ++++++++++++----------
 .../execution/command/v1/CommandSuiteBase.scala    |  3 +-
 .../execution/command/v1/ShowNamespacesSuite.scala |  8 ++---
 .../sql/execution/command/v1/ShowTablesSuite.scala | 27 ++---------------
 .../execution/command/v2/CommandSuiteBase.scala    |  3 +-
 .../hive/execution/command/CommandSuiteBase.scala  |  3 +-
 .../execution/command/ShowNamespacesSuite.scala    |  2 ++
 .../hive/execution/command/ShowTablesSuite.scala   |  2 +-
 13 files changed, 49 insertions(+), 70 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/KeepLegacyOutputs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/KeepLegacyOutputs.scala
index baee2bd..3af5544 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/KeepLegacyOutputs.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/KeepLegacyOutputs.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql.catalyst.analysis
 
-import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ShowTables}
+import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ShowNamespaces, ShowTables}
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.catalyst.trees.TreePattern.COMMAND
 import org.apache.spark.sql.internal.SQLConf
@@ -36,6 +36,9 @@ object KeepLegacyOutputs extends Rule[LogicalPlan] {
           assert(s.output.length == 3)
           val newOutput = s.output.head.withName("database") +: s.output.tail
           s.copy(output = newOutput)
+        case s: ShowNamespaces =>
+          assert(s.output.length == 1)
+          s.copy(output = Seq(s.output.head.withName("databaseName")))
       }
     }
   }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 1dc4a8e..f0e7731 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -3364,7 +3364,7 @@ object SQLConf {
     buildConf("spark.sql.legacy.keepCommandOutputSchema")
       .internal()
       .doc("When true, Spark will keep the output schema of commands such as SHOW DATABASES " +
-        "unchanged, for v1 catalog and/or table.")
+        "unchanged.")
       .version("3.0.2")
       .booleanConf
       .createWithDefault(false)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
index 2252225..c77be4e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
@@ -121,14 +121,6 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager)
     case SetNamespaceLocation(DatabaseInSessionCatalog(db), location) =>
       AlterDatabaseSetLocationCommand(db, location)
 
-    case s @ ShowNamespaces(ResolvedNamespace(cata, _), _, output) if isSessionCatalog(cata) =>
-      if (conf.getConf(SQLConf.LEGACY_KEEP_COMMAND_OUTPUT_SCHEMA)) {
-        assert(output.length == 1)
-        s.copy(output = Seq(output.head.withName("databaseName")))
-      } else {
-        s
-      }
-
     case RenameTable(ResolvedV1TableOrViewIdentifier(oldName), newName, isView) =>
       AlterTableRenameCommand(oldName.asTableIdentifier, newName.asTableIdentifier, isView)
 
@@ -270,13 +262,7 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager)
       DropDatabaseCommand(db, d.ifExists, d.cascade)
 
     case ShowTables(DatabaseInSessionCatalog(db), pattern, output) if conf.useV1Command =>
-      val newOutput = if (conf.getConf(SQLConf.LEGACY_KEEP_COMMAND_OUTPUT_SCHEMA)) {
-        assert(output.length == 3)
-        output.head.withName("database") +: output.tail
-      } else {
-        output
-      }
-      ShowTablesCommand(Some(db), pattern, newOutput)
+      ShowTablesCommand(Some(db), pattern, output)
 
     case ShowTableExtended(
         DatabaseInSessionCatalog(db),
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala
index f9e26f8..af3e92a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala
@@ -39,7 +39,9 @@ import org.apache.spark.sql.test.SQLTestUtils
  */
 trait DDLCommandTestUtils extends SQLTestUtils {
   // The version of the catalog under testing such as "V1", "V2", "Hive V1".
-  protected def version: String
+  protected def catalogVersion: String
+  // The version of the SQL command under testing such as "V1", "V2".
+  protected def commandVersion: String
   // Name of the command as SQL statement, for instance "SHOW PARTITIONS"
   protected def command: String
   // The catalog name which can be used in SQL statements under testing
@@ -51,7 +53,8 @@ trait DDLCommandTestUtils extends SQLTestUtils {
   // the failed test in logs belongs to.
   override def test(testName: String, testTags: Tag*)(testFun: => Any)
     (implicit pos: Position): Unit = {
-    super.test(s"$command $version: " + testName, testTags: _*)(testFun)
+    val testNamePrefix = s"$command using $catalogVersion catalog $commandVersion command"
+    super.test(s"$testNamePrefix: $testName", testTags: _*)(testFun)
   }
 
   protected def withNamespaceAndTable(ns: String, tableName: String, cat: String = catalog)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesSuiteBase.scala
index 790489e..1b37444 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesSuiteBase.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesSuiteBase.scala
@@ -128,4 +128,10 @@ trait ShowNamespacesSuiteBase extends QueryTest with DDLCommandTestUtils {
       spark.sessionState.catalogManager.reset()
     }
   }
+
+  test("SPARK-34359: keep the legacy output schema") {
+    withSQLConf(SQLConf.LEGACY_KEEP_COMMAND_OUTPUT_SCHEMA.key -> "true") {
+      assert(sql("SHOW NAMESPACES").schema.fieldNames.toSeq == Seq("databaseName"))
+    }
+  }
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/KeepLegacyOutputs.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/TestsV1AndV2Commands.scala
similarity index 52%
copy from sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/KeepLegacyOutputs.scala
copy to sql/core/src/test/scala/org/apache/spark/sql/execution/command/TestsV1AndV2Commands.scala
index baee2bd..15976f2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/KeepLegacyOutputs.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/TestsV1AndV2Commands.scala
@@ -15,27 +15,30 @@
  * limitations under the License.
  */
 
-package org.apache.spark.sql.catalyst.analysis
+package org.apache.spark.sql.execution.command
+
+import org.scalactic.source.Position
+import org.scalatest.Tag
 
-import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ShowTables}
-import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.catalyst.trees.TreePattern.COMMAND
 import org.apache.spark.sql.internal.SQLConf
 
 /**
- * A rule for keeping the SQL command's legacy outputs.
+ * The trait that enables running a test for both v1 and v2 command.
  */
-object KeepLegacyOutputs extends Rule[LogicalPlan] {
-  def apply(plan: LogicalPlan): LogicalPlan = {
-    if (!conf.getConf(SQLConf.LEGACY_KEEP_COMMAND_OUTPUT_SCHEMA)) {
-      plan
-    } else {
-      plan.resolveOperatorsUpWithPruning(
-        _.containsPattern(COMMAND)) {
-        case s: ShowTables =>
-          assert(s.output.length == 3)
-          val newOutput = s.output.head.withName("database") +: s.output.tail
-          s.copy(output = newOutput)
+trait TestsV1AndV2Commands extends DDLCommandTestUtils {
+  private var _version: String = ""
+  override def commandVersion: String = _version
+
+  // Tests using V1 catalogs will run with `spark.sql.legacy.useV1Command` on and off
+  // to test both V1 and V2 commands.
+  override def test(testName: String, testTags: Tag*)(testFun: => Any)
+    (implicit pos: Position): Unit = {
+    Seq(true, false).foreach { useV1Command =>
+      _version = if (useV1Command) "V1" else "V2"
+      super.test(testName, testTags: _*) {
+        withSQLConf(SQLConf.LEGACY_USE_V1_COMMAND.key -> useV1Command.toString) {
+          testFun
+        }
       }
     }
   }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CommandSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CommandSuiteBase.scala
index 80c552d..24f3ac3 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CommandSuiteBase.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CommandSuiteBase.scala
@@ -28,7 +28,8 @@ import org.apache.spark.sql.test.SharedSparkSession
  * settings for all unified datasource V1 and V2 test suites.
  */
 trait CommandSuiteBase extends SharedSparkSession {
-  def version: String = "V1" // The prefix is added to test names
+  def catalogVersion: String = "V1" // The catalog version is added to test names
+  def commandVersion: String = "V1" // The command version is added to test names
   def catalog: String = CatalogManager.SESSION_CATALOG_NAME
   def defaultUsing: String = "USING parquet" // The clause is used in creating tables under testing
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowNamespacesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowNamespacesSuite.scala
index 52742a2..54c5d22 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowNamespacesSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowNamespacesSuite.scala
@@ -38,15 +38,11 @@ trait ShowNamespacesSuiteBase extends command.ShowNamespacesSuiteBase {
     }.getMessage
     assert(errMsg.contains("Namespace 'dummy' not found"))
   }
-
-  test("SPARK-34359: keep the legacy output schema") {
-    withSQLConf(SQLConf.LEGACY_KEEP_COMMAND_OUTPUT_SCHEMA.key -> "true") {
-      assert(sql("SHOW NAMESPACES").schema.fieldNames.toSeq == Seq("databaseName"))
-    }
-  }
 }
 
 class ShowNamespacesSuite extends ShowNamespacesSuiteBase with CommandSuiteBase {
+  override def commandVersion: String = "V2" // There is only V2 variant of SHOW NAMESPACES.
+
   test("case sensitivity") {
     Seq(true, false).foreach { caseSensitive =>
       withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala
index 23b9b54..68ad1c4 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala
@@ -17,9 +17,6 @@
 
 package org.apache.spark.sql.execution.command.v1
 
-import org.scalactic.source.Position
-import org.scalatest.Tag
-
 import org.apache.spark.sql.{AnalysisException, Row, SaveMode}
 import org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException
 import org.apache.spark.sql.execution.command
@@ -33,28 +30,8 @@ import org.apache.spark.sql.internal.SQLConf
  *   - V1 In-Memory catalog: `org.apache.spark.sql.execution.command.v1.ShowTablesSuite`
  *   - V1 Hive External catalog: `org.apache.spark.sql.hive.execution.command.ShowTablesSuite`
  */
-trait ShowTablesSuiteBase extends command.ShowTablesSuiteBase {
+trait ShowTablesSuiteBase extends command.ShowTablesSuiteBase with command.TestsV1AndV2Commands {
   override def defaultNamespace: Seq[String] = Seq("default")
-  var _version: String = ""
-  override def version: String = _version
-
-  // Tests using V1 catalogs will run with `spark.sql.legacy.useV1Command` on and off
-  // to test both V1 and V2 commands.
-  override def test(testName: String, testTags: Tag*)(testFun: => Any)
-    (implicit pos: Position): Unit = {
-    Seq(true, false).foreach { useV1Command =>
-      _version = if (useV1Command) {
-        "using V1 catalog with V1 command"
-      } else {
-        "using V1 catalog with V2 command"
-      }
-      super.test(testName, testTags: _*) {
-        withSQLConf(SQLConf.LEGACY_USE_V1_COMMAND.key -> useV1Command.toString) {
-          testFun
-        }
-      }
-    }
-  }
 
   private def withSourceViews(f: => Unit): Unit = {
     withTable("source", "source2") {
@@ -165,7 +142,7 @@ trait ShowTablesSuiteBase extends command.ShowTablesSuiteBase {
  * The class contains tests for the `SHOW TABLES` command to check V1 In-Memory table catalog.
  */
 class ShowTablesSuite extends ShowTablesSuiteBase with CommandSuiteBase {
-  override def version: String = super[ShowTablesSuiteBase].version
+  override def commandVersion: String = super[ShowTablesSuiteBase].commandVersion
 
   test("SPARK-33670: show partitions from a datasource table") {
     import testImplicits._
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CommandSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CommandSuiteBase.scala
index bed04f4..1ff9e74 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CommandSuiteBase.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CommandSuiteBase.scala
@@ -30,7 +30,8 @@ import org.apache.spark.sql.test.SharedSparkSession
  * for all unified datasource V1 and V2 test suites.
  */
 trait CommandSuiteBase extends SharedSparkSession {
-  def version: String = "V2" // The prefix is added to test names
+  def catalogVersion: String = "V2" // The catalog version is added to test names
+  def commandVersion: String = "V2" // The command version is added to test names
   def catalog: String = "test_catalog" // The default V2 catalog for testing
   def defaultUsing: String = "USING _" // The clause is used in creating v2 tables under testing
 
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/CommandSuiteBase.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/CommandSuiteBase.scala
index 0709b12..b07c507 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/CommandSuiteBase.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/CommandSuiteBase.scala
@@ -29,7 +29,8 @@ import org.apache.spark.sql.hive.test.TestHiveSingleton
  * settings for all unified datasource V1 and V2 test suites.
  */
 trait CommandSuiteBase extends TestHiveSingleton {
-  def version: String = "Hive V1" // The prefix is added to test names
+  def catalogVersion: String = "Hive V1" // The catalog version is added to test names
+  def commandVersion: String = "V1" // The command version is added to test names
   def catalog: String = CatalogManager.SESSION_CATALOG_NAME
   def defaultUsing: String = "USING HIVE" // The clause is used in creating tables under testing
 
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowNamespacesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowNamespacesSuite.scala
index 7aae000..015001f 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowNamespacesSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowNamespacesSuite.scala
@@ -25,6 +25,8 @@ import org.apache.spark.sql.internal.SQLConf
  * V1 Hive external table catalog.
  */
 class ShowNamespacesSuite extends v1.ShowNamespacesSuiteBase with CommandSuiteBase {
+  override def commandVersion: String = "V2" // There is only V2 variant of SHOW NAMESPACES.
+
   test("case sensitivity") {
     Seq(true, false).foreach { caseSensitive =>
       withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) {
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala
index 6050618..653a157 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala
@@ -23,7 +23,7 @@ import org.apache.spark.sql.execution.command.v1
  * The class contains tests for the `SHOW TABLES` command to check V1 Hive external table catalog.
  */
 class ShowTablesSuite extends v1.ShowTablesSuiteBase with CommandSuiteBase {
-  override def version: String = super[ShowTablesSuiteBase].version
+  override def commandVersion: String = super[ShowTablesSuiteBase].commandVersion
 
   test("hive client calls") {
     withNamespaceAndTable("ns", "tbl") { t =>

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org