You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2021/10/13 19:14:25 UTC
[spark] branch master updated: [SPARK-36982] Migrate SHOW
NAMESPACES to use V2 command by default
This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 5ac76d9 [SPARK-36982] Migrate SHOW NAMESPACES to use V2 command by default
5ac76d9 is described below
commit 5ac76d9cb45d58eeb4253d50e90060a68c3e87cb
Author: Terry Kim <yu...@gmail.com>
AuthorDate: Wed Oct 13 22:12:56 2021 +0300
[SPARK-36982] Migrate SHOW NAMESPACES to use V2 command by default
### What changes were proposed in this pull request?
This PR proposes to use V2 commands as default as outlined in [SPARK-36588](https://issues.apache.org/jira/browse/SPARK-36588), and this PR migrates `SHOW NAMESPACES` to use v2 command by default. (Technically speaking, there is no v1 command for `SHOW NAMESPACES/DATABASES`, but this PR removes an extra check in `ResolveSessionCatalog` to handle session catalog.)
### Why are the changes needed?
It's been a while since we introduced the v2 commands, and it seems reasonable to use v2 commands by default even for the session catalog, with a legacy config to fall back to the v1 commands.
### Does this PR introduce _any_ user-facing change?
No. The user can still fall back to the v1 commands by setting `spark.sql.legacy.useV1Command` to `true`.
### How was this patch tested?
Added unit tests.
Closes #34255 from imback82/migrate_show_namespaces.
Authored-by: Terry Kim <yu...@gmail.com>
Signed-off-by: Max Gekk <ma...@gmail.com>
---
.../sql/catalyst/analysis/KeepLegacyOutputs.scala | 5 +++-
.../org/apache/spark/sql/internal/SQLConf.scala | 2 +-
.../catalyst/analysis/ResolveSessionCatalog.scala | 16 +---------
.../execution/command/DDLCommandTestUtils.scala | 7 +++--
.../command/ShowNamespacesSuiteBase.scala | 6 ++++
.../execution/command/TestsV1AndV2Commands.scala} | 35 ++++++++++++----------
.../execution/command/v1/CommandSuiteBase.scala | 3 +-
.../execution/command/v1/ShowNamespacesSuite.scala | 8 ++---
.../sql/execution/command/v1/ShowTablesSuite.scala | 27 ++---------------
.../execution/command/v2/CommandSuiteBase.scala | 3 +-
.../hive/execution/command/CommandSuiteBase.scala | 3 +-
.../execution/command/ShowNamespacesSuite.scala | 2 ++
.../hive/execution/command/ShowTablesSuite.scala | 2 +-
13 files changed, 49 insertions(+), 70 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/KeepLegacyOutputs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/KeepLegacyOutputs.scala
index baee2bd..3af5544 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/KeepLegacyOutputs.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/KeepLegacyOutputs.scala
@@ -17,7 +17,7 @@
package org.apache.spark.sql.catalyst.analysis
-import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ShowTables}
+import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ShowNamespaces, ShowTables}
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.catalyst.trees.TreePattern.COMMAND
import org.apache.spark.sql.internal.SQLConf
@@ -36,6 +36,9 @@ object KeepLegacyOutputs extends Rule[LogicalPlan] {
assert(s.output.length == 3)
val newOutput = s.output.head.withName("database") +: s.output.tail
s.copy(output = newOutput)
+ case s: ShowNamespaces =>
+ assert(s.output.length == 1)
+ s.copy(output = Seq(s.output.head.withName("databaseName")))
}
}
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 1dc4a8e..f0e7731 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -3364,7 +3364,7 @@ object SQLConf {
buildConf("spark.sql.legacy.keepCommandOutputSchema")
.internal()
.doc("When true, Spark will keep the output schema of commands such as SHOW DATABASES " +
- "unchanged, for v1 catalog and/or table.")
+ "unchanged.")
.version("3.0.2")
.booleanConf
.createWithDefault(false)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
index 2252225..c77be4e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
@@ -121,14 +121,6 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager)
case SetNamespaceLocation(DatabaseInSessionCatalog(db), location) =>
AlterDatabaseSetLocationCommand(db, location)
- case s @ ShowNamespaces(ResolvedNamespace(cata, _), _, output) if isSessionCatalog(cata) =>
- if (conf.getConf(SQLConf.LEGACY_KEEP_COMMAND_OUTPUT_SCHEMA)) {
- assert(output.length == 1)
- s.copy(output = Seq(output.head.withName("databaseName")))
- } else {
- s
- }
-
case RenameTable(ResolvedV1TableOrViewIdentifier(oldName), newName, isView) =>
AlterTableRenameCommand(oldName.asTableIdentifier, newName.asTableIdentifier, isView)
@@ -270,13 +262,7 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager)
DropDatabaseCommand(db, d.ifExists, d.cascade)
case ShowTables(DatabaseInSessionCatalog(db), pattern, output) if conf.useV1Command =>
- val newOutput = if (conf.getConf(SQLConf.LEGACY_KEEP_COMMAND_OUTPUT_SCHEMA)) {
- assert(output.length == 3)
- output.head.withName("database") +: output.tail
- } else {
- output
- }
- ShowTablesCommand(Some(db), pattern, newOutput)
+ ShowTablesCommand(Some(db), pattern, output)
case ShowTableExtended(
DatabaseInSessionCatalog(db),
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala
index f9e26f8..af3e92a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala
@@ -39,7 +39,9 @@ import org.apache.spark.sql.test.SQLTestUtils
*/
trait DDLCommandTestUtils extends SQLTestUtils {
// The version of the catalog under testing such as "V1", "V2", "Hive V1".
- protected def version: String
+ protected def catalogVersion: String
+ // The version of the SQL command under testing such as "V1", "V2".
+ protected def commandVersion: String
// Name of the command as SQL statement, for instance "SHOW PARTITIONS"
protected def command: String
// The catalog name which can be used in SQL statements under testing
@@ -51,7 +53,8 @@ trait DDLCommandTestUtils extends SQLTestUtils {
// the failed test in logs belongs to.
override def test(testName: String, testTags: Tag*)(testFun: => Any)
(implicit pos: Position): Unit = {
- super.test(s"$command $version: " + testName, testTags: _*)(testFun)
+ val testNamePrefix = s"$command using $catalogVersion catalog $commandVersion command"
+ super.test(s"$testNamePrefix: $testName", testTags: _*)(testFun)
}
protected def withNamespaceAndTable(ns: String, tableName: String, cat: String = catalog)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesSuiteBase.scala
index 790489e..1b37444 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesSuiteBase.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesSuiteBase.scala
@@ -128,4 +128,10 @@ trait ShowNamespacesSuiteBase extends QueryTest with DDLCommandTestUtils {
spark.sessionState.catalogManager.reset()
}
}
+
+ test("SPARK-34359: keep the legacy output schema") {
+ withSQLConf(SQLConf.LEGACY_KEEP_COMMAND_OUTPUT_SCHEMA.key -> "true") {
+ assert(sql("SHOW NAMESPACES").schema.fieldNames.toSeq == Seq("databaseName"))
+ }
+ }
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/KeepLegacyOutputs.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/TestsV1AndV2Commands.scala
similarity index 52%
copy from sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/KeepLegacyOutputs.scala
copy to sql/core/src/test/scala/org/apache/spark/sql/execution/command/TestsV1AndV2Commands.scala
index baee2bd..15976f2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/KeepLegacyOutputs.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/TestsV1AndV2Commands.scala
@@ -15,27 +15,30 @@
* limitations under the License.
*/
-package org.apache.spark.sql.catalyst.analysis
+package org.apache.spark.sql.execution.command
+
+import org.scalactic.source.Position
+import org.scalatest.Tag
-import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ShowTables}
-import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.catalyst.trees.TreePattern.COMMAND
import org.apache.spark.sql.internal.SQLConf
/**
- * A rule for keeping the SQL command's legacy outputs.
+ * The trait that enables running a test for both v1 and v2 command.
*/
-object KeepLegacyOutputs extends Rule[LogicalPlan] {
- def apply(plan: LogicalPlan): LogicalPlan = {
- if (!conf.getConf(SQLConf.LEGACY_KEEP_COMMAND_OUTPUT_SCHEMA)) {
- plan
- } else {
- plan.resolveOperatorsUpWithPruning(
- _.containsPattern(COMMAND)) {
- case s: ShowTables =>
- assert(s.output.length == 3)
- val newOutput = s.output.head.withName("database") +: s.output.tail
- s.copy(output = newOutput)
+trait TestsV1AndV2Commands extends DDLCommandTestUtils {
+ private var _version: String = ""
+ override def commandVersion: String = _version
+
+ // Tests using V1 catalogs will run with `spark.sql.legacy.useV1Command` on and off
+ // to test both V1 and V2 commands.
+ override def test(testName: String, testTags: Tag*)(testFun: => Any)
+ (implicit pos: Position): Unit = {
+ Seq(true, false).foreach { useV1Command =>
+ _version = if (useV1Command) "V1" else "V2"
+ super.test(testName, testTags: _*) {
+ withSQLConf(SQLConf.LEGACY_USE_V1_COMMAND.key -> useV1Command.toString) {
+ testFun
+ }
}
}
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CommandSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CommandSuiteBase.scala
index 80c552d..24f3ac3 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CommandSuiteBase.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CommandSuiteBase.scala
@@ -28,7 +28,8 @@ import org.apache.spark.sql.test.SharedSparkSession
* settings for all unified datasource V1 and V2 test suites.
*/
trait CommandSuiteBase extends SharedSparkSession {
- def version: String = "V1" // The prefix is added to test names
+ def catalogVersion: String = "V1" // The catalog version is added to test names
+ def commandVersion: String = "V1" // The command version is added to test names
def catalog: String = CatalogManager.SESSION_CATALOG_NAME
def defaultUsing: String = "USING parquet" // The clause is used in creating tables under testing
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowNamespacesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowNamespacesSuite.scala
index 52742a2..54c5d22 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowNamespacesSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowNamespacesSuite.scala
@@ -38,15 +38,11 @@ trait ShowNamespacesSuiteBase extends command.ShowNamespacesSuiteBase {
}.getMessage
assert(errMsg.contains("Namespace 'dummy' not found"))
}
-
- test("SPARK-34359: keep the legacy output schema") {
- withSQLConf(SQLConf.LEGACY_KEEP_COMMAND_OUTPUT_SCHEMA.key -> "true") {
- assert(sql("SHOW NAMESPACES").schema.fieldNames.toSeq == Seq("databaseName"))
- }
- }
}
class ShowNamespacesSuite extends ShowNamespacesSuiteBase with CommandSuiteBase {
+ override def commandVersion: String = "V2" // There is only V2 variant of SHOW NAMESPACES.
+
test("case sensitivity") {
Seq(true, false).foreach { caseSensitive =>
withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala
index 23b9b54..68ad1c4 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala
@@ -17,9 +17,6 @@
package org.apache.spark.sql.execution.command.v1
-import org.scalactic.source.Position
-import org.scalatest.Tag
-
import org.apache.spark.sql.{AnalysisException, Row, SaveMode}
import org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException
import org.apache.spark.sql.execution.command
@@ -33,28 +30,8 @@ import org.apache.spark.sql.internal.SQLConf
* - V1 In-Memory catalog: `org.apache.spark.sql.execution.command.v1.ShowTablesSuite`
* - V1 Hive External catalog: `org.apache.spark.sql.hive.execution.command.ShowTablesSuite`
*/
-trait ShowTablesSuiteBase extends command.ShowTablesSuiteBase {
+trait ShowTablesSuiteBase extends command.ShowTablesSuiteBase with command.TestsV1AndV2Commands {
override def defaultNamespace: Seq[String] = Seq("default")
- var _version: String = ""
- override def version: String = _version
-
- // Tests using V1 catalogs will run with `spark.sql.legacy.useV1Command` on and off
- // to test both V1 and V2 commands.
- override def test(testName: String, testTags: Tag*)(testFun: => Any)
- (implicit pos: Position): Unit = {
- Seq(true, false).foreach { useV1Command =>
- _version = if (useV1Command) {
- "using V1 catalog with V1 command"
- } else {
- "using V1 catalog with V2 command"
- }
- super.test(testName, testTags: _*) {
- withSQLConf(SQLConf.LEGACY_USE_V1_COMMAND.key -> useV1Command.toString) {
- testFun
- }
- }
- }
- }
private def withSourceViews(f: => Unit): Unit = {
withTable("source", "source2") {
@@ -165,7 +142,7 @@ trait ShowTablesSuiteBase extends command.ShowTablesSuiteBase {
* The class contains tests for the `SHOW TABLES` command to check V1 In-Memory table catalog.
*/
class ShowTablesSuite extends ShowTablesSuiteBase with CommandSuiteBase {
- override def version: String = super[ShowTablesSuiteBase].version
+ override def commandVersion: String = super[ShowTablesSuiteBase].commandVersion
test("SPARK-33670: show partitions from a datasource table") {
import testImplicits._
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CommandSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CommandSuiteBase.scala
index bed04f4..1ff9e74 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CommandSuiteBase.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CommandSuiteBase.scala
@@ -30,7 +30,8 @@ import org.apache.spark.sql.test.SharedSparkSession
* for all unified datasource V1 and V2 test suites.
*/
trait CommandSuiteBase extends SharedSparkSession {
- def version: String = "V2" // The prefix is added to test names
+ def catalogVersion: String = "V2" // The catalog version is added to test names
+ def commandVersion: String = "V2" // The command version is added to test names
def catalog: String = "test_catalog" // The default V2 catalog for testing
def defaultUsing: String = "USING _" // The clause is used in creating v2 tables under testing
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/CommandSuiteBase.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/CommandSuiteBase.scala
index 0709b12..b07c507 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/CommandSuiteBase.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/CommandSuiteBase.scala
@@ -29,7 +29,8 @@ import org.apache.spark.sql.hive.test.TestHiveSingleton
* settings for all unified datasource V1 and V2 test suites.
*/
trait CommandSuiteBase extends TestHiveSingleton {
- def version: String = "Hive V1" // The prefix is added to test names
+ def catalogVersion: String = "Hive V1" // The catalog version is added to test names
+ def commandVersion: String = "V1" // The command version is added to test names
def catalog: String = CatalogManager.SESSION_CATALOG_NAME
def defaultUsing: String = "USING HIVE" // The clause is used in creating tables under testing
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowNamespacesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowNamespacesSuite.scala
index 7aae000..015001f 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowNamespacesSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowNamespacesSuite.scala
@@ -25,6 +25,8 @@ import org.apache.spark.sql.internal.SQLConf
* V1 Hive external table catalog.
*/
class ShowNamespacesSuite extends v1.ShowNamespacesSuiteBase with CommandSuiteBase {
+ override def commandVersion: String = "V2" // There is only V2 variant of SHOW NAMESPACES.
+
test("case sensitivity") {
Seq(true, false).foreach { caseSensitive =>
withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) {
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala
index 6050618..653a157 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala
@@ -23,7 +23,7 @@ import org.apache.spark.sql.execution.command.v1
* The class contains tests for the `SHOW TABLES` command to check V1 Hive external table catalog.
*/
class ShowTablesSuite extends v1.ShowTablesSuiteBase with CommandSuiteBase {
- override def version: String = super[ShowTablesSuiteBase].version
+ override def commandVersion: String = super[ShowTablesSuiteBase].commandVersion
test("hive client calls") {
withNamespaceAndTable("ns", "tbl") { t =>
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org