You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2021/07/15 11:55:06 UTC
[spark] branch branch-3.2 updated: [SPARK-33898][SQL][FOLLOWUP] Fix
the behavior of `SHOW CREATE TABLE` to output deterministic results
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.2 by this push:
new 4271099 [SPARK-33898][SQL][FOLLOWUP] Fix the behavior of `SHOW CREATE TABLE` to output deterministic results
4271099 is described below
commit 42710991e2e2964d2cc6dcbccb05e4a240917ca8
Author: Kousuke Saruta <sa...@oss.nttdata.com>
AuthorDate: Thu Jul 15 20:53:21 2021 +0900
[SPARK-33898][SQL][FOLLOWUP] Fix the behavior of `SHOW CREATE TABLE` to output deterministic results
### What changes were proposed in this pull request?
This PR fixes the behavior of `SHOW CREATE TABLE`, added in `SPARK-33898` (#32931), so that it outputs deterministic results.
A test `SPARK-33898: SHOW CREATE TABLE` in `DataSourceV2SQLSuite` compares two `CREATE TABLE` statements. One is generated by `SHOW CREATE TABLE` against a created table and the other is the expected `CREATE TABLE` statement.
The created table has options `from` and `to`, and they are declared in this order.
```
CREATE TABLE $t (
a bigint NOT NULL,
b bigint,
c bigint,
`extra col` ARRAY<INT>,
`<another>` STRUCT<x: INT, y: ARRAY<BOOLEAN>>
)
USING foo
OPTIONS (
from = 0,
to = 1)
COMMENT 'This is a comment'
TBLPROPERTIES ('prop1' = '1')
PARTITIONED BY (a)
LOCATION '/tmp'
```
The expected `CREATE TABLE` statement in the test code is as follows.
```
"CREATE TABLE testcat.ns1.ns2.tbl (",
"`a` BIGINT NOT NULL,",
"`b` BIGINT,",
"`c` BIGINT,",
"`extra col` ARRAY<INT>,",
"`<another>` STRUCT<`x`: INT, `y`: ARRAY<BOOLEAN>>)",
"USING foo",
"OPTIONS(",
"'from' = '0',",
"'to' = '1')",
"PARTITIONED BY (a)",
"COMMENT 'This is a comment'",
"LOCATION '/tmp'",
"TBLPROPERTIES(",
"'prop1' = '1')"
```
As you can see, the test expects `from` to appear before `to`.
But options are stored in a `Map`, so the order of the keys is not guaranteed to be preserved.
In fact, this test fails with Scala 2.13.
```
[info] - SPARK-33898: SHOW CREATE TABLE *** FAILED *** (515 milliseconds)
[info] Array("CREATE TABLE testcat.ns1.ns2.tbl (", "`a` BIGINT NOT NULL,", "`b` BIGINT,", "`c` BIGINT,", "`extra col` ARRAY<INT>,", "`<another>` STRUCT<`x`: INT, `y`: ARRAY<BOOLEAN>>)", "USING foo", "OPTIONS(", "'to' = '1',", "'from' = '0')", "PARTITIONED BY (a)", "COMMENT 'This is a comment'", "LOCATION '/tmp'", "TBLPROPERTIES(", "'prop1' = '1')") did not equal Array("CREATE TABLE testcat.ns1.ns2.tbl (", "`a` BIGINT NOT NULL,", "`b` BIGINT,", "`c` BIGINT,", "`extra col` ARRAY<INT>, [...]
```
In the current master, the test happens to pass with Scala 2.12, but the order of the output is still non-deterministic.
### Why are the changes needed?
Bug fix.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
I confirmed that the modified test passed with both Scala 2.12 and Scala 2.13 with this change.
Closes #33343 from sarutak/fix-show-create-table-test.
Authored-by: Kousuke Saruta <sa...@oss.nttdata.com>
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
(cherry picked from commit f95ca31c0f841975ac57e1d4ea2fc6ea4f622bab)
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
.../sql/execution/datasources/v2/ShowCreateTableExec.scala | 4 ++--
.../apache/spark/sql/connector/DataSourceV2SQLSuite.scala | 13 +++++++++----
2 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala
index fab0fd5..b2b90fb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala
@@ -71,7 +71,7 @@ case class ShowCreateTableExec(
builder: StringBuilder,
tableOptions: Map[String, String]): Unit = {
if (tableOptions.nonEmpty) {
- val props = tableOptions.map { case (key, value) =>
+ val props = tableOptions.toSeq.sortBy(_._1).map { case (key, value) =>
s"'${escapeSingleQuotedString(key)}' = '${escapeSingleQuotedString(value)}'"
}
builder ++= "OPTIONS"
@@ -104,7 +104,7 @@ case class ShowCreateTableExec(
&& !key.startsWith(TableCatalog.OPTION_PREFIX)
&& !tableOptions.contains(key))
if (showProps.nonEmpty) {
- val props = showProps.map {
+ val props = showProps.toSeq.sortBy(_._1).map {
case (key, value) =>
s"'${escapeSingleQuotedString(key)}' = '${escapeSingleQuotedString(value)}'"
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
index 4f1f4c2..ac82721 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
@@ -1987,9 +1987,10 @@ class DataSourceV2SQLSuite
|USING foo
|OPTIONS (
| from = 0,
- | to = 1)
+ | to = 1,
+ | via = 2)
|COMMENT 'This is a comment'
- |TBLPROPERTIES ('prop1' = '1')
+ |TBLPROPERTIES ('prop1' = '1', 'prop2' = '2', 'prop3' = 3, 'prop4' = 4)
|PARTITIONED BY (a)
|LOCATION '/tmp'
""".stripMargin)
@@ -2004,12 +2005,16 @@ class DataSourceV2SQLSuite
"USING foo",
"OPTIONS(",
"'from' = '0',",
- "'to' = '1')",
+ "'to' = '1',",
+ "'via' = '2')",
"PARTITIONED BY (a)",
"COMMENT 'This is a comment'",
"LOCATION '/tmp'",
"TBLPROPERTIES(",
- "'prop1' = '1')"
+ "'prop1' = '1',",
+ "'prop2' = '2',",
+ "'prop3' = '3',",
+ "'prop4' = '4')"
))
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org