You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2021/07/15 11:55:06 UTC

[spark] branch branch-3.2 updated: [SPARK-33898][SQL][FOLLOWUP] Fix the behavior of `SHOW CREATE TABLE` to output deterministic results

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new 4271099  [SPARK-33898][SQL][FOLLOWUP] Fix the behavior of `SHOW CREATE TABLE` to output deterministic results
4271099 is described below

commit 42710991e2e2964d2cc6dcbccb05e4a240917ca8
Author: Kousuke Saruta <sa...@oss.nttdata.com>
AuthorDate: Thu Jul 15 20:53:21 2021 +0900

    [SPARK-33898][SQL][FOLLOWUP] Fix the behavior of `SHOW CREATE TABLE` to output deterministic results
    
    ### What changes were proposed in this pull request?
    
    This PR fixes the behavior of `SHOW CREATE TABLE` added in `SPARK-33898` (#32931) so that it outputs deterministic results.
    The test `SPARK-33898: SHOW CREATE TABLE` in `DataSourceV2SQLSuite` compares two `CREATE TABLE` statements: one generated by `SHOW CREATE TABLE` against a created table, and the other the expected `CREATE TABLE` statement.
    
    The created table has options `from` and `to`, and they are declared in this order.
    ```
    CREATE TABLE $t (
      a bigint NOT NULL,
      b bigint,
      c bigint,
      `extra col` ARRAY<INT>,
      `<another>` STRUCT<x: INT, y: ARRAY<BOOLEAN>>
    )
    USING foo
    OPTIONS (
      from = 0,
      to = 1)
    COMMENT 'This is a comment'
    TBLPROPERTIES ('prop1' = '1')
    PARTITIONED BY (a)
    LOCATION '/tmp'
    ```
    
    The expected `CREATE TABLE` statement in the test code is as follows.
    ```
    "CREATE TABLE testcat.ns1.ns2.tbl (",
    "`a` BIGINT NOT NULL,",
    "`b` BIGINT,",
    "`c` BIGINT,",
    "`extra col` ARRAY<INT>,",
    "`<another>` STRUCT<`x`: INT, `y`: ARRAY<BOOLEAN>>)",
    "USING foo",
    "OPTIONS(",
    "'from' = '0',",
    "'to' = '1')",
    "PARTITIONED BY (a)",
    "COMMENT 'This is a comment'",
    "LOCATION '/tmp'",
    "TBLPROPERTIES(",
    "'prop1' = '1')"
    ```
    As you can see, the test expects `from` and `to` to appear in this order.
    However, options are stored in a `Map`, so the order of the keys is not guaranteed to be preserved.
    
    In fact, this test fails with Scala 2.13.
    ```
    [info] - SPARK-33898: SHOW CREATE TABLE *** FAILED *** (515 milliseconds)
    [info]   Array("CREATE TABLE testcat.ns1.ns2.tbl (", "`a` BIGINT NOT NULL,", "`b` BIGINT,", "`c` BIGINT,", "`extra col` ARRAY<INT>,", "`<another>` STRUCT<`x`: INT, `y`: ARRAY<BOOLEAN>>)", "USING foo", "OPTIONS(", "'to' = '1',", "'from' = '0')", "PARTITIONED BY (a)", "COMMENT 'This is a comment'", "LOCATION '/tmp'", "TBLPROPERTIES(", "'prop1' = '1')") did not equal Array("CREATE TABLE testcat.ns1.ns2.tbl (", "`a` BIGINT NOT NULL,", "`b` BIGINT,", "`c` BIGINT,", "`extra col` ARRAY<INT>, [...]
    ```
    On the current master, the test does not fail with Scala 2.12, but the output order of `SHOW CREATE TABLE` is still non-deterministic.
    
    ### Why are the changes needed?
    
    Bug fix.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    I confirmed that the modified test passed with both Scala 2.12 and Scala 2.13 with this change.
    
    Closes #33343 from sarutak/fix-show-create-table-test.
    
    Authored-by: Kousuke Saruta <sa...@oss.nttdata.com>
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
    (cherry picked from commit f95ca31c0f841975ac57e1d4ea2fc6ea4f622bab)
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
 .../sql/execution/datasources/v2/ShowCreateTableExec.scala  |  4 ++--
 .../apache/spark/sql/connector/DataSourceV2SQLSuite.scala   | 13 +++++++++----
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala
index fab0fd5..b2b90fb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala
@@ -71,7 +71,7 @@ case class ShowCreateTableExec(
       builder: StringBuilder,
       tableOptions: Map[String, String]): Unit = {
     if (tableOptions.nonEmpty) {
-      val props = tableOptions.map { case (key, value) =>
+      val props = tableOptions.toSeq.sortBy(_._1).map { case (key, value) =>
         s"'${escapeSingleQuotedString(key)}' = '${escapeSingleQuotedString(value)}'"
       }
       builder ++= "OPTIONS"
@@ -104,7 +104,7 @@ case class ShowCreateTableExec(
         && !key.startsWith(TableCatalog.OPTION_PREFIX)
         && !tableOptions.contains(key))
     if (showProps.nonEmpty) {
-      val props = showProps.map {
+      val props = showProps.toSeq.sortBy(_._1).map {
         case (key, value) =>
           s"'${escapeSingleQuotedString(key)}' = '${escapeSingleQuotedString(value)}'"
       }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
index 4f1f4c2..ac82721 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
@@ -1987,9 +1987,10 @@ class DataSourceV2SQLSuite
            |USING foo
            |OPTIONS (
            |  from = 0,
-           |  to = 1)
+           |  to = 1,
+           |  via = 2)
            |COMMENT 'This is a comment'
-           |TBLPROPERTIES ('prop1' = '1')
+           |TBLPROPERTIES ('prop1' = '1', 'prop2' = '2', 'prop3' = 3, 'prop4' = 4)
            |PARTITIONED BY (a)
            |LOCATION '/tmp'
         """.stripMargin)
@@ -2004,12 +2005,16 @@ class DataSourceV2SQLSuite
         "USING foo",
         "OPTIONS(",
         "'from' = '0',",
-        "'to' = '1')",
+        "'to' = '1',",
+        "'via' = '2')",
         "PARTITIONED BY (a)",
         "COMMENT 'This is a comment'",
         "LOCATION '/tmp'",
         "TBLPROPERTIES(",
-        "'prop1' = '1')"
+        "'prop1' = '1',",
+        "'prop2' = '2',",
+        "'prop3' = '3',",
+        "'prop4' = '4')"
       ))
     }
   }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org