You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/11/30 16:41:36 UTC
[spark] branch branch-3.0 updated: [SPARK-33588][SQL][3.0] Respect the `spark.sql.caseSensitive` config while resolving partition spec in v1 `SHOW TABLE EXTENDED`

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 03291c8  [SPARK-33588][SQL][3.0] Respect the `spark.sql.caseSensitive` config while resolving partition spec in v1 `SHOW TABLE EXTENDED`
03291c8 is described below

commit 03291c80c5b1aa2b18e53617676f36d40e01188f
Author: Max Gekk <ma...@gmail.com>
AuthorDate: Mon Nov 30 08:37:13 2020 -0800

    [SPARK-33588][SQL][3.0] Respect the `spark.sql.caseSensitive` config while resolving partition spec in v1 `SHOW TABLE EXTENDED`
    
    ### What changes were proposed in this pull request?
    Perform partition spec normalization in `ShowTablesCommand` according to the table schema before getting partitions from the catalog. The normalization via `PartitioningUtils.normalizePartitionSpec()` adjusts the column names in the partition specification to match the real partition column names, taking the case sensitivity setting into account.
    
    ### Why are the changes needed?
    Even when `spark.sql.caseSensitive` is `false` (the default value), v1 `SHOW TABLE EXTENDED` treats the column names in the partition spec as case sensitive:
    ```sql
    spark-sql> CREATE TABLE tbl1 (price int, qty int, year int, month int)
             > USING parquet
             > partitioned by (year, month);
    spark-sql> INSERT INTO tbl1 PARTITION(year = 2015, month = 1) SELECT 1, 1;
    spark-sql> SHOW TABLE EXTENDED LIKE 'tbl1' PARTITION(YEAR = 2015, Month = 1);
    Error in query: Partition spec is invalid. The spec (YEAR, Month) must match the partition spec (year, month) defined in table '`default`.`tbl1`';
    ```
    
    ### Does this PR introduce _any_ user-facing change?
    Yes. After the changes, the `SHOW TABLE EXTENDED` command respects the `spark.sql.caseSensitive` SQL config. For the example above, it returns the correct result:
    ```sql
    spark-sql> SHOW TABLE EXTENDED LIKE 'tbl1' PARTITION(YEAR = 2015, Month = 1);
    default	tbl1	false	Partition Values: [year=2015, month=1]
    Location: file:/Users/maximgekk/spark-warehouse/tbl1/year=2015/month=1
    Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
    InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
    OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat
    Storage Properties: [serialization.format=1, path=file:/Users/maximgekk/spark-warehouse/tbl1]
    Partition Parameters: {transient_lastDdlTime=1606595118, totalSize=623, numFiles=1}
    Created Time: Sat Nov 28 23:25:18 MSK 2020
    Last Access: UNKNOWN
    Partition Statistics: 623 bytes
    ```
    
    ### How was this patch tested?
    By running the modified test suite via:
    ```
    $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *DDLSuite"
    ```
    
    Authored-by: Max Gekk <max.gekk@gmail.com>
    Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
    (cherry picked from commit 0054fc937f804660c6501d9d3f6319f3047a68f8)
    Signed-off-by: Max Gekk <max.gekk@gmail.com>
    
    Closes #30549 from MaxGekk/show-table-case-sensitive-spec-3.0.
    
    Authored-by: Max Gekk <ma...@gmail.com>
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 .../spark/sql/execution/command/tables.scala       | 17 +++++++++++------
 .../sql-tests/results/show-tables.sql.out          |  2 +-
 .../spark/sql/execution/command/DDLSuite.scala     | 22 ++++++++++++++++++++++
 3 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index fc8cc11..75e0d2c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -884,12 +884,17 @@ case class ShowTablesCommand(
       //
       // Note: tableIdentifierPattern should be non-empty, otherwise a [[ParseException]]
       // should have been thrown by the sql parser.
-      val tableIdent = TableIdentifier(tableIdentifierPattern.get, Some(db))
-      val table = catalog.getTableMetadata(tableIdent).identifier
-      val partition = catalog.getPartition(tableIdent, partitionSpec.get)
-      val database = table.database.getOrElse("")
-      val tableName = table.table
-      val isTemp = catalog.isTemporaryTable(table)
+      val table = catalog.getTableMetadata(TableIdentifier(tableIdentifierPattern.get, Some(db)))
+      val tableIdent = table.identifier
+      val normalizedSpec = PartitioningUtils.normalizePartitionSpec(
+        partitionSpec.get,
+        table.partitionColumnNames,
+        tableIdent.quotedString,
+        sparkSession.sessionState.conf.resolver)
+      val partition = catalog.getPartition(tableIdent, normalizedSpec)
+      val database = tableIdent.database.getOrElse("")
+      val tableName = tableIdent.table
+      val isTemp = catalog.isTemporaryTable(tableIdent)
       val information = partition.simpleString
       Seq(Row(database, tableName, isTemp, s"$information\n"))
     }
diff --git a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out
index a95b02c..60c5e6d 100644
--- a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out
@@ -224,7 +224,7 @@ SHOW TABLE EXTENDED LIKE 'show_t1' PARTITION(a='Us', d=1)
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Partition spec is invalid. The spec (a, d) must match the partition spec (c, d) defined in table '`showdb`.`show_t1`';
+a is not a valid partition column in table `showdb`.`show_t1`.;
 
 
 -- !query
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index e4709e4..d0118ef1 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -32,6 +32,7 @@ import org.apache.spark.sql.catalyst.{QualifiedTableName, TableIdentifier}
 import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchDatabaseException, NoSuchPartitionException, NoSuchTableException, TempTableAlreadyExistsException}
 import org.apache.spark.sql.catalyst.catalog._
 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
+import org.apache.spark.sql.connector.catalog.CatalogManager
 import org.apache.spark.sql.connector.catalog.SupportsNamespaces.PROP_OWNER
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION
@@ -3030,6 +3031,27 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
       }
     }
   }
+
+  test("SPARK-33588: case sensitivity of partition spec") {
+    val t = "part_table"
+    withTable(t) {
+      sql(s"""
+        |CREATE TABLE $t (price int, qty int, year int, month int)
+        |USING $dataSource
+        |PARTITIONED BY (year, month)""".stripMargin)
+      sql(s"INSERT INTO $t PARTITION(year = 2015, month = 1) SELECT 1, 1")
+      Seq(
+        true -> "PARTITION(year = 2015, month = 1)",
+        false -> "PARTITION(YEAR = 2015, Month = 1)"
+      ).foreach { case (caseSensitive, partitionSpec) =>
+        withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) {
+          val df = sql(s"SHOW TABLE EXTENDED LIKE '$t' $partitionSpec")
+          val information = df.select("information").first().getString(0)
+          assert(information.contains("Partition Values: [year=2015, month=1]"))
+        }
+      }
+    }
+  }
 }
 
 object FakeLocalFsFileSystem {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org