You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/11/30 16:41:36 UTC
[spark] branch branch-3.0 updated: [SPARK-33588][SQL][3.0] Respect
the `spark.sql.caseSensitive` config while resolving partition spec in v1
`SHOW TABLE EXTENDED`
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 03291c8 [SPARK-33588][SQL][3.0] Respect the `spark.sql.caseSensitive` config while resolving partition spec in v1 `SHOW TABLE EXTENDED`
03291c8 is described below
commit 03291c80c5b1aa2b18e53617676f36d40e01188f
Author: Max Gekk <ma...@gmail.com>
AuthorDate: Mon Nov 30 08:37:13 2020 -0800
[SPARK-33588][SQL][3.0] Respect the `spark.sql.caseSensitive` config while resolving partition spec in v1 `SHOW TABLE EXTENDED`
### What changes were proposed in this pull request?
Perform partition spec normalization in `ShowTablesCommand` according to the table schema before getting partitions from the catalog. The normalization via `PartitioningUtils.normalizePartitionSpec()` adjusts the column names in the partition specification to match the real partition column names, using the session's resolver so that the case-sensitivity setting is respected.
### Why are the changes needed?
Even when `spark.sql.caseSensitive` is `false`, which is the default value, v1 `SHOW TABLE EXTENDED` is case sensitive when resolving the partition spec:
```sql
spark-sql> CREATE TABLE tbl1 (price int, qty int, year int, month int)
> USING parquet
> partitioned by (year, month);
spark-sql> INSERT INTO tbl1 PARTITION(year = 2015, month = 1) SELECT 1, 1;
spark-sql> SHOW TABLE EXTENDED LIKE 'tbl1' PARTITION(YEAR = 2015, Month = 1);
Error in query: Partition spec is invalid. The spec (YEAR, Month) must match the partition spec (year, month) defined in table '`default`.`tbl1`';
```
### Does this PR introduce _any_ user-facing change?
Yes. After the changes, the `SHOW TABLE EXTENDED` command respects the SQL config. For the example above, it returns the correct result:
```sql
spark-sql> SHOW TABLE EXTENDED LIKE 'tbl1' PARTITION(YEAR = 2015, Month = 1);
default tbl1 false Partition Values: [year=2015, month=1]
Location: file:/Users/maximgekk/spark-warehouse/tbl1/year=2015/month=1
Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat
Storage Properties: [serialization.format=1, path=file:/Users/maximgekk/spark-warehouse/tbl1]
Partition Parameters: {transient_lastDdlTime=1606595118, totalSize=623, numFiles=1}
Created Time: Sat Nov 28 23:25:18 MSK 2020
Last Access: UNKNOWN
Partition Statistics: 623 bytes
```
### How was this patch tested?
By running the modified test suite via:
```
$ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *DDLSuite"
```
Authored-by: Max Gekk <max.gekk@gmail.com>
Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
(cherry picked from commit 0054fc937f804660c6501d9d3f6319f3047a68f8)
Signed-off-by: Max Gekk <max.gekk@gmail.com>
Closes #30549 from MaxGekk/show-table-case-sensitive-spec-3.0.
Authored-by: Max Gekk <ma...@gmail.com>
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
.../spark/sql/execution/command/tables.scala | 17 +++++++++++------
.../sql-tests/results/show-tables.sql.out | 2 +-
.../spark/sql/execution/command/DDLSuite.scala | 22 ++++++++++++++++++++++
3 files changed, 34 insertions(+), 7 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index fc8cc11..75e0d2c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -884,12 +884,17 @@ case class ShowTablesCommand(
//
// Note: tableIdentifierPattern should be non-empty, otherwise a [[ParseException]]
// should have been thrown by the sql parser.
- val tableIdent = TableIdentifier(tableIdentifierPattern.get, Some(db))
- val table = catalog.getTableMetadata(tableIdent).identifier
- val partition = catalog.getPartition(tableIdent, partitionSpec.get)
- val database = table.database.getOrElse("")
- val tableName = table.table
- val isTemp = catalog.isTemporaryTable(table)
+ val table = catalog.getTableMetadata(TableIdentifier(tableIdentifierPattern.get, Some(db)))
+ val tableIdent = table.identifier
+ val normalizedSpec = PartitioningUtils.normalizePartitionSpec(
+ partitionSpec.get,
+ table.partitionColumnNames,
+ tableIdent.quotedString,
+ sparkSession.sessionState.conf.resolver)
+ val partition = catalog.getPartition(tableIdent, normalizedSpec)
+ val database = tableIdent.database.getOrElse("")
+ val tableName = tableIdent.table
+ val isTemp = catalog.isTemporaryTable(tableIdent)
val information = partition.simpleString
Seq(Row(database, tableName, isTemp, s"$information\n"))
}
diff --git a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out
index a95b02c..60c5e6d 100644
--- a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out
@@ -224,7 +224,7 @@ SHOW TABLE EXTENDED LIKE 'show_t1' PARTITION(a='Us', d=1)
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-Partition spec is invalid. The spec (a, d) must match the partition spec (c, d) defined in table '`showdb`.`show_t1`';
+a is not a valid partition column in table `showdb`.`show_t1`.;
-- !query
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index e4709e4..d0118ef1 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -32,6 +32,7 @@ import org.apache.spark.sql.catalyst.{QualifiedTableName, TableIdentifier}
import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchDatabaseException, NoSuchPartitionException, NoSuchTableException, TempTableAlreadyExistsException}
import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
+import org.apache.spark.sql.connector.catalog.CatalogManager
import org.apache.spark.sql.connector.catalog.SupportsNamespaces.PROP_OWNER
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION
@@ -3030,6 +3031,27 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
}
}
}
+
+ test("SPARK-33588: case sensitivity of partition spec") {
+ val t = "part_table"
+ withTable(t) {
+ sql(s"""
+ |CREATE TABLE $t (price int, qty int, year int, month int)
+ |USING $dataSource
+ |PARTITIONED BY (year, month)""".stripMargin)
+ sql(s"INSERT INTO $t PARTITION(year = 2015, month = 1) SELECT 1, 1")
+ Seq(
+ true -> "PARTITION(year = 2015, month = 1)",
+ false -> "PARTITION(YEAR = 2015, Month = 1)"
+ ).foreach { case (caseSensitive, partitionSpec) =>
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) {
+ val df = sql(s"SHOW TABLE EXTENDED LIKE '$t' $partitionSpec")
+ val information = df.select("information").first().getString(0)
+ assert(information.contains("Partition Values: [year=2015, month=1]"))
+ }
+ }
+ }
+ }
}
object FakeLocalFsFileSystem {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org