You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/12/28 06:49:15 UTC
[spark] branch branch-3.1 updated: [SPARK-33901][SQL] Fix Char and
Varchar display error after DDLs
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 93ed055 [SPARK-33901][SQL] Fix Char and Varchar display error after DDLs
93ed055 is described below
commit 93ed055e7ab30ca081a1f7f2e01e971179b8e0b4
Author: Kent Yao <ya...@apache.org>
AuthorDate: Mon Dec 28 06:48:27 2020 +0000
[SPARK-33901][SQL] Fix Char and Varchar display error after DDLs
### What changes were proposed in this pull request?
After CTAS / CREATE TABLE LIKE / CVAS (CREATE VIEW AS SELECT) / ALTER TABLE ADD COLUMNS, the target table or view displays its char/varchar columns as string instead of char/varchar.
### Why are the changes needed?
bugfix
### Does this PR introduce _any_ user-facing change?
no
### How was this patch tested?
new tests
Closes #30918 from yaooqinn/SPARK-33901.
Lead-authored-by: Kent Yao <ya...@apache.org>
Co-authored-by: Kent Yao <ya...@hotmail.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
(cherry picked from commit 3fdbc48373cdf12b8ba05632bc65ad49b7af1afb)
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../spark/sql/catalyst/util/CharVarcharUtils.scala | 10 +
.../execution/command/createDataSourceTables.scala | 4 +-
.../apache/spark/sql/execution/command/ddl.scala | 8 +-
.../spark/sql/execution/command/tables.scala | 6 +-
.../apache/spark/sql/execution/command/views.scala | 7 +-
.../datasources/v2/WriteToDataSourceV2Exec.scala | 12 +-
.../apache/spark/sql/internal/CatalogImpl.scala | 3 +-
.../resources/sql-tests/inputs/charvarchar.sql | 62 ++
.../sql-tests/results/charvarchar.sql.out | 663 +++++++++++++++++++++
.../execution/command/CharVarcharDDLTestBase.scala | 42 +-
.../execution/CreateHiveTableAsSelectCommand.scala | 4 +-
11 files changed, 805 insertions(+), 16 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala
index cfdc50d..eaafe35 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala
@@ -127,6 +127,16 @@ object CharVarcharUtils extends Logging {
}
/**
+ * Re-construct the original schema from the type string in the given metadata of each field.
+ */
+ def getRawSchema(schema: StructType): StructType = {
+ val fields = schema.map { field =>
+ getRawType(field.metadata).map(dt => field.copy(dataType = dt)).getOrElse(field)
+ }
+ StructType(fields)
+ }
+
+ /**
* Returns expressions to apply read-side char type padding for the given attributes.
*
* For a CHAR(N) column/field and the length of string value is M
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala
index 6ed40aa..be7fa7b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala
@@ -22,6 +22,7 @@ import java.net.URI
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.util.CharVarcharUtils
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.execution.datasources._
import org.apache.spark.sql.sources.BaseRelation
@@ -174,12 +175,13 @@ case class CreateDataSourceTableAsSelectCommand(
}
val result = saveDataIntoTable(
sparkSession, table, tableLocation, child, SaveMode.Overwrite, tableExists = false)
+ val tableSchema = CharVarcharUtils.getRawSchema(result.schema)
val newTable = table.copy(
storage = table.storage.copy(locationUri = tableLocation),
// We will use the schema of resolved.relation as the schema of the table (instead of
// the schema of df). It is important since the nullability may be changed by the relation
// provider (for example, see org.apache.spark.sql.parquet.DefaultSource).
- schema = result.schema)
+ schema = tableSchema)
// Table location is already validated. No need to check it again during table creation.
sessionState.catalog.createTable(newTable, ignoreIfExists = false, validateLocation = false)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
index 9300e25..601594b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
@@ -270,7 +270,7 @@ case class AlterTableSetPropertiesCommand(
override def run(sparkSession: SparkSession): Seq[Row] = {
val catalog = sparkSession.sessionState.catalog
- val table = catalog.getTableMetadata(tableName)
+ val table = catalog.getTableRawMetadata(tableName)
DDLUtils.verifyAlterTableType(catalog, table, isView)
// This overrides old properties and update the comment parameter of CatalogTable
// with the newly added/modified comment since CatalogTable also holds comment as its
@@ -302,7 +302,7 @@ case class AlterTableUnsetPropertiesCommand(
override def run(sparkSession: SparkSession): Seq[Row] = {
val catalog = sparkSession.sessionState.catalog
- val table = catalog.getTableMetadata(tableName)
+ val table = catalog.getTableRawMetadata(tableName)
DDLUtils.verifyAlterTableType(catalog, table, isView)
if (!ifExists) {
propKeys.foreach { k =>
@@ -414,7 +414,7 @@ case class AlterTableSerDePropertiesCommand(
override def run(sparkSession: SparkSession): Seq[Row] = {
val catalog = sparkSession.sessionState.catalog
- val table = catalog.getTableMetadata(tableName)
+ val table = catalog.getTableRawMetadata(tableName)
DDLUtils.verifyAlterTableType(catalog, table, isView = false)
// For datasource tables, disallow setting serde or specifying partition
if (partSpec.isDefined && DDLUtils.isDatasourceTable(table)) {
@@ -629,7 +629,7 @@ case class AlterTableRecoverPartitionsCommand(
override def run(spark: SparkSession): Seq[Row] = {
val catalog = spark.sessionState.catalog
- val table = catalog.getTableMetadata(tableName)
+ val table = catalog.getTableRawMetadata(tableName)
val tableIdentWithDB = table.identifier.quotedString
DDLUtils.verifyAlterTableType(catalog, table, isView = false)
if (table.partitionColumnNames.isEmpty) {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index c29c609..e7a682d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -115,12 +115,13 @@ case class CreateTableLikeCommand(
CatalogTableType.EXTERNAL
}
+ val newTableSchema = CharVarcharUtils.getRawSchema(sourceTableDesc.schema)
val newTableDesc =
CatalogTable(
identifier = targetTable,
tableType = tblType,
storage = newStorage,
- schema = sourceTableDesc.schema,
+ schema = newTableSchema,
provider = newProvider,
partitionColumnNames = sourceTableDesc.partitionColumnNames,
bucketSpec = sourceTableDesc.bucketSpec,
@@ -236,7 +237,8 @@ case class AlterTableAddColumnsCommand(
conf.caseSensitiveAnalysis)
DDLUtils.checkDataColNames(catalogTable, colsToAdd.map(_.name))
- catalog.alterTableDataSchema(table, StructType(catalogTable.dataSchema ++ colsToAdd))
+ val existingSchema = CharVarcharUtils.getRawSchema(catalogTable.dataSchema)
+ catalog.alterTableDataSchema(table, StructType(existingSchema ++ colsToAdd))
Seq.empty[Row]
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
index 6f32f9d..7b8c44e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
@@ -29,6 +29,7 @@ import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable
import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, SubqueryExpression}
import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, View}
+import org.apache.spark.sql.catalyst.util.CharVarcharUtils
import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper
import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
import org.apache.spark.sql.types.{BooleanType, MetadataBuilder, StringType, StructType}
@@ -219,7 +220,8 @@ case class CreateViewCommand(
throw new AnalysisException(
"It is not allowed to create a persisted view from the Dataset API")
}
- val aliasedSchema = aliasPlan(session, analyzedPlan).schema
+ val aliasedSchema = CharVarcharUtils.getRawSchema(
+ aliasPlan(session, analyzedPlan).schema)
val newProperties = generateViewProperties(
properties, session, analyzedPlan, aliasedSchema.fieldNames)
@@ -296,8 +298,9 @@ case class AlterViewAsCommand(
val newProperties = generateViewProperties(
viewMeta.properties, session, analyzedPlan, analyzedPlan.schema.fieldNames)
+ val newSchema = CharVarcharUtils.getRawSchema(analyzedPlan.schema)
val updatedViewMeta = viewMeta.copy(
- schema = analyzedPlan.schema,
+ schema = newSchema,
properties = newProperties,
viewOriginalText = Some(originalText),
viewText = Some(originalText))
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala
index f5f77d3..b0aff4a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala
@@ -31,6 +31,7 @@ import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NoSuchTableException, TableAlreadyExistsException}
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.util.CharVarcharUtils
import org.apache.spark.sql.connector.catalog.{Identifier, StagedTable, StagingTableCatalog, SupportsWrite, Table, TableCatalog}
import org.apache.spark.sql.connector.expressions.Transform
import org.apache.spark.sql.connector.write.{BatchWrite, DataWriterFactory, LogicalWriteInfoImpl, PhysicalWriteInfoImpl, SupportsDynamicOverwrite, SupportsOverwrite, SupportsTruncate, V1WriteBuilder, WriteBuilder, WriterCommitMessage}
@@ -78,7 +79,8 @@ case class CreateTableAsSelectExec(
throw new TableAlreadyExistsException(ident)
}
- val table = catalog.createTable(ident, query.schema.asNullable,
+ val schema = CharVarcharUtils.getRawSchema(query.schema).asNullable
+ val table = catalog.createTable(ident, schema,
partitioning.toArray, properties.asJava)
writeToTable(catalog, table, writeOptions, ident)
}
@@ -111,8 +113,9 @@ case class AtomicCreateTableAsSelectExec(
throw new TableAlreadyExistsException(ident)
}
+ val schema = CharVarcharUtils.getRawSchema(query.schema).asNullable
val stagedTable = catalog.stageCreate(
- ident, query.schema.asNullable, partitioning.toArray, properties.asJava)
+ ident, schema, partitioning.toArray, properties.asJava)
writeToTable(catalog, stagedTable, writeOptions, ident)
}
}
@@ -154,8 +157,9 @@ case class ReplaceTableAsSelectExec(
} else if (!orCreate) {
throw new CannotReplaceMissingTableException(ident)
}
+ val schema = CharVarcharUtils.getRawSchema(query.schema).asNullable
val table = catalog.createTable(
- ident, query.schema.asNullable, partitioning.toArray, properties.asJava)
+ ident, schema, partitioning.toArray, properties.asJava)
writeToTable(catalog, table, writeOptions, ident)
}
}
@@ -184,7 +188,7 @@ case class AtomicReplaceTableAsSelectExec(
orCreate: Boolean) extends TableWriteExecHelper {
override protected def run(): Seq[InternalRow] = {
- val schema = query.schema.asNullable
+ val schema = CharVarcharUtils.getRawSchema(query.schema).asNullable
if (catalog.tableExists(ident)) {
val table = catalog.loadTable(ident)
uncacheTable(session, catalog, table, ident)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
index 8008a21..d817fed 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
@@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.{DefinedByConstructorParams, FunctionIdenti
import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.catalyst.plans.logical.LocalRelation
+import org.apache.spark.sql.catalyst.util.CharVarcharUtils
import org.apache.spark.sql.execution.command.AlterTableRecoverPartitionsCommand
import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource}
import org.apache.spark.sql.types.StructType
@@ -181,7 +182,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
new Column(
name = c.name,
description = c.getComment().orNull,
- dataType = c.dataType.catalogString,
+ dataType = CharVarcharUtils.getRawType(c.metadata).getOrElse(c.dataType).catalogString,
nullable = c.nullable,
isPartition = partitionColumnNames.contains(c.name),
isBucket = bucketColumnNames.contains(c.name))
diff --git a/sql/core/src/test/resources/sql-tests/inputs/charvarchar.sql b/sql/core/src/test/resources/sql-tests/inputs/charvarchar.sql
new file mode 100644
index 0000000..dbdb8cc
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/charvarchar.sql
@@ -0,0 +1,62 @@
+create table char_tbl(c char(5), v varchar(6)) using parquet;
+desc formatted char_tbl;
+desc formatted char_tbl c;
+show create table char_tbl;
+
+create table char_tbl2 using parquet as select * from char_tbl;
+show create table char_tbl2;
+desc formatted char_tbl2;
+desc formatted char_tbl2 c;
+
+create table char_tbl3 like char_tbl;
+desc formatted char_tbl3;
+desc formatted char_tbl3 c;
+show create table char_tbl3;
+
+create view char_view as select * from char_tbl;
+desc formatted char_view;
+desc formatted char_view c;
+show create table char_view;
+
+alter table char_tbl rename to char_tbl1;
+desc formatted char_tbl1;
+
+alter table char_tbl1 change column c type char(6);
+alter table char_tbl1 change column c type char(5);
+desc formatted char_tbl1;
+
+alter table char_tbl1 add columns (d char(5));
+desc formatted char_tbl1;
+
+alter view char_view as select * from char_tbl2;
+desc formatted char_view;
+
+alter table char_tbl1 SET TBLPROPERTIES('yes'='no');
+desc formatted char_tbl1;
+
+alter view char_view SET TBLPROPERTIES('yes'='no');
+desc formatted char_view;
+
+alter table char_tbl1 UNSET TBLPROPERTIES('yes');
+desc formatted char_tbl1;
+
+alter view char_view UNSET TBLPROPERTIES('yes');
+desc formatted char_view;
+
+alter table char_tbl1 SET SERDEPROPERTIES('yes'='no');
+desc formatted char_tbl1;
+
+create table char_part(c1 char(5), c2 char(2), v1 varchar(6), v2 varchar(2)) using parquet partitioned by (v2, c2);
+desc formatted char_part;
+
+alter table char_part add partition (v2='ke', c2='nt') location 'loc1';
+desc formatted char_part;
+
+alter table char_part partition (v2='ke') rename to partition (v2='nt');
+desc formatted char_part;
+
+alter table char_part partition (v2='ke', c2='nt') set location 'loc2';
+desc formatted char_part;
+
+MSCK REPAIR TABLE char_part;
+desc formatted char_part;
diff --git a/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out b/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out
new file mode 100644
index 0000000..774235e
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out
@@ -0,0 +1,663 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 45
+
+
+-- !query
+create table char_tbl(c char(5), v varchar(6)) using parquet
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+desc formatted char_tbl
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+c char(5)
+v varchar(6)
+
+# Detailed Table Information
+Database default
+Table char_tbl
+Created Time [not included in comparison]
+Last Access [not included in comparison]
+Created By [not included in comparison]
+Type MANAGED
+Provider parquet
+Location [not included in comparison]/{warehouse_dir}/char_tbl
+
+
+-- !query
+desc formatted char_tbl c
+-- !query schema
+struct<info_name:string,info_value:string>
+-- !query output
+col_name c
+data_type char(5)
+comment NULL
+min NULL
+max NULL
+num_nulls NULL
+distinct_count NULL
+avg_col_len NULL
+max_col_len NULL
+histogram NULL
+
+
+-- !query
+show create table char_tbl
+-- !query schema
+struct<createtab_stmt:string>
+-- !query output
+CREATE TABLE `default`.`char_tbl` (
+ `c` CHAR(5),
+ `v` VARCHAR(6))
+USING parquet
+
+
+-- !query
+create table char_tbl2 using parquet as select * from char_tbl
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+show create table char_tbl2
+-- !query schema
+struct<createtab_stmt:string>
+-- !query output
+CREATE TABLE `default`.`char_tbl2` (
+ `c` CHAR(5),
+ `v` VARCHAR(6))
+USING parquet
+
+
+-- !query
+desc formatted char_tbl2
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+c char(5)
+v varchar(6)
+
+# Detailed Table Information
+Database default
+Table char_tbl2
+Created Time [not included in comparison]
+Last Access [not included in comparison]
+Created By [not included in comparison]
+Type MANAGED
+Provider parquet
+Location [not included in comparison]/{warehouse_dir}/char_tbl2
+
+
+-- !query
+desc formatted char_tbl2 c
+-- !query schema
+struct<info_name:string,info_value:string>
+-- !query output
+col_name c
+data_type char(5)
+comment NULL
+min NULL
+max NULL
+num_nulls NULL
+distinct_count NULL
+avg_col_len NULL
+max_col_len NULL
+histogram NULL
+
+
+-- !query
+create table char_tbl3 like char_tbl
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+desc formatted char_tbl3
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+c char(5)
+v varchar(6)
+
+# Detailed Table Information
+Database default
+Table char_tbl3
+Created Time [not included in comparison]
+Last Access [not included in comparison]
+Created By [not included in comparison]
+Type MANAGED
+Provider parquet
+Location [not included in comparison]/{warehouse_dir}/char_tbl3
+
+
+-- !query
+desc formatted char_tbl3 c
+-- !query schema
+struct<info_name:string,info_value:string>
+-- !query output
+col_name c
+data_type char(5)
+comment NULL
+min NULL
+max NULL
+num_nulls NULL
+distinct_count NULL
+avg_col_len NULL
+max_col_len NULL
+histogram NULL
+
+
+-- !query
+show create table char_tbl3
+-- !query schema
+struct<createtab_stmt:string>
+-- !query output
+CREATE TABLE `default`.`char_tbl3` (
+ `c` CHAR(5),
+ `v` VARCHAR(6))
+USING parquet
+
+
+-- !query
+create view char_view as select * from char_tbl
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+desc formatted char_view
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+c char(5)
+v varchar(6)
+
+# Detailed Table Information
+Database default
+Table char_view
+Created Time [not included in comparison]
+Last Access [not included in comparison]
+Created By [not included in comparison]
+Type VIEW
+View Text select * from char_tbl
+View Original Text select * from char_tbl
+View Catalog and Namespace spark_catalog.default
+View Query Output Columns [c, v]
+Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=default, view.query.out.col.0=c, view.query.out.col.1=v, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[]]
+
+
+-- !query
+desc formatted char_view c
+-- !query schema
+struct<info_name:string,info_value:string>
+-- !query output
+col_name c
+data_type char(5)
+comment NULL
+min NULL
+max NULL
+num_nulls NULL
+distinct_count NULL
+avg_col_len NULL
+max_col_len NULL
+histogram NULL
+
+
+-- !query
+show create table char_view
+-- !query schema
+struct<createtab_stmt:string>
+-- !query output
+CREATE VIEW `default`.`char_view` (
+ `c`,
+ `v`)
+AS select * from char_tbl
+
+
+-- !query
+alter table char_tbl rename to char_tbl1
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+desc formatted char_tbl1
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+c char(5)
+v varchar(6)
+
+# Detailed Table Information
+Database default
+Table char_tbl1
+Created Time [not included in comparison]
+Last Access [not included in comparison]
+Created By [not included in comparison]
+Type MANAGED
+Provider parquet
+Location [not included in comparison]/{warehouse_dir}/char_tbl1
+
+
+-- !query
+alter table char_tbl1 change column c type char(6)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+ALTER TABLE CHANGE COLUMN is not supported for changing column 'c' with type 'CharType(5)' to 'c' with type 'CharType(6)'
+
+
+-- !query
+alter table char_tbl1 change column c type char(5)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+desc formatted char_tbl1
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+c char(5)
+v varchar(6)
+
+# Detailed Table Information
+Database default
+Table char_tbl1
+Created Time [not included in comparison]
+Last Access [not included in comparison]
+Created By [not included in comparison]
+Type MANAGED
+Provider parquet
+Location [not included in comparison]/{warehouse_dir}/char_tbl1
+
+
+-- !query
+alter table char_tbl1 add columns (d char(5))
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+desc formatted char_tbl1
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+c char(5)
+v varchar(6)
+d char(5)
+
+# Detailed Table Information
+Database default
+Table char_tbl1
+Created Time [not included in comparison]
+Last Access [not included in comparison]
+Created By [not included in comparison]
+Type MANAGED
+Provider parquet
+Location [not included in comparison]/{warehouse_dir}/char_tbl1
+
+
+-- !query
+alter view char_view as select * from char_tbl2
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+desc formatted char_view
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+c char(5)
+v varchar(6)
+
+# Detailed Table Information
+Database default
+Table char_view
+Created Time [not included in comparison]
+Last Access [not included in comparison]
+Created By [not included in comparison]
+Type VIEW
+View Text select * from char_tbl2
+View Original Text select * from char_tbl2
+View Catalog and Namespace spark_catalog.default
+View Query Output Columns [c, v]
+Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=default, view.query.out.col.0=c, view.query.out.col.1=v, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[]]
+
+
+-- !query
+alter table char_tbl1 SET TBLPROPERTIES('yes'='no')
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+desc formatted char_tbl1
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+c char(5)
+v varchar(6)
+d char(5)
+
+# Detailed Table Information
+Database default
+Table char_tbl1
+Created Time [not included in comparison]
+Last Access [not included in comparison]
+Created By [not included in comparison]
+Type MANAGED
+Provider parquet
+Table Properties [yes=no]
+Location [not included in comparison]/{warehouse_dir}/char_tbl1
+
+
+-- !query
+alter view char_view SET TBLPROPERTIES('yes'='no')
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+desc formatted char_view
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+c char(5)
+v varchar(6)
+
+# Detailed Table Information
+Database default
+Table char_view
+Created Time [not included in comparison]
+Last Access [not included in comparison]
+Created By [not included in comparison]
+Type VIEW
+View Text select * from char_tbl2
+View Original Text select * from char_tbl2
+View Catalog and Namespace spark_catalog.default
+View Query Output Columns [c, v]
+Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=default, view.query.out.col.0=c, view.query.out.col.1=v, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], yes=no]
+
+
+-- !query
+alter table char_tbl1 UNSET TBLPROPERTIES('yes')
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+desc formatted char_tbl1
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+c char(5)
+v varchar(6)
+d char(5)
+
+# Detailed Table Information
+Database default
+Table char_tbl1
+Created Time [not included in comparison]
+Last Access [not included in comparison]
+Created By [not included in comparison]
+Type MANAGED
+Provider parquet
+Location [not included in comparison]/{warehouse_dir}/char_tbl1
+
+
+-- !query
+alter view char_view UNSET TBLPROPERTIES('yes')
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+desc formatted char_view
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+c char(5)
+v varchar(6)
+
+# Detailed Table Information
+Database default
+Table char_view
+Created Time [not included in comparison]
+Last Access [not included in comparison]
+Created By [not included in comparison]
+Type VIEW
+View Text select * from char_tbl2
+View Original Text select * from char_tbl2
+View Catalog and Namespace spark_catalog.default
+View Query Output Columns [c, v]
+Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=default, view.query.out.col.0=c, view.query.out.col.1=v, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[]]
+
+
+-- !query
+alter table char_tbl1 SET SERDEPROPERTIES('yes'='no')
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+desc formatted char_tbl1
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+c char(5)
+v varchar(6)
+d char(5)
+
+# Detailed Table Information
+Database default
+Table char_tbl1
+Created Time [not included in comparison]
+Last Access [not included in comparison]
+Created By [not included in comparison]
+Type MANAGED
+Provider parquet
+Location [not included in comparison]/{warehouse_dir}/char_tbl1
+Storage Properties [yes=no]
+
+
+-- !query
+create table char_part(c1 char(5), c2 char(2), v1 varchar(6), v2 varchar(2)) using parquet partitioned by (v2, c2)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+desc formatted char_part
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+c1 char(5)
+v1 varchar(6)
+v2 varchar(2)
+c2 char(2)
+# Partition Information
+# col_name data_type comment
+v2 varchar(2)
+c2 char(2)
+
+# Detailed Table Information
+Database default
+Table char_part
+Created Time [not included in comparison]
+Last Access [not included in comparison]
+Created By [not included in comparison]
+Type MANAGED
+Provider parquet
+Location [not included in comparison]/{warehouse_dir}/char_part
+Partition Provider Catalog
+
+
+-- !query
+alter table char_part add partition (v2='ke', c2='nt') location 'loc1'
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+desc formatted char_part
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+c1 char(5)
+v1 varchar(6)
+v2 varchar(2)
+c2 char(2)
+# Partition Information
+# col_name data_type comment
+v2 varchar(2)
+c2 char(2)
+
+# Detailed Table Information
+Database default
+Table char_part
+Created Time [not included in comparison]
+Last Access [not included in comparison]
+Created By [not included in comparison]
+Type MANAGED
+Provider parquet
+Location [not included in comparison]/{warehouse_dir}/char_part
+Partition Provider Catalog
+
+
+-- !query
+alter table char_part partition (v2='ke') rename to partition (v2='nt')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Partition spec is invalid. The spec (v2) must match the partition spec (v2, c2) defined in table '`default`.`char_part`'
+
+
+-- !query
+desc formatted char_part
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+c1 char(5)
+v1 varchar(6)
+v2 varchar(2)
+c2 char(2)
+# Partition Information
+# col_name data_type comment
+v2 varchar(2)
+c2 char(2)
+
+# Detailed Table Information
+Database default
+Table char_part
+Created Time [not included in comparison]
+Last Access [not included in comparison]
+Created By [not included in comparison]
+Type MANAGED
+Provider parquet
+Location [not included in comparison]/{warehouse_dir}/char_part
+Partition Provider Catalog
+
+
+-- !query
+alter table char_part partition (v2='ke', c2='nt') set location 'loc2'
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+desc formatted char_part
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+c1 char(5)
+v1 varchar(6)
+v2 varchar(2)
+c2 char(2)
+# Partition Information
+# col_name data_type comment
+v2 varchar(2)
+c2 char(2)
+
+# Detailed Table Information
+Database default
+Table char_part
+Created Time [not included in comparison]
+Last Access [not included in comparison]
+Created By [not included in comparison]
+Type MANAGED
+Provider parquet
+Location [not included in comparison]/{warehouse_dir}/char_part
+Partition Provider Catalog
+
+
+-- !query
+MSCK REPAIR TABLE char_part
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+desc formatted char_part
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+c1 char(5)
+v1 varchar(6)
+v2 varchar(2)
+c2 char(2)
+# Partition Information
+# col_name data_type comment
+v2 varchar(2)
+c2 char(2)
+
+# Detailed Table Information
+Database default
+Table char_part
+Created Time [not included in comparison]
+Last Access [not included in comparison]
+Created By [not included in comparison]
+Type MANAGED
+Provider parquet
+Location [not included in comparison]/{warehouse_dir}/char_part
+Partition Provider Catalog
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CharVarcharDDLTestBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CharVarcharDDLTestBase.scala
index 748dd7e..1f47744 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CharVarcharDDLTestBase.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CharVarcharDDLTestBase.scala
@@ -18,7 +18,7 @@
package org.apache.spark.sql.execution.command
import org.apache.spark.SparkConf
-import org.apache.spark.sql.{AnalysisException, QueryTest}
+import org.apache.spark.sql.{AnalysisException, QueryTest, Row}
import org.apache.spark.sql.catalyst.util.CharVarcharUtils
import org.apache.spark.sql.connector.InMemoryPartitionTableCatalog
import org.apache.spark.sql.internal.SQLConf
@@ -97,6 +97,26 @@ trait CharVarcharDDLTestBase extends QueryTest with SQLTestUtils {
assert(v1 || v2)
}
}
+
+ def checkTableSchemaTypeStr(expected: Seq[Row]): Unit = {
+ checkAnswer(sql("desc t").selectExpr("data_type").where("data_type like '%char%'"), expected)
+ }
+
+ test("SPARK-33901: alter table add columns should not change original table's schema") {
+ withTable("t") {
+ sql(s"CREATE TABLE t(i CHAR(5), c VARCHAR(4)) USING $format")
+ sql("ALTER TABLE t ADD COLUMNS (d VARCHAR(5))")
+ checkTableSchemaTypeStr(Seq(Row("char(5)"), Row("varchar(4)"), Row("varchar(5)")))
+ }
+ }
+
+ test("SPARK-33901: ctas should should not change table's schema") {
+ withTable("t", "tt") {
+ sql(s"CREATE TABLE tt(i CHAR(5), c VARCHAR(4)) USING $format")
+ sql(s"CREATE TABLE t USING $format AS SELECT * FROM tt")
+ checkTableSchemaTypeStr(Seq(Row("char(5)"), Row("varchar(4)")))
+ }
+ }
}
class FileSourceCharVarcharDDLTestSuite extends CharVarcharDDLTestBase with SharedSparkSession {
@@ -104,6 +124,26 @@ class FileSourceCharVarcharDDLTestSuite extends CharVarcharDDLTestBase with Shar
override protected def sparkConf: SparkConf = {
super.sparkConf.set(SQLConf.USE_V1_SOURCE_LIST, "parquet")
}
+
+ // TODO(SPARK-33902): MOVE TO SUPER CLASS AFTER THE TARGET TICKET RESOLVED
+ test("SPARK-33901: create table like should should not change table's schema") {
+ withTable("t", "tt") {
+ sql(s"CREATE TABLE tt(i CHAR(5), c VARCHAR(4)) USING $format")
+ sql("CREATE TABLE t LIKE tt")
+ checkTableSchemaTypeStr(Seq(Row("char(5)"), Row("varchar(4)")))
+ }
+ }
+
+ // TODO(SPARK-33903): MOVE TO SUPER CLASS AFTER THE TARGET TICKET RESOLVED
+ test("SPARK-33901: cvas should should not change view's schema") {
+ withTable( "tt") {
+ sql(s"CREATE TABLE tt(i CHAR(5), c VARCHAR(4)) USING $format")
+ withView("t") {
+ sql("CREATE VIEW t AS SELECT * FROM tt")
+ checkTableSchemaTypeStr(Seq(Row("char(5)"), Row("varchar(4)")))
+ }
+ }
+ }
}
class DSV2CharVarcharDDLTestSuite extends CharVarcharDDLTestBase
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala
index 9f79997..ccaa450 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala
@@ -22,6 +22,7 @@ import scala.util.control.NonFatal
import org.apache.spark.sql.{AnalysisException, Row, SaveMode, SparkSession}
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, SessionCatalog}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.util.CharVarcharUtils
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.execution.command.{DataWritingCommand, DDLUtils}
import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, InsertIntoHadoopFsRelationCommand, LogicalRelation}
@@ -58,9 +59,10 @@ trait CreateHiveTableAsSelectBase extends DataWritingCommand {
// TODO ideally, we should get the output data ready first and then
// add the relation into catalog, just in case of failure occurs while data
// processing.
+ val tableSchema = CharVarcharUtils.getRawSchema(outputColumns.toStructType)
assert(tableDesc.schema.isEmpty)
catalog.createTable(
- tableDesc.copy(schema = outputColumns.toStructType), ignoreIfExists = false)
+ tableDesc.copy(schema = tableSchema), ignoreIfExists = false)
try {
// Read back the metadata of the table which was created just now.
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org