You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2022/08/03 17:37:24 UTC

[spark] branch master updated: [SPARK-39936][SQL] Store schema in properties for Spark Views

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 047c7c2ed9f [SPARK-39936][SQL] Store schema in properties for Spark Views
047c7c2ed9f is described below

commit 047c7c2ed9fe1dc8afe290c444288d1afabb66b3
Author: Jeffrey Chen <je...@databricks.com>
AuthorDate: Thu Aug 4 01:36:59 2022 +0800

    [SPARK-39936][SQL] Store schema in properties for Spark Views
    
    ### What changes were proposed in this pull request?
    
    We normally store the table schema in the table properties rather than as an actual schema, since this helps bypass some Hive metastore issues, including parsing the schema with the Hive DataType parser.
    
    However, the issue is that we were not emptying out the table schema for Spark views (which are incompatible with Hive to start with). Thus, the Hive DataType parser would try to parse the schema and would break. The fix is to empty out the table schema for Spark views when saving them to the Hive Metastore.
    
    ### Why are the changes needed?
    
    This fixes the following bug:
    ```
    -- creating a table with a nested struct column type works
    create table table_with_hyphen (f array<struct<validColumnTypeName:string>>) using parquet;
    -- creating a view over that table should also work, but breaks without this bugfix
    create or replace view view_with_hyphen as select f from table_with_hyphen;
    ```
    
    ### Does this PR introduce _any_ user-facing change?
    
    Bugfix as described above.
    
    ### How was this patch tested?
    
    Tested in spark and added a unit test.
    
    Closes #37364 from Jeffreychen99/SPARK-39936.
    
    Authored-by: Jeffrey Chen <je...@databricks.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../org/apache/spark/sql/hive/HiveExternalCatalog.scala      | 12 +++++++++++-
 .../org/apache/spark/sql/hive/HiveParquetSourceSuite.scala   |  7 +++++++
 .../org/apache/spark/sql/hive/execution/HiveDDLSuite.scala   |  3 +--
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index 32e45391179..00803e3fbe5 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -284,7 +284,17 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
         // about not case preserving and make Hive serde table and view support mixed-case column
         // names.
         properties = tableDefinition.properties ++ tableMetaToTableProps(tableDefinition))
-      client.createTable(tableWithDataSourceProps, ignoreIfExists)
+      try {
+        client.createTable(tableWithDataSourceProps, ignoreIfExists)
+      } catch {
+        case NonFatal(e) if (tableDefinition.tableType == CatalogTableType.VIEW) =>
+          // If for some reason we fail to store the schema we store it as empty there
+          // since we already store the real schema in the table properties. This try-catch
+          // should only be necessary for Spark views which are incompatible with Hive
+          client.createTable(
+            tableWithDataSourceProps.copy(schema = EMPTY_DATA_SCHEMA),
+            ignoreIfExists)
+      }
     }
   }
 
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala
index 5778b259c7d..7c67f34560e 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala
@@ -378,4 +378,11 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest with ParquetTest {
       }
     }
   }
+
+  test("Create view with dashes in column type") {
+    withView("t") {
+      sql("CREATE VIEW t AS SELECT STRUCT('a' AS `$a`, 1 AS b) q")
+      checkAnswer(spark.table("t"), Row(Row("a", 1)))
+    }
+  }
 }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 4b28e16928f..9fd2e6e7f62 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -162,8 +162,7 @@ class HiveCatalogedDDLSuite extends DDLSuite with TestHiveSingleton with BeforeA
   test("SPARK-22431: illegal nested type") {
     val queries = Seq(
       "CREATE TABLE t USING hive AS SELECT STRUCT('a' AS `$a`, 1 AS b) q",
-      "CREATE TABLE t(q STRUCT<`$a`:INT, col2:STRING>, i1 INT) USING hive",
-      "CREATE VIEW t AS SELECT STRUCT('a' AS `$a`, 1 AS b) q")
+      "CREATE TABLE t(q STRUCT<`$a`:INT, col2:STRING>, i1 INT) USING hive")
 
     queries.foreach(query => {
       val err = intercept[SparkException] {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org