You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2016/09/03 16:16:13 UTC

spark git commit: [SPARK-16663][SQL] desc table should be consistent between data source and hive serde tables

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 a7f5e7066 -> 3500dbc9b


[SPARK-16663][SQL] desc table should be consistent between data source and hive serde tables

Currently there are 2 inconsistencies:

1. For data source tables, we only print partition names; for Hive tables, we also print the partition schema. After this PR, we will always print the schema.
2. If a column doesn't have a comment, data source tables will print an empty string, while Hive tables will print null. After this PR, we will always print null.

new test in `HiveDDLSuite`

Author: Wenchen Fan <we...@databricks.com>

Closes #14302 from cloud-fan/minor3.

(cherry picked from commit a2abb583caaec9a2cecd5d65b05d172fc096c125)
Signed-off-by: Wenchen Fan <we...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3500dbc9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3500dbc9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3500dbc9

Branch: refs/heads/branch-2.0
Commit: 3500dbc9bcce243b6656f308ee4941de0350d198
Parents: a7f5e70
Author: Wenchen Fan <we...@databricks.com>
Authored: Tue Jul 26 18:46:12 2016 +0800
Committer: Wenchen Fan <we...@databricks.com>
Committed: Sun Sep 4 00:15:57 2016 +0800

----------------------------------------------------------------------
 .../spark/sql/execution/command/tables.scala    | 11 +++----
 .../apache/spark/sql/sources/DDLTestSuite.scala | 30 ++++++++++----------
 .../sql/hive/MetastoreDataSourcesSuite.scala    |  2 +-
 .../spark/sql/hive/execution/HiveDDLSuite.scala | 30 +++++++++++++++-----
 .../sql/hive/execution/HiveQuerySuite.scala     |  4 +--
 5 files changed, 47 insertions(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/3500dbc9/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index a5ccbcf..7e6a352 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -436,11 +436,12 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
 
   private def describePartitionInfo(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
     if (DDLUtils.isDatasourceTable(table)) {
-      val partCols = DDLUtils.getPartitionColumnsFromTableProperties(table)
-      if (partCols.nonEmpty) {
+      val userSpecifiedSchema = DDLUtils.getSchemaFromTableProperties(table)
+      val partColNames = DDLUtils.getPartitionColumnsFromTableProperties(table)
+      for (schema <- userSpecifiedSchema if partColNames.nonEmpty) {
         append(buffer, "# Partition Information", "", "")
-        append(buffer, s"# ${output.head.name}", "", "")
-        partCols.foreach(col => append(buffer, col, "", ""))
+        append(buffer, s"# ${output.head.name}", output(1).name, output(2).name)
+        describeSchema(StructType(partColNames.map(schema(_))), buffer)
       }
     } else {
       if (table.partitionColumns.nonEmpty) {
@@ -527,7 +528,7 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
   private def describeSchema(schema: StructType, buffer: ArrayBuffer[Row]): Unit = {
     schema.foreach { column =>
       val comment =
-        if (column.metadata.contains("comment")) column.metadata.getString("comment") else ""
+        if (column.metadata.contains("comment")) column.metadata.getString("comment") else null
       append(buffer, column.name, column.dataType.simpleString, comment)
     }
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/3500dbc9/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala
index 5a7a907..c2aedff 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala
@@ -98,21 +98,21 @@ class DDLTestSuite extends DataSourceTest with SharedSQLContext {
       "describe ddlPeople",
       Seq(
         Row("intType", "int", "test comment test1"),
-        Row("stringType", "string", ""),
-        Row("dateType", "date", ""),
-        Row("timestampType", "timestamp", ""),
-        Row("doubleType", "double", ""),
-        Row("bigintType", "bigint", ""),
-        Row("tinyintType", "tinyint", ""),
-        Row("decimalType", "decimal(10,0)", ""),
-        Row("fixedDecimalType", "decimal(5,1)", ""),
-        Row("binaryType", "binary", ""),
-        Row("booleanType", "boolean", ""),
-        Row("smallIntType", "smallint", ""),
-        Row("floatType", "float", ""),
-        Row("mapType", "map<string,string>", ""),
-        Row("arrayType", "array<string>", ""),
-        Row("structType", "struct<f1:string,f2:int>", "")
+        Row("stringType", "string", null),
+        Row("dateType", "date", null),
+        Row("timestampType", "timestamp", null),
+        Row("doubleType", "double", null),
+        Row("bigintType", "bigint", null),
+        Row("tinyintType", "tinyint", null),
+        Row("decimalType", "decimal(10,0)", null),
+        Row("fixedDecimalType", "decimal(5,1)", null),
+        Row("binaryType", "binary", null),
+        Row("booleanType", "boolean", null),
+        Row("smallIntType", "smallint", null),
+        Row("floatType", "float", null),
+        Row("mapType", "map<string,string>", null),
+        Row("arrayType", "array<string>", null),
+        Row("structType", "struct<f1:string,f2:int>", null)
       ))
 
   test("SPARK-7686 DescribeCommand should have correct physical plan output attributes") {

http://git-wip-us.apache.org/repos/asf/spark/blob/3500dbc9/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
index af071f9..a0b3b37 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
@@ -748,7 +748,7 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
       assert(schema === actualSchema)
 
       // Checks the DESCRIBE output.
-      checkAnswer(sql("DESCRIBE spark6655"), Row("int", "int", "") :: Nil)
+      checkAnswer(sql("DESCRIBE spark6655"), Row("int", "int", null) :: Nil)
     }
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/3500dbc9/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 1416409..3cf3c6a 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -472,6 +472,22 @@ class HiveDDLSuite
     }
   }
 
+  test("desc table for Hive table - partitioned table") {
+    withTable("tbl") {
+      sql("CREATE TABLE tbl(a int) PARTITIONED BY (b int)")
+
+      assert(sql("DESC tbl").collect().containsSlice(
+        Seq(
+          Row("a", "int", null),
+          Row("b", "int", null),
+          Row("# Partition Information", "", ""),
+          Row("# col_name", "data_type", "comment"),
+          Row("b", "int", null)
+        )
+      ))
+    }
+  }
+
   test("desc table for data source table using Hive Metastore") {
     assume(spark.sparkContext.conf.get(CATALOG_IMPLEMENTATION) == "hive")
     val tabName = "tab1"
@@ -677,7 +693,7 @@ class HiveDDLSuite
 
           val desc = sql("DESC FORMATTED t1").collect().toSeq
 
-          assert(desc.contains(Row("id", "bigint", "")))
+          assert(desc.contains(Row("id", "bigint", null)))
         }
       }
     }
@@ -694,13 +710,13 @@ class HiveDDLSuite
 
       assert(formattedDesc.containsSlice(
         Seq(
-          Row("a", "bigint", ""),
-          Row("b", "bigint", ""),
-          Row("c", "bigint", ""),
-          Row("d", "bigint", ""),
+          Row("a", "bigint", null),
+          Row("b", "bigint", null),
+          Row("c", "bigint", null),
+          Row("d", "bigint", null),
           Row("# Partition Information", "", ""),
-          Row("# col_name", "", ""),
-          Row("d", "", ""),
+          Row("# col_name", "data_type", "comment"),
+          Row("d", "bigint", null),
           Row("", "", ""),
           Row("# Detailed Table Information", "", ""),
           Row("Database:", "default", "")

http://git-wip-us.apache.org/repos/asf/spark/blob/3500dbc9/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index db6c64a..55d7905 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -825,8 +825,8 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
 
     assertResult(
       Array(
-        Row("a", "int", ""),
-        Row("b", "string", ""))
+        Row("a", "int", null),
+        Row("b", "string", null))
     ) {
       sql("DESCRIBE test_describe_commands2")
         .select('col_name, 'data_type, 'comment)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org