You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2022/07/25 16:25:01 UTC
[spark] branch master updated: [SPARK-39852][SQL][TESTS] Unify v1 and v2 `DESCRIBE TABLE` tests for columns
This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new c0b86fe8d37 [SPARK-39852][SQL][TESTS] Unify v1 and v2 `DESCRIBE TABLE` tests for columns
c0b86fe8d37 is described below
commit c0b86fe8d378be35f6997c50858c3e2f7c8cc43e
Author: Max Gekk <ma...@gmail.com>
AuthorDate: Mon Jul 25 21:24:40 2022 +0500
[SPARK-39852][SQL][TESTS] Unify v1 and v2 `DESCRIBE TABLE` tests for columns
### What changes were proposed in this pull request?
1. Move `DESCRIBE TABLE` parsing tests for columns to `DescribeTableParserSuite`.
2. Move the column tests for `DESCRIBE TABLE` from `describe-table-column.sql` to v1 `DescribeTableSuite`.
### Why are the changes needed?
1. The unification will allow running common `DESCRIBE TABLE` columns tests for both DSv1/Hive DSv1 and DSv2
2. We can detect missing features and differences between DSv1 and DSv2 implementations.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
By running the modified test suites:
```
$ build/sbt "testOnly *DDLParserSuite"
$ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z describe-table-column.sql"
```
and new test suites:
```
$ build/sbt "sql/test:testOnly *DescribeTableParserSuite"
$ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *DescribeTableSuite"
```
Closes #37266 from MaxGekk/test-describe-table-columns.
Authored-by: Max Gekk <ma...@gmail.com>
Signed-off-by: Max Gekk <ma...@gmail.com>
---
.../spark/sql/catalyst/parser/DDLParserSuite.scala | 44 ---
.../sql-tests/inputs/describe-table-column.sql | 70 -----
.../results/describe-table-column.sql.out | 330 ---------------------
.../command/DescribeTableParserSuite.scala | 49 ++-
.../execution/command/DescribeTableSuiteBase.scala | 53 ++++
.../execution/command/v1/DescribeTableSuite.scala | 119 ++++++++
.../execution/command/v2/DescribeTableSuite.scala | 65 ++++
7 files changed, 284 insertions(+), 446 deletions(-)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
index eb3e9baaacd..3a5f0bb6297 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
@@ -1169,50 +1169,6 @@ class DDLParserSuite extends AnalysisTest {
isView = true))
}
- test("describe table column") {
- comparePlans(parsePlan("DESCRIBE t col"),
- DescribeColumn(
- UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE", true),
- UnresolvedAttribute(Seq("col")),
- isExtended = false))
- comparePlans(parsePlan("DESCRIBE t `abc.xyz`"),
- DescribeColumn(
- UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE", true),
- UnresolvedAttribute(Seq("abc.xyz")),
- isExtended = false))
- comparePlans(parsePlan("DESCRIBE t abc.xyz"),
- DescribeColumn(
- UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE", true),
- UnresolvedAttribute(Seq("abc", "xyz")),
- isExtended = false))
- comparePlans(parsePlan("DESCRIBE t `a.b`.`x.y`"),
- DescribeColumn(
- UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE", true),
- UnresolvedAttribute(Seq("a.b", "x.y")),
- isExtended = false))
-
- comparePlans(parsePlan("DESCRIBE TABLE t col"),
- DescribeColumn(
- UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE", true),
- UnresolvedAttribute(Seq("col")),
- isExtended = false))
- comparePlans(parsePlan("DESCRIBE TABLE EXTENDED t col"),
- DescribeColumn(
- UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE", true),
- UnresolvedAttribute(Seq("col")),
- isExtended = true))
- comparePlans(parsePlan("DESCRIBE TABLE FORMATTED t col"),
- DescribeColumn(
- UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE", true),
- UnresolvedAttribute(Seq("col")),
- isExtended = true))
-
- val caught = intercept[AnalysisException](
- parsePlan("DESCRIBE TABLE t PARTITION (ds='1970-01-01') col"))
- assert(caught.getMessage.contains(
- "The feature is not supported: DESC TABLE COLUMN for a specific partition."))
- }
-
test("insert table: basic append") {
Seq(
"INSERT INTO TABLE testcat.ns1.ns2.tbl SELECT * FROM source",
diff --git a/sql/core/src/test/resources/sql-tests/inputs/describe-table-column.sql b/sql/core/src/test/resources/sql-tests/inputs/describe-table-column.sql
deleted file mode 100644
index 146977c8061..00000000000
--- a/sql/core/src/test/resources/sql-tests/inputs/describe-table-column.sql
+++ /dev/null
@@ -1,70 +0,0 @@
--- Test temp table
-CREATE TEMPORARY VIEW desc_col_temp_view (key int COMMENT 'column_comment', col struct<x:int, y:string>) USING PARQUET;
-
-DESC desc_col_temp_view key;
-
-DESC EXTENDED desc_col_temp_view key;
-
-DESC FORMATTED desc_col_temp_view key;
-
--- Describe a column with qualified name
-DESC FORMATTED desc_col_temp_view desc_col_temp_view.key;
-
--- Describe a non-existent column
-DESC desc_col_temp_view key1;
-
--- Describe a nested column
-DESC desc_col_temp_view col.x;
-
--- Test persistent table
-CREATE TABLE desc_col_table (key int COMMENT 'column_comment') USING PARQUET;
-
-ANALYZE TABLE desc_col_table COMPUTE STATISTICS FOR COLUMNS key;
-
-DESC desc_col_table key;
-
-DESC EXTENDED desc_col_table key;
-
-DESC FORMATTED desc_col_table key;
-
--- Describe a non-existent column
-DESC desc_col_table key1;
-
--- Test complex columns
-CREATE TABLE desc_complex_col_table (`a.b` int, col struct<x:int, y:string>) USING PARQUET;
-
-DESC FORMATTED desc_complex_col_table `a.b`;
-
-DESC FORMATTED desc_complex_col_table col;
-
--- Describe a nested column
-DESC FORMATTED desc_complex_col_table col.x;
-
--- Test output for histogram statistics
-SET spark.sql.statistics.histogram.enabled=true;
-SET spark.sql.statistics.histogram.numBins=2;
-
-INSERT INTO desc_col_table values 1, 2, 3, 4;
-
-ANALYZE TABLE desc_col_table COMPUTE STATISTICS FOR COLUMNS key;
-
-DESC EXTENDED desc_col_table key;
-
-DROP VIEW desc_col_temp_view;
-
-DROP TABLE desc_col_table;
-
-DROP TABLE desc_complex_col_table;
-
---Test case insensitive
-
-CREATE TABLE customer(CName STRING) USING PARQUET;
-
-INSERT INTO customer VALUES('Maria');
-
-ANALYZE TABLE customer COMPUTE STATISTICS FOR COLUMNS cname;
-
-DESC EXTENDED customer cname;
-
-DROP TABLE customer;
-
diff --git a/sql/core/src/test/resources/sql-tests/results/describe-table-column.sql.out b/sql/core/src/test/resources/sql-tests/results/describe-table-column.sql.out
deleted file mode 100644
index f650d248425..00000000000
--- a/sql/core/src/test/resources/sql-tests/results/describe-table-column.sql.out
+++ /dev/null
@@ -1,330 +0,0 @@
--- Automatically generated by SQLQueryTestSuite
--- !query
-CREATE TEMPORARY VIEW desc_col_temp_view (key int COMMENT 'column_comment', col struct<x:int, y:string>) USING PARQUET
--- !query schema
-struct<>
--- !query output
-
-
-
--- !query
-DESC desc_col_temp_view key
--- !query schema
-struct<info_name:string,info_value:string>
--- !query output
-col_name key
-data_type int
-comment column_comment
-
-
--- !query
-DESC EXTENDED desc_col_temp_view key
--- !query schema
-struct<info_name:string,info_value:string>
--- !query output
-col_name key
-data_type int
-comment column_comment
-min NULL
-max NULL
-num_nulls NULL
-distinct_count NULL
-avg_col_len NULL
-max_col_len NULL
-histogram NULL
-
-
--- !query
-DESC FORMATTED desc_col_temp_view key
--- !query schema
-struct<info_name:string,info_value:string>
--- !query output
-col_name key
-data_type int
-comment column_comment
-min NULL
-max NULL
-num_nulls NULL
-distinct_count NULL
-avg_col_len NULL
-max_col_len NULL
-histogram NULL
-
-
--- !query
-DESC FORMATTED desc_col_temp_view desc_col_temp_view.key
--- !query schema
-struct<info_name:string,info_value:string>
--- !query output
-col_name key
-data_type int
-comment column_comment
-min NULL
-max NULL
-num_nulls NULL
-distinct_count NULL
-avg_col_len NULL
-max_col_len NULL
-histogram NULL
-
-
--- !query
-DESC desc_col_temp_view key1
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.AnalysisException
-Column key1 does not exist
-
-
--- !query
-DESC desc_col_temp_view col.x
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.AnalysisException
-DESC TABLE COLUMN does not support nested column: col.x
-
-
--- !query
-CREATE TABLE desc_col_table (key int COMMENT 'column_comment') USING PARQUET
--- !query schema
-struct<>
--- !query output
-
-
-
--- !query
-ANALYZE TABLE desc_col_table COMPUTE STATISTICS FOR COLUMNS key
--- !query schema
-struct<>
--- !query output
-
-
-
--- !query
-DESC desc_col_table key
--- !query schema
-struct<info_name:string,info_value:string>
--- !query output
-col_name key
-data_type int
-comment column_comment
-
-
--- !query
-DESC EXTENDED desc_col_table key
--- !query schema
-struct<info_name:string,info_value:string>
--- !query output
-col_name key
-data_type int
-comment column_comment
-min NULL
-max NULL
-num_nulls 0
-distinct_count 0
-avg_col_len 4
-max_col_len 4
-histogram NULL
-
-
--- !query
-DESC FORMATTED desc_col_table key
--- !query schema
-struct<info_name:string,info_value:string>
--- !query output
-col_name key
-data_type int
-comment column_comment
-min NULL
-max NULL
-num_nulls 0
-distinct_count 0
-avg_col_len 4
-max_col_len 4
-histogram NULL
-
-
--- !query
-DESC desc_col_table key1
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.AnalysisException
-Column key1 does not exist
-
-
--- !query
-CREATE TABLE desc_complex_col_table (`a.b` int, col struct<x:int, y:string>) USING PARQUET
--- !query schema
-struct<>
--- !query output
-
-
-
--- !query
-DESC FORMATTED desc_complex_col_table `a.b`
--- !query schema
-struct<info_name:string,info_value:string>
--- !query output
-col_name a.b
-data_type int
-comment NULL
-min NULL
-max NULL
-num_nulls NULL
-distinct_count NULL
-avg_col_len NULL
-max_col_len NULL
-histogram NULL
-
-
--- !query
-DESC FORMATTED desc_complex_col_table col
--- !query schema
-struct<info_name:string,info_value:string>
--- !query output
-col_name col
-data_type struct<x:int,y:string>
-comment NULL
-min NULL
-max NULL
-num_nulls NULL
-distinct_count NULL
-avg_col_len NULL
-max_col_len NULL
-histogram NULL
-
-
--- !query
-DESC FORMATTED desc_complex_col_table col.x
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.AnalysisException
-DESC TABLE COLUMN does not support nested column: col.x
-
-
--- !query
-SET spark.sql.statistics.histogram.enabled=true
--- !query schema
-struct<key:string,value:string>
--- !query output
-spark.sql.statistics.histogram.enabled true
-
-
--- !query
-SET spark.sql.statistics.histogram.numBins=2
--- !query schema
-struct<key:string,value:string>
--- !query output
-spark.sql.statistics.histogram.numBins 2
-
-
--- !query
-INSERT INTO desc_col_table values 1, 2, 3, 4
--- !query schema
-struct<>
--- !query output
-
-
-
--- !query
-ANALYZE TABLE desc_col_table COMPUTE STATISTICS FOR COLUMNS key
--- !query schema
-struct<>
--- !query output
-
-
-
--- !query
-DESC EXTENDED desc_col_table key
--- !query schema
-struct<info_name:string,info_value:string>
--- !query output
-col_name key
-data_type int
-comment column_comment
-min 1
-max 4
-num_nulls 0
-distinct_count 4
-avg_col_len 4
-max_col_len 4
-histogram height: 2.0, num_of_bins: 2
-bin_0 lower_bound: 1.0, upper_bound: 2.0, distinct_count: 2
-bin_1 lower_bound: 2.0, upper_bound: 4.0, distinct_count: 2
-
-
--- !query
-DROP VIEW desc_col_temp_view
--- !query schema
-struct<>
--- !query output
-
-
-
--- !query
-DROP TABLE desc_col_table
--- !query schema
-struct<>
--- !query output
-
-
-
--- !query
-DROP TABLE desc_complex_col_table
--- !query schema
-struct<>
--- !query output
-
-
-
--- !query
-CREATE TABLE customer(CName STRING) USING PARQUET
--- !query schema
-struct<>
--- !query output
-
-
-
--- !query
-INSERT INTO customer VALUES('Maria')
--- !query schema
-struct<>
--- !query output
-
-
-
--- !query
-ANALYZE TABLE customer COMPUTE STATISTICS FOR COLUMNS cname
--- !query schema
-struct<>
--- !query output
-
-
-
--- !query
-DESC EXTENDED customer cname
--- !query schema
-struct<info_name:string,info_value:string>
--- !query output
-col_name cname
-data_type string
-comment NULL
-min NULL
-max NULL
-num_nulls 0
-distinct_count 1
-avg_col_len 5
-max_col_len 5
-histogram NULL
-
-
--- !query
-DROP TABLE customer
--- !query schema
-struct<>
--- !query output
-
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableParserSuite.scala
index 58c71f9563f..5f3b3eda418 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableParserSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableParserSuite.scala
@@ -17,9 +17,10 @@
package org.apache.spark.sql.execution.command
-import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedTableOrView}
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedAttribute, UnresolvedTableOrView}
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan
-import org.apache.spark.sql.catalyst.plans.logical.DescribeRelation
+import org.apache.spark.sql.catalyst.plans.logical.{DescribeColumn, DescribeRelation}
class DescribeTableParserSuite extends AnalysisTest {
test("SPARK-17328: Fix NPE with EXPLAIN DESCRIBE TABLE") {
@@ -36,4 +37,48 @@ class DescribeTableParserSuite extends AnalysisTest {
DescribeRelation(
UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE", true), Map.empty, isExtended = true))
}
+
+ test("describe table column") {
+ comparePlans(parsePlan("DESCRIBE t col"),
+ DescribeColumn(
+ UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE", true),
+ UnresolvedAttribute(Seq("col")),
+ isExtended = false))
+ comparePlans(parsePlan("DESCRIBE t `abc.xyz`"),
+ DescribeColumn(
+ UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE", true),
+ UnresolvedAttribute(Seq("abc.xyz")),
+ isExtended = false))
+ comparePlans(parsePlan("DESCRIBE t abc.xyz"),
+ DescribeColumn(
+ UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE", true),
+ UnresolvedAttribute(Seq("abc", "xyz")),
+ isExtended = false))
+ comparePlans(parsePlan("DESCRIBE t `a.b`.`x.y`"),
+ DescribeColumn(
+ UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE", true),
+ UnresolvedAttribute(Seq("a.b", "x.y")),
+ isExtended = false))
+
+ comparePlans(parsePlan("DESCRIBE TABLE t col"),
+ DescribeColumn(
+ UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE", true),
+ UnresolvedAttribute(Seq("col")),
+ isExtended = false))
+ comparePlans(parsePlan("DESCRIBE TABLE EXTENDED t col"),
+ DescribeColumn(
+ UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE", true),
+ UnresolvedAttribute(Seq("col")),
+ isExtended = true))
+ comparePlans(parsePlan("DESCRIBE TABLE FORMATTED t col"),
+ DescribeColumn(
+ UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE", true),
+ UnresolvedAttribute(Seq("col")),
+ isExtended = true))
+
+ val caught = intercept[AnalysisException](
+ parsePlan("DESCRIBE TABLE t PARTITION (ds='1970-01-01') col"))
+ assert(caught.getMessage.contains(
+ "The feature is not supported: DESC TABLE COLUMN for a specific partition."))
+ }
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableSuiteBase.scala
index 7ecc38848cf..c1b92699c6e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableSuiteBase.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableSuiteBase.scala
@@ -120,4 +120,57 @@ trait DescribeTableSuiteBase extends QueryTest with DDLCommandTestUtils {
assert(isNullDataset.schema === expectedSchema.add("is_null", BooleanType, false))
}
}
+
+ test("describe a column") {
+ withNamespaceAndTable("ns", "tbl") { tbl =>
+ sql(s"""
+ |CREATE TABLE $tbl
+ |(key int COMMENT 'column_comment', col struct<x:int, y:string>)
+ |$defaultUsing""".stripMargin)
+ val descriptionDf = sql(s"DESC $tbl key")
+ assert(descriptionDf.schema.map(field => (field.name, field.dataType)) === Seq(
+ ("info_name", StringType),
+ ("info_value", StringType)))
+ QueryTest.checkAnswer(
+ descriptionDf,
+ Seq(
+ Row("col_name", "key"),
+ Row("data_type", "int"),
+ Row("comment", "column_comment")))
+ }
+ }
+
+ test("describe a column with fully qualified name") {
+ withNamespaceAndTable("ns", "tbl") { tbl =>
+ sql(s"CREATE TABLE $tbl (key int COMMENT 'comment1') $defaultUsing")
+ QueryTest.checkAnswer(
+ sql(s"DESC $tbl $tbl.key"),
+ Seq(Row("col_name", "key"), Row("data_type", "int"), Row("comment", "comment1")))
+ }
+ }
+
+ test("describe complex columns") {
+ withNamespaceAndTable("ns", "tbl") { tbl =>
+ sql(s"CREATE TABLE $tbl (`a.b` int, col struct<x:int, y:string>) $defaultUsing")
+ QueryTest.checkAnswer(
+ sql(s"DESC $tbl `a.b`"),
+ Seq(Row("col_name", "a.b"), Row("data_type", "int"), Row("comment", "NULL")))
+ QueryTest.checkAnswer(
+ sql(s"DESCRIBE $tbl col"),
+ Seq(
+ Row("col_name", "col"),
+ Row("data_type", "struct<x:int,y:string>"),
+ Row("comment", "NULL")))
+ }
+ }
+
+ test("describe a nested column") {
+ withNamespaceAndTable("ns", "tbl") { tbl =>
+ sql(s"CREATE TABLE $tbl (`a.b` int, col struct<x:int, y:string>) $defaultUsing")
+ val errMsg = intercept[AnalysisException] {
+ sql(s"DESCRIBE TABLE $tbl col.x")
+ }.getMessage
+ assert(errMsg === "DESC TABLE COLUMN does not support nested column: col.x")
+ }
+ }
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala
index 6ee01107fe2..f8e53fee723 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala
@@ -22,6 +22,7 @@ import java.util.Locale
import org.apache.spark.sql.{AnalysisException, QueryTest, Row}
import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME
import org.apache.spark.sql.execution.command
+import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.StringType
/**
@@ -48,6 +49,124 @@ trait DescribeTableSuiteBase extends command.DescribeTableSuiteBase
assert(e.message === "Partition not found in table 'table' database 'ns':\nid -> 1")
}
}
+
+ test("describe a non-existent column") {
+ withNamespaceAndTable("ns", "tbl") { tbl =>
+ sql(s"""
+ |CREATE TABLE $tbl
+ |(key int COMMENT 'column_comment', col struct<x:int, y:string>)
+ |$defaultUsing""".stripMargin)
+ val errMsg = intercept[AnalysisException] {
+ sql(s"DESC $tbl key1").collect()
+ }.getMessage
+ assert(errMsg === "Column key1 does not exist")
+ }
+ }
+
+ test("describe a column in case insensitivity") {
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") {
+ withNamespaceAndTable("ns", "tbl") { tbl =>
+ sql(s"CREATE TABLE $tbl (key int COMMENT 'comment1') $defaultUsing")
+ QueryTest.checkAnswer(
+ sql(s"DESC $tbl KEY"),
+ Seq(Row("col_name", "KEY"), Row("data_type", "int"), Row("comment", "comment1")))
+ }
+ }
+
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
+ withNamespaceAndTable("ns", "tbl") { tbl =>
+ sql(s"CREATE TABLE $tbl (key int COMMENT 'comment1') $defaultUsing")
+ val errMsg = intercept[AnalysisException] {
+ sql(s"DESC $tbl KEY").collect()
+ }.getMessage
+ assert(errMsg === "Column KEY does not exist")
+ }
+ }
+ }
+
+ test("describe extended (formatted) a column") {
+ withNamespaceAndTable("ns", "tbl") { tbl =>
+ sql(s"""
+ |CREATE TABLE $tbl
+ |(key INT COMMENT 'column_comment', col STRING)
+ |$defaultUsing""".stripMargin)
+ sql(s"INSERT INTO $tbl SELECT 1, 'a'")
+ sql(s"INSERT INTO $tbl SELECT 2, 'b'")
+ sql(s"INSERT INTO $tbl SELECT 3, 'c'")
+ sql(s"INSERT INTO $tbl SELECT null, 'd'")
+
+ val descriptionDf = sql(s"DESCRIBE TABLE EXTENDED $tbl key")
+ assert(descriptionDf.schema.map(field => (field.name, field.dataType)) === Seq(
+ ("info_name", StringType),
+ ("info_value", StringType)))
+ QueryTest.checkAnswer(
+ descriptionDf,
+ Seq(
+ Row("col_name", "key"),
+ Row("data_type", "int"),
+ Row("comment", "column_comment"),
+ Row("min", "NULL"),
+ Row("max", "NULL"),
+ Row("num_nulls", "NULL"),
+ Row("distinct_count", "NULL"),
+ Row("avg_col_len", "NULL"),
+ Row("max_col_len", "NULL"),
+ Row("histogram", "NULL")))
+ sql(s"ANALYZE TABLE $tbl COMPUTE STATISTICS FOR COLUMNS key")
+
+ Seq("EXTENDED", "FORMATTED").foreach { extended =>
+ val descriptionDf2 = sql(s"DESCRIBE TABLE $extended $tbl key")
+ QueryTest.checkAnswer(
+ descriptionDf2,
+ Seq(
+ Row("col_name", "key"),
+ Row("data_type", "int"),
+ Row("comment", "column_comment"),
+ Row("min", "1"),
+ Row("max", "3"),
+ Row("num_nulls", "1"),
+ Row("distinct_count", "3"),
+ Row("avg_col_len", "4"),
+ Row("max_col_len", "4"),
+ Row("histogram", "NULL")))
+ }
+ }
+ }
+
+ test("describe a column with histogram statistics") {
+ withSQLConf(
+ SQLConf.HISTOGRAM_ENABLED.key -> "true",
+ SQLConf.HISTOGRAM_NUM_BINS.key -> "2") {
+ withNamespaceAndTable("ns", "tbl") { tbl =>
+ sql(s"""
+ |CREATE TABLE $tbl
+ |(key INT COMMENT 'column_comment', col STRING)
+ |$defaultUsing""".stripMargin)
+ sql(s"INSERT INTO $tbl SELECT 1, 'a'")
+ sql(s"INSERT INTO $tbl SELECT 2, 'b'")
+ sql(s"INSERT INTO $tbl SELECT 3, 'c'")
+ sql(s"INSERT INTO $tbl SELECT null, 'd'")
+ sql(s"ANALYZE TABLE $tbl COMPUTE STATISTICS FOR COLUMNS key")
+
+ val descriptionDf = sql(s"DESCRIBE TABLE EXTENDED $tbl key")
+ QueryTest.checkAnswer(
+ descriptionDf,
+ Seq(
+ Row("col_name", "key"),
+ Row("data_type", "int"),
+ Row("comment", "column_comment"),
+ Row("min", "1"),
+ Row("max", "3"),
+ Row("num_nulls", "1"),
+ Row("distinct_count", "3"),
+ Row("avg_col_len", "4"),
+ Row("max_col_len", "4"),
+ Row("histogram", "height: 1.5, num_of_bins: 2"),
+ Row("bin_0", "lower_bound: 1.0, upper_bound: 2.0, distinct_count: 2"),
+ Row("bin_1", "lower_bound: 2.0, upper_bound: 3.0, distinct_count: 1")))
+ }
+ }
+ }
}
/**
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala
index b09abec6bc3..a317c562276 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.command.v2
import org.apache.spark.sql.{AnalysisException, QueryTest, Row}
import org.apache.spark.sql.connector.catalog.TableCatalog
import org.apache.spark.sql.execution.command
+import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.StringType
import org.apache.spark.util.Utils
@@ -91,4 +92,68 @@ class DescribeTableSuite extends command.DescribeTableSuiteBase with CommandSuit
Row("Table Properties", "[bar=baz]", "")))
}
}
+
+ test("describe a non-existent column") {
+ withNamespaceAndTable("ns", "tbl") { tbl =>
+ sql(s"""
+ |CREATE TABLE $tbl
+ |(key int COMMENT 'column_comment', col struct<x:int, y:string>)
+ |$defaultUsing""".stripMargin)
+ checkError(
+ exception = intercept[AnalysisException] {
+ sql(s"DESC $tbl key1").collect()
+ },
+ errorClass = "UNRESOLVED_COLUMN",
+ sqlState = "42000",
+ parameters = Map(
+ "objectName" -> "`key1`",
+ "objectList" -> "`test_catalog`.`ns`.`tbl`.`key`, `test_catalog`.`ns`.`tbl`.`col`"))
+ }
+ }
+
+ test("describe a column in case insensitivity") {
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") {
+ withNamespaceAndTable("ns", "tbl") { tbl =>
+ sql(s"CREATE TABLE $tbl (key int COMMENT 'comment1') $defaultUsing")
+ QueryTest.checkAnswer(
+ sql(s"DESC $tbl KEY"),
+ Seq(Row("col_name", "KEY"), Row("data_type", "int"), Row("comment", "comment1")))
+ }
+ }
+
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
+ withNamespaceAndTable("ns", "tbl") { tbl =>
+ sql(s"CREATE TABLE $tbl (key int COMMENT 'comment1') $defaultUsing")
+ checkError(
+ exception = intercept[AnalysisException] {
+ sql(s"DESC $tbl KEY").collect()
+ },
+ errorClass = "UNRESOLVED_COLUMN",
+ sqlState = "42000",
+ parameters = Map(
+ "objectName" -> "`KEY`",
+ "objectList" -> "`test_catalog`.`ns`.`tbl`.`key`"))
+ }
+ }
+ }
+
+ // TODO(SPARK-39859): Support v2 `DESCRIBE TABLE EXTENDED` for columns
+ test("describe extended (formatted) a column") {
+ withNamespaceAndTable("ns", "tbl") { tbl =>
+ sql(s"""
+ |CREATE TABLE $tbl
+ |(key INT COMMENT 'column_comment', col STRING)
+ |$defaultUsing""".stripMargin)
+ val descriptionDf = sql(s"DESCRIBE TABLE EXTENDED $tbl key")
+ assert(descriptionDf.schema.map(field => (field.name, field.dataType)) === Seq(
+ ("info_name", StringType),
+ ("info_value", StringType)))
+ QueryTest.checkAnswer(
+ descriptionDf,
+ Seq(
+ Row("col_name", "key"),
+ Row("data_type", "int"),
+ Row("comment", "column_comment")))
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org