Posted to commits@spark.apache.org by ge...@apache.org on 2023/02/03 05:11:57 UTC

[spark] branch branch-3.4 updated: [SPARK-42294][SQL] Include column default values in DESCRIBE output for V2 tables

This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new ea141854550 [SPARK-42294][SQL] Include column default values in DESCRIBE output for V2 tables
ea141854550 is described below

commit ea141854550271057f5db06b8abc70214ac76488
Author: Daniel Tenedorio <da...@databricks.com>
AuthorDate: Thu Feb 2 21:11:29 2023 -0800

    [SPARK-42294][SQL] Include column default values in DESCRIBE output for V2 tables
    
    ### What changes were proposed in this pull request?
    
    Include column default values in DESCRIBE output for V2 tables.
    
    This was previously implemented for V1 tables but was missing for V2 tables.
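    
    As a rough sketch of the resulting behavior (adapted from the test added in this
    patch; "my_v2_source" is a placeholder provider name, and the provider must be
    enabled via SQLConf.DEFAULT_COLUMN_ALLOWED_PROVIDERS as the new test does):
    
        // Create a V2 table whose column declares a default value.
        spark.sql("CREATE TABLE t (id bigint DEFAULT 42) USING my_v2_source")
    
        // DESCRIBE TABLE EXTENDED now also emits a "# Column Default Values"
        // section, e.g. the row ("id", "bigint", "42").
        spark.sql("DESCRIBE TABLE EXTENDED t").show(truncate = false)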
    
    ### Why are the changes needed?
    
    DESCRIBE commands make it easier to work with tables by surfacing their metadata for inspection.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes: for tables with default column values, DESCRIBE output now includes a "# Column Default Values" section listing each such column's name, type, and default expression.
    
    ### How was this patch tested?
    
    This PR adds unit test coverage.
    
    Closes #39863 from dtenedor/descibe-defaults-v2.
    
    Authored-by: Daniel Tenedorio <da...@databricks.com>
    Signed-off-by: Gengliang Wang <ge...@apache.org>
    (cherry picked from commit c5f72b3e2086d66c72699778915d2ab6ee64a6eb)
    Signed-off-by: Gengliang Wang <ge...@apache.org>
---
 .../catalyst/util/ResolveDefaultColumnsUtil.scala  | 17 ++++++++++++++
 .../spark/sql/execution/command/tables.scala       |  9 ++------
 .../datasources/v2/DescribeTableExec.scala         |  7 +++++-
 .../spark/sql/connector/DataSourceV2SQLSuite.scala | 27 ++++++++++++++++++++++
 4 files changed, 52 insertions(+), 8 deletions(-)
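
The core of the change is a new helper, ResolveDefaultColumns.getDescribeMetadata,
shared by the V1 DescribeTableCommand and the V2 DescribeTableExec (see the hunks
below). A minimal sketch of how it behaves on its own, assuming only the field
metadata key already used by this code path:

    import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns
    import org.apache.spark.sql.types.{LongType, MetadataBuilder, StructField, StructType}

    // One column whose field metadata carries a current default value expression,
    // stored under the existing CURRENT_DEFAULT_COLUMN_METADATA_KEY constant.
    val fieldMetadata = new MetadataBuilder()
      .putString(ResolveDefaultColumns.CURRENT_DEFAULT_COLUMN_METADATA_KEY, "42")
      .build()
    val schema = StructType(Seq(StructField("id", LongType, metadata = fieldMetadata)))

    // Returns the extra DESCRIBE rows: a spacer, the "# Column Default Values"
    // header, and one (name, type, default) triple per column with a default,
    // e.g. ("id", "bigint", "42").
    ResolveDefaultColumns.getDescribeMetadata(schema)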

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala
index 9a0d3a435b4..667c0988d0c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql.catalyst.util
 
+import scala.collection.mutable.ArrayBuffer
+
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.analysis._
@@ -267,6 +269,21 @@ object ResolveDefaultColumns {
     }
   }
 
+  /** If any fields in a schema have default values, appends them to the result. */
+  def getDescribeMetadata(schema: StructType): Seq[(String, String, String)] = {
+    val rows = new ArrayBuffer[(String, String, String)]()
+    if (schema.fields.exists(_.metadata.contains(CURRENT_DEFAULT_COLUMN_METADATA_KEY))) {
+      rows.append(("", "", ""))
+      rows.append(("# Column Default Values", "", ""))
+      schema.foreach { column =>
+        column.getCurrentDefaultValue().map { value =>
+          rows.append((column.name, column.dataType.simpleString, value))
+        }
+      }
+    }
+    rows.toSeq
+  }
+
   /**
    * This is an Analyzer for processing default column values using built-in functions only.
    */
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index 30f77b11ec0..f6266bcb33f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -648,13 +648,8 @@ case class DescribeTableCommand(
       }
 
       // If any columns have default values, append them to the result.
-      if (metadata.schema.fields.exists(_.metadata.contains(CURRENT_DEFAULT_COLUMN_METADATA_KEY))) {
-        append(result, "", "", "")
-        append(result, "# Column Default Values", "", "")
-        metadata.schema.foreach { column =>
-          column.getCurrentDefaultValue().map(
-            append(result, column.name, column.dataType.simpleString, _))
-        }
+      ResolveDefaultColumns.getDescribeMetadata(metadata.schema).foreach { row =>
+        append(result, row._1, row._2, row._3)
       }
     }
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala
index acb861d7679..8b0098f14fe 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala
@@ -23,7 +23,7 @@ import scala.collection.mutable.ArrayBuffer
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.catalog.CatalogTableType
 import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.util.quoteIfNeeded
+import org.apache.spark.sql.catalyst.util.{quoteIfNeeded, ResolveDefaultColumns}
 import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsMetadataColumns, Table, TableCatalog}
 import org.apache.spark.sql.connector.expressions.IdentityTransform
 
@@ -68,6 +68,11 @@ case class DescribeTableExec(
         case (key, value) => key + "=" + value
       }.mkString("[", ",", "]")
     rows += toCatalystRow("Table Properties", properties, "")
+
+    // If any columns have default values, append them to the result.
+    ResolveDefaultColumns.getDescribeMetadata(table.schema).foreach { row =>
+      rows += toCatalystRow(row._1, row._2, row._3)
+    }
   }
 
   private def addSchema(rows: ArrayBuffer[InternalRow]): Unit = {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
index a4b7f762dba..58ed4b2a55c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
@@ -2890,6 +2890,33 @@ class DataSourceV2SQLSuiteV1Filter
     }
   }
 
+  test("DESCRIBE TABLE EXTENDED of a V2 table with a default column value") {
+    withSQLConf(SQLConf.DEFAULT_COLUMN_ALLOWED_PROVIDERS.key -> v2Source) {
+      withTable("t") {
+        spark.sql(s"CREATE TABLE t (id bigint default 42) USING $v2Source")
+        val descriptionDf = spark.sql(s"DESCRIBE TABLE EXTENDED t")
+        assert(descriptionDf.schema.map { field =>
+          (field.name, field.dataType)
+        } === Seq(
+          ("col_name", StringType),
+          ("data_type", StringType),
+          ("comment", StringType)))
+        QueryTest.checkAnswer(
+          descriptionDf.filter(
+            "!(col_name in ('Catalog', 'Created Time', 'Created By', 'Database', " +
+              "'index', 'Location', 'Name', 'Owner', 'Provider', 'Table', 'Table Properties', " +
+              "'Type', '_partition', ''))"),
+          Seq(
+            Row("# Detailed Table Information", "", ""),
+            Row("# Column Default Values", "", ""),
+            Row("# Metadata Columns", "", ""),
+            Row("id", "bigint", "42"),
+            Row("id", "bigint", null)
+          ))
+      }
+    }
+  }
+
   private def testNotSupportedV2Command(sqlCommand: String, sqlParams: String): Unit = {
     checkError(
       exception = intercept[AnalysisException] {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org