You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2017/08/09 15:46:30 UTC
spark git commit: [SPARK-21504][SQL] Add spark version info into table metadata
Repository: spark
Updated Branches:
refs/heads/master b78cf13bf -> 2d799d080
[SPARK-21504][SQL] Add spark version info into table metadata
## What changes were proposed in this pull request?
This PR adds the Spark version info to the table metadata. The value is assigned when the table is created, so users can find out which version of Spark was used to create the table.
## How was this patch tested?
N/A
Author: gatorsmile <ga...@gmail.com>
Closes #18709 from gatorsmile/addVersion.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2d799d08
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2d799d08
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2d799d08
Branch: refs/heads/master
Commit: 2d799d08081032828cc2c95cbf58a268653c7a05
Parents: b78cf13
Author: gatorsmile <ga...@gmail.com>
Authored: Wed Aug 9 08:46:25 2017 -0700
Committer: gatorsmile <ga...@gmail.com>
Committed: Wed Aug 9 08:46:25 2017 -0700
----------------------------------------------------------------------
.../sql/catalyst/catalog/ExternalCatalog.scala | 4 +++-
.../spark/sql/catalyst/catalog/interface.scala | 7 ++++++-
.../spark/sql/catalyst/trees/TreeNodeSuite.scala | 4 +++-
.../describe-table-after-alter-table.sql.out | 15 ++++++++++-----
.../resources/sql-tests/results/describe.sql.out | 18 ++++++++++++------
.../sql-tests/results/show-tables.sql.out | 9 ++++++---
.../org/apache/spark/sql/SQLQueryTestSuite.scala | 3 ++-
.../spark/sql/execution/command/DDLSuite.scala | 1 +
.../spark/sql/hive/HiveExternalCatalog.scala | 12 +++++++++++-
.../spark/sql/hive/execution/HiveDDLSuite.scala | 1 +
10 files changed, 55 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala
index 6000d48..68644f4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala
@@ -106,8 +106,10 @@ abstract class ExternalCatalog
final def createTable(tableDefinition: CatalogTable, ignoreIfExists: Boolean): Unit = {
val db = tableDefinition.database
val name = tableDefinition.identifier.table
+ val tableDefinitionWithVersion =
+ tableDefinition.copy(createVersion = org.apache.spark.SPARK_VERSION)
postToAll(CreateTablePreEvent(db, name))
- doCreateTable(tableDefinition, ignoreIfExists)
+ doCreateTable(tableDefinitionWithVersion, ignoreIfExists)
postToAll(CreateTableEvent(db, name))
}
http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index 9531456..f865106 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -205,6 +205,9 @@ case class BucketSpec(
* configured.
* @param ignoredProperties is a list of table properties that are used by the underlying table
* but ignored by Spark SQL yet.
+ * @param createVersion records the version of Spark that created this table metadata. The default
+ * is an empty string. We expect it will be read from the catalog or filled by
+ * ExternalCatalog.createTable. For temporary views, the value will be empty.
*/
case class CatalogTable(
identifier: TableIdentifier,
@@ -217,6 +220,7 @@ case class CatalogTable(
owner: String = "",
createTime: Long = System.currentTimeMillis,
lastAccessTime: Long = -1,
+ createVersion: String = "",
properties: Map[String, String] = Map.empty,
stats: Option[CatalogStatistics] = None,
viewText: Option[String] = None,
@@ -302,8 +306,9 @@ case class CatalogTable(
identifier.database.foreach(map.put("Database", _))
map.put("Table", identifier.table)
if (owner.nonEmpty) map.put("Owner", owner)
- map.put("Created", new Date(createTime).toString)
+ map.put("Created Time", new Date(createTime).toString)
map.put("Last Access", new Date(lastAccessTime).toString)
+ map.put("Created By", "Spark " + createVersion)
map.put("Type", tableType.name)
provider.foreach(map.put("Provider", _))
bucketSpec.foreach(map ++= _.toLinkedHashMap)
http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala
index 4fc947a..f83c637 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala
@@ -480,7 +480,8 @@ class TreeNodeSuite extends SparkFunSuite {
CatalogTableType.MANAGED,
CatalogStorageFormat.empty,
StructType(StructField("a", IntegerType, true) :: Nil),
- createTime = 0L),
+ createTime = 0L,
+ createVersion = "2.x"),
JObject(
"product-class" -> classOf[CatalogTable].getName,
@@ -509,6 +510,7 @@ class TreeNodeSuite extends SparkFunSuite {
"owner" -> "",
"createTime" -> 0,
"lastAccessTime" -> -1,
+ "createVersion" -> "2.x",
"tracksPartitionsInCatalog" -> false,
"properties" -> JNull,
"unsupportedFeatures" -> List.empty[String],
http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/core/src/test/resources/sql-tests/results/describe-table-after-alter-table.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/describe-table-after-alter-table.sql.out b/sql/core/src/test/resources/sql-tests/results/describe-table-after-alter-table.sql.out
index 4bf4633..7873085 100644
--- a/sql/core/src/test/resources/sql-tests/results/describe-table-after-alter-table.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/describe-table-after-alter-table.sql.out
@@ -23,8 +23,9 @@ d string
# Detailed Table Information
Database default
Table table_with_comment
-Created [not included in comparison]
+Created Time [not included in comparison]
Last Access [not included in comparison]
+Created By [not included in comparison]
Type MANAGED
Provider parquet
Comment added
@@ -52,8 +53,9 @@ d string
# Detailed Table Information
Database default
Table table_with_comment
-Created [not included in comparison]
+Created Time [not included in comparison]
Last Access [not included in comparison]
+Created By [not included in comparison]
Type MANAGED
Provider parquet
Comment modified comment
@@ -88,8 +90,9 @@ b int
# Detailed Table Information
Database default
Table table_comment
-Created [not included in comparison]
+Created Time [not included in comparison]
Last Access [not included in comparison]
+Created By [not included in comparison]
Type MANAGED
Provider parquet
Location [not included in comparison]sql/core/spark-warehouse/table_comment
@@ -114,8 +117,9 @@ b int
# Detailed Table Information
Database default
Table table_comment
-Created [not included in comparison]
+Created Time [not included in comparison]
Last Access [not included in comparison]
+Created By [not included in comparison]
Type MANAGED
Provider parquet
Comment added comment
@@ -141,8 +145,9 @@ b int
# Detailed Table Information
Database default
Table table_comment
-Created [not included in comparison]
+Created Time [not included in comparison]
Last Access [not included in comparison]
+Created By [not included in comparison]
Type MANAGED
Provider parquet
Location [not included in comparison]sql/core/spark-warehouse/table_comment
http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/core/src/test/resources/sql-tests/results/describe.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out
index e2b79e8..b91f2c0 100644
--- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out
@@ -120,8 +120,9 @@ d string
# Detailed Table Information
Database default
Table t
-Created [not included in comparison]
+Created Time [not included in comparison]
Last Access [not included in comparison]
+Created By [not included in comparison]
Type MANAGED
Provider parquet
Num Buckets 2
@@ -151,8 +152,9 @@ d string
# Detailed Table Information
Database default
Table t
-Created [not included in comparison]
+Created Time [not included in comparison]
Last Access [not included in comparison]
+Created By [not included in comparison]
Type MANAGED
Provider parquet
Num Buckets 2
@@ -190,8 +192,9 @@ d string
# Detailed Table Information
Database default
Table t
-Created [not included in comparison]
+Created Time [not included in comparison]
Last Access [not included in comparison]
+Created By [not included in comparison]
Type MANAGED
Provider parquet
Num Buckets 2
@@ -228,8 +231,9 @@ d string
# Detailed Table Information
Database default
Table t
-Created [not included in comparison]
+Created Time [not included in comparison]
Last Access [not included in comparison]
+Created By [not included in comparison]
Type MANAGED
Provider parquet
Num Buckets 2
@@ -458,8 +462,9 @@ d string
# Detailed Table Information
Database default
Table v
-Created [not included in comparison]
+Created Time [not included in comparison]
Last Access [not included in comparison]
+Created By [not included in comparison]
Type VIEW
View Text SELECT * FROM t
View Default Database default
@@ -480,8 +485,9 @@ d string
# Detailed Table Information
Database default
Table v
-Created [not included in comparison]
+Created Time [not included in comparison]
Last Access [not included in comparison]
+Created By [not included in comparison]
Type VIEW
View Text SELECT * FROM t
View Default Database default
http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out
index 8f2a54f..da729cd 100644
--- a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out
@@ -119,8 +119,9 @@ SHOW TABLE EXTENDED LIKE 'show_t*'
struct<database:string,tableName:string,isTemporary:boolean,information:string>
-- !query 12 output
show_t3 true Table: show_t3
-Created [not included in comparison]
+Created Time [not included in comparison]
Last Access [not included in comparison]
+Created By [not included in comparison]
Type: VIEW
Schema: root
|-- e: integer (nullable = true)
@@ -128,8 +129,9 @@ Schema: root
showdb show_t1 false Database: showdb
Table: show_t1
-Created [not included in comparison]
+Created Time [not included in comparison]
Last Access [not included in comparison]
+Created By [not included in comparison]
Type: MANAGED
Provider: parquet
Location [not included in comparison]sql/core/spark-warehouse/showdb.db/show_t1
@@ -144,8 +146,9 @@ Schema: root
showdb show_t2 false Database: showdb
Table: show_t2
-Created [not included in comparison]
+Created Time [not included in comparison]
Last Access [not included in comparison]
+Created By [not included in comparison]
Type: MANAGED
Provider: parquet
Location [not included in comparison]sql/core/spark-warehouse/showdb.db/show_t2
http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
index d9130fd..aa000bd 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
@@ -228,7 +228,8 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext {
// Get answer, but also get rid of the #1234 expression ids that show up in explain plans
val answer = df.queryExecution.hiveResultString().map(_.replaceAll("#\\d+", "#x")
.replaceAll("Location.*/sql/core/", s"Location ${notIncludedMsg}sql/core/")
- .replaceAll("Created.*", s"Created $notIncludedMsg")
+ .replaceAll("Created By.*", s"Created By $notIncludedMsg")
+ .replaceAll("Created Time.*", s"Created Time $notIncludedMsg")
.replaceAll("Last Access.*", s"Last Access $notIncludedMsg"))
// If the output is not pre-sorted, sort it.
http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index 5c0a6aa..9332f77 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -68,6 +68,7 @@ class InMemoryCatalogedDDLSuite extends DDLSuite with SharedSQLContext with Befo
provider = Some("parquet"),
partitionColumnNames = Seq("a", "b"),
createTime = 0L,
+ createVersion = org.apache.spark.SPARK_VERSION,
tracksPartitionsInCatalog = true)
}
http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index 172317c..19e5f78 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -390,6 +390,9 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
val bucketSpec = table.bucketSpec
val properties = new mutable.HashMap[String, String]
+
+ properties.put(CREATED_SPARK_VERSION, table.createVersion)
+
// Serialized JSON schema string may be too long to be stored into a single metastore table
// property. In this case, we split the JSON string and store each part as a separate table
// property.
@@ -594,7 +597,8 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
// Set the `schema`, `partitionColumnNames` and `bucketSpec` from the old table definition,
// to retain the spark specific format if it is.
val propsFromOldTable = oldTableDef.properties.filter { case (k, v) =>
- k.startsWith(DATASOURCE_PREFIX) || k.startsWith(STATISTICS_PREFIX)
+ k.startsWith(DATASOURCE_PREFIX) || k.startsWith(STATISTICS_PREFIX) ||
+ k.startsWith(CREATED_SPARK_VERSION)
}
val newTableProps = propsFromOldTable ++ tableDefinition.properties + partitionProviderProp
val newDef = tableDefinition.copy(
@@ -700,6 +704,9 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
table = restoreDataSourceTable(table, provider)
}
+ // Restore version info
+ val version: String = table.properties.getOrElse(CREATED_SPARK_VERSION, "2.2 or prior")
+
// Restore Spark's statistics from information in Metastore.
val statsProps = table.properties.filterKeys(_.startsWith(STATISTICS_PREFIX))
@@ -735,6 +742,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
// Get the original table properties as defined by the user.
table.copy(
+ createVersion = version,
properties = table.properties.filterNot { case (key, _) => key.startsWith(SPARK_SQL_PREFIX) })
}
@@ -1208,6 +1216,8 @@ object HiveExternalCatalog {
val TABLE_PARTITION_PROVIDER_CATALOG = "catalog"
val TABLE_PARTITION_PROVIDER_FILESYSTEM = "filesystem"
+ val CREATED_SPARK_VERSION = SPARK_SQL_PREFIX + "create.version"
+
/**
* Returns the fully qualified name used in table properties for a particular column stat.
* For example, for column "mycol", and "min" stat, this should return
http://git-wip-us.apache.org/repos/asf/spark/blob/2d799d08/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 5b62e37..0007d25 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -90,6 +90,7 @@ class HiveCatalogedDDLSuite extends DDLSuite with TestHiveSingleton with BeforeA
provider = if (isDataSource) Some("parquet") else Some("hive"),
partitionColumnNames = Seq("a", "b"),
createTime = 0L,
+ createVersion = org.apache.spark.SPARK_VERSION,
tracksPartitionsInCatalog = true)
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org