You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by "zzzzming95 (via GitHub)" <gi...@apache.org> on 2023/09/05 13:34:01 UTC

[GitHub] [spark] zzzzming95 commented on a diff in pull request #42574: [SPARK-43149][SQL] `CreateDataSourceTableCommand` should create metadata first

zzzzming95 commented on code in PR #42574:
URL: https://github.com/apache/spark/pull/42574#discussion_r1315907482


##########
sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala:
##########
@@ -191,16 +193,26 @@ case class CreateDataSourceTableAsSelectCommand(
         schema = tableSchema)
       // Table location is already validated. No need to check it again during table creation.
       sessionState.catalog.createTable(newTable, ignoreIfExists = false, validateLocation = false)
+      try {
+        val result = saveDataIntoTable(
+          sparkSession, table, tableLocation, SaveMode.Overwrite, tableExists = false)
 
-      result match {
-        case fs: HadoopFsRelation if table.partitionColumnNames.nonEmpty &&
+        result match {
+          case fs: HadoopFsRelation if table.partitionColumnNames.nonEmpty &&
             sparkSession.sqlContext.conf.manageFilesourcePartitions =>
-          // Need to recover partitions into the metastore so our saved data is visible.
-          sessionState.executePlan(RepairTableCommand(
-            table.identifier,
-            enableAddPartitions = true,
-            enableDropPartitions = false), CommandExecutionMode.SKIP).toRdd
-        case _ =>
+            // Need to recover partitions into the metastore so our saved data is visible.
+            sessionState.executePlan(RepairTableCommand(
+              table.identifier,
+              enableAddPartitions = true,
+              enableDropPartitions = false), CommandExecutionMode.SKIP).toRdd
+          case _ =>
+        }
+      } catch {
+        case NonFatal(e) =>
+          // drop the created table.
+          sessionState.catalog.dropTable(newTable.identifier,

Review Comment:
   > There is no good solution here, changing the order introduces a new issue: we may have metadata created but no data exist
   
   Drop the table when an exception is encountered during writing. @cloud-fan
   
   



##########
sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala:
##########
@@ -191,16 +193,26 @@ case class CreateDataSourceTableAsSelectCommand(
         schema = tableSchema)
       // Table location is already validated. No need to check it again during table creation.
       sessionState.catalog.createTable(newTable, ignoreIfExists = false, validateLocation = false)
+      try {
+        val result = saveDataIntoTable(
+          sparkSession, table, tableLocation, SaveMode.Overwrite, tableExists = false)
 
-      result match {
-        case fs: HadoopFsRelation if table.partitionColumnNames.nonEmpty &&
+        result match {
+          case fs: HadoopFsRelation if table.partitionColumnNames.nonEmpty &&
             sparkSession.sqlContext.conf.manageFilesourcePartitions =>
-          // Need to recover partitions into the metastore so our saved data is visible.
-          sessionState.executePlan(RepairTableCommand(
-            table.identifier,
-            enableAddPartitions = true,
-            enableDropPartitions = false), CommandExecutionMode.SKIP).toRdd
-        case _ =>
+            // Need to recover partitions into the metastore so our saved data is visible.
+            sessionState.executePlan(RepairTableCommand(
+              table.identifier,
+              enableAddPartitions = true,
+              enableDropPartitions = false), CommandExecutionMode.SKIP).toRdd
+          case _ =>
+        }
+      } catch {
+        case NonFatal(e) =>
+          // drop the created table.
+          sessionState.catalog.dropTable(newTable.identifier,

Review Comment:
   > There is no good solution here, changing the order introduces a new issue: we may have metadata created but no data exist
   
   Drop the table when an exception is encountered during writing. @cloud-fan
   
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org