Posted to commits@hudi.apache.org by si...@apache.org on 2022/01/07 18:06:24 UTC

[hudi] 02/21: [HUDI-3108] Fix Purge Drop MOR Table Cause error (#4455)

This is an automated email from the ASF dual-hosted git repository.

sivabalan pushed a commit to branch release-0.10.1-rc1
in repository https://gitbox.apache.org/repos/asf/hudi.git

commit 9f9e41c7a36b80ce58fa118b9b55e51994c67115
Author: ForwardXu <fo...@gmail.com>
AuthorDate: Wed Dec 29 20:23:23 2021 +0800

    [HUDI-3108] Fix Purge Drop MOR Table Cause error (#4455)
---
 .../sql/hudi/command/DropHoodieTableCommand.scala  | 131 +++++++++++++++++++++
 1 file changed, 131 insertions(+)

diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/DropHoodieTableCommand.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/DropHoodieTableCommand.scala
new file mode 100644
index 0000000..aa65480
--- /dev/null
+++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/DropHoodieTableCommand.scala
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hudi.command
+
+import org.apache.hadoop.fs.Path
+import org.apache.hudi.SparkAdapterSupport
+import org.apache.hudi.client.common.HoodieSparkEngineContext
+import org.apache.hudi.common.fs.FSUtils
+import org.apache.hudi.common.model.HoodieTableType
+import org.apache.spark.sql._
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException
+import org.apache.spark.sql.catalyst.catalog.{CatalogTableType, HoodieCatalogTable}
+import org.apache.spark.sql.execution.command.RunnableCommand
+import org.apache.spark.sql.hive.HiveClientUtils
+import org.apache.spark.sql.hudi.HoodieSqlUtils.isEnableHive
+
+import scala.util.control.NonFatal
+
+case class DropHoodieTableCommand(
+    tableIdentifier: TableIdentifier,
+    ifExists: Boolean,
+    isView: Boolean,
+    purge: Boolean) extends RunnableCommand
+  with SparkAdapterSupport {
+
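+  // Hive sync registers two companion tables alongside a MOR table: a
+  // real-time snapshot view ("_rt") and a read-optimized view ("_ro").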
+  val MOR_SNAPSHOT_TABLE_SUFFIX = "_rt"
+  val MOR_READ_OPTIMIZED_TABLE_SUFFIX = "_ro"
+
+  override def run(sparkSession: SparkSession): Seq[Row] = {
+    val fullTableName = tableIdentifier.unquotedString // avoid logging the raw Option from tableIdentifier.database
+    logInfo(s"Start executing drop table command for $fullTableName")
+    sparkSession.catalog.refreshTable(tableIdentifier.unquotedString)
+
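+    // Non-fatal failures from the drop are logged as warnings rather than
+    // rethrown, so they do not fail the SQL statement itself.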
+    try {
+      // drop catalog table for this hoodie table
+      dropTableInCatalog(sparkSession, tableIdentifier, ifExists, purge)
+    } catch {
+      case NonFatal(e) =>
+        logWarning(s"Failed to drop catalog table in metastore: ${e.getMessage}")
+    }
+
+    logInfo(s"Finish execute drop table command for $fullTableName")
+    Seq.empty[Row]
+  }
+
+  def dropTableInCatalog(sparkSession: SparkSession,
+          tableIdentifier: TableIdentifier,
+          ifExists: Boolean,
+          purge: Boolean): Unit = {
+    val hoodieCatalogTable = HoodieCatalogTable(sparkSession, tableIdentifier)
+    val table = hoodieCatalogTable.table
+    assert(table.tableType != CatalogTableType.VIEW)
+
+    val basePath = hoodieCatalogTable.tableLocation
+    val catalog = sparkSession.sessionState.catalog
+
+    // Drop table in the catalog
+    val enableHive = isEnableHive(sparkSession)
+    if (enableHive) {
+      dropHiveDataSourceTable(sparkSession, hoodieCatalogTable)
+    } else {
+      if (catalog.tableExists(tableIdentifier)) {
+        catalog.dropTable(tableIdentifier, ifExists, purge)
+      }
+    }
+
+    // Recursively delete table directories
+    if (purge) {
+      logInfo("Clean up " + basePath)
+      val targetPath = new Path(basePath)
+      val engineContext = new HoodieSparkEngineContext(sparkSession.sparkContext)
+      val fs = FSUtils.getFs(basePath, sparkSession.sparkContext.hadoopConfiguration)
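+      // deleteDir distributes the file deletion across the cluster, since a
+      // table directory can hold too many files to remove on the driver alone.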
+      FSUtils.deleteDir(engineContext, fs, targetPath, sparkSession.sparkContext.defaultParallelism)
+    }
+  }
+
+  private def dropHiveDataSourceTable(
+       sparkSession: SparkSession,
+       hoodieCatalogTable: HoodieCatalogTable): Unit = {
+    val table = hoodieCatalogTable.table
+    val dbName = table.identifier.database.get
+    val tableName = hoodieCatalogTable.tableName
+
+    // check database exists
+    val dbExists = sparkSession.sessionState.catalog.databaseExists(dbName)
+    if (!dbExists) {
+      throw new NoSuchDatabaseException(dbName)
+    }
+
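+    // A purge of a MOR table must also drop the companion "_rt" and "_ro"
+    // tables created by Hive sync; leaving them behind is the source of the
+    // error this patch fixes.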
+    if (HoodieTableType.MERGE_ON_READ == hoodieCatalogTable.tableType && purge) {
+      val snapshotTableName = tableName + MOR_SNAPSHOT_TABLE_SUFFIX
+      val roTableName = tableName + MOR_READ_OPTIMIZED_TABLE_SUFFIX
+
+      dropHiveTable(sparkSession, dbName, snapshotTableName)
+      dropHiveTable(sparkSession, dbName, roTableName)
+    }
+
+    dropHiveTable(sparkSession, dbName, tableName, purge)
+  }
+
+  private def dropHiveTable(
+       sparkSession: SparkSession,
+       dbName: String,
+       tableName: String,
+       purge: Boolean = false): Unit = {
+    // check table exists
+    if (sparkSession.sessionState.catalog.tableExists(TableIdentifier(tableName, Option(dbName)))) {
+      val client = HiveClientUtils.newClientForMetadata(sparkSession.sparkContext.conf,
+        sparkSession.sessionState.newHadoopConf())
+
+      // Drop through the Hive metastore client so tables registered by Hive
+      // sync are removed even when Spark's catalog does not track them.
+      client.dropTable(dbName, tableName, ifExists, purge)
+    }
+  }
+}
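
For context, a minimal usage sketch of the scenario this patch addresses
(table name, schema, and properties are illustrative, and a SparkSession
with the Hudi SQL extensions enabled is assumed):

    // Create a MOR table; Hive sync would additionally register the
    // companion tables h0_rt and h0_ro in the metastore.
    spark.sql(
      """CREATE TABLE h0 (id INT, name STRING, ts BIGINT)
        |USING hudi
        |TBLPROPERTIES (type = 'mor', primaryKey = 'id', preCombineField = 'ts')
        |""".stripMargin)

    // With the command above, the companion tables are dropped as well and
    // the table directory is deleted in parallel.
    spark.sql("DROP TABLE h0 PURGE")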