Posted to commits@hudi.apache.org by GitBox <gi...@apache.org> on 2022/04/13 22:21:30 UTC

[GitHub] [hudi] alexeykudinkin commented on a diff in pull request #5272: [HUDI-3826] Commands deleting partitions do so incorrectly

alexeykudinkin commented on code in PR #5272:
URL: https://github.com/apache/hudi/pull/5272#discussion_r849935685


##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/AlterHoodieTableDropPartitionCommand.scala:
##########
@@ -57,6 +55,7 @@ case class AlterHoodieTableDropPartitionCommand(
         sparkSession.sessionState.conf.resolver)
     }
 
+    // drop partitions to lazy clean

Review Comment:
   Can you please expand on that comment? I get the general direction, but it isn't clear what it's referring to holistically.
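   For example, something along these lines (my suggested wording, please adjust it to the actual mechanics):
   ```scala
   // Drop the matched partitions logically via a `delete_partition` write: files in
   // those partitions are only marked as replaced here and are removed later by the
   // cleaner (i.e. cleaned lazily), rather than being deleted by this command.
   ```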



##########
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTableDropPartition.scala:
##########
@@ -265,53 +187,4 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase {
       }
     }
   }
-
-  Seq(false, true).foreach { hiveStyle =>
-    test(s"Purge drop multi-level partitioned table's partitions, isHiveStylePartitioning: $hiveStyle") {

Review Comment:
   Let's make sure there's a ticket tracking this (the removed purge-drop test coverage)



##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/TruncateHoodieTableCommand.scala:
##########
@@ -18,115 +18,83 @@
 package org.apache.spark.sql.hudi.command
 
 import org.apache.hadoop.fs.Path
+import org.apache.hudi.HoodieSparkSqlWriter
+import org.apache.hudi.client.common.HoodieSparkEngineContext
 import org.apache.hudi.common.fs.FSUtils
 import org.apache.hudi.common.table.HoodieTableMetaClient
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
-import org.apache.spark.sql.catalyst.catalog.{CatalogStatistics, CatalogTableType, HoodieCatalogTable}
+import org.apache.spark.sql.catalyst.catalog.{CatalogTableType, HoodieCatalogTable}
 import org.apache.spark.sql.hudi.HoodieSqlCommonUtils.{getPartitionPathToDrop, normalizePartitionSpec}
-import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
-
-import scala.util.control.NonFatal
+import org.apache.spark.sql.hudi.ProvidesHoodieConfig
+import org.apache.spark.sql.{AnalysisException, Row, SaveMode, SparkSession}
 
 /**
  * Command for truncate hudi table.
  */
 case class TruncateHoodieTableCommand(
    tableIdentifier: TableIdentifier,
-   partitionSpec: Option[TablePartitionSpec])
-  extends HoodieLeafRunnableCommand {
+   specs: Option[TablePartitionSpec])
+  extends HoodieLeafRunnableCommand with ProvidesHoodieConfig {
 
-  override def run(spark: SparkSession): Seq[Row] = {
+  override def run(sparkSession: SparkSession): Seq[Row] = {
     val fullTableName = s"${tableIdentifier.database}.${tableIdentifier.table}"
     logInfo(s"start execute truncate table command for $fullTableName")
 
-    val hoodieCatalogTable = HoodieCatalogTable(spark, tableIdentifier)
-    val properties = hoodieCatalogTable.tableConfig.getProps
-
-    try {
-      // Delete all data in the table directory
-      val catalog = spark.sessionState.catalog
-      val table = catalog.getTableMetadata(tableIdentifier)
-      val tableIdentWithDB = table.identifier.quotedString
-
-      if (table.tableType == CatalogTableType.VIEW) {
-        throw new AnalysisException(
-          s"Operation not allowed: TRUNCATE TABLE on views: $tableIdentWithDB")
-      }
-
-      if (table.partitionColumnNames.isEmpty && partitionSpec.isDefined) {
-        throw new AnalysisException(
-          s"Operation not allowed: TRUNCATE TABLE ... PARTITION is not supported " +
-            s"for tables that are not partitioned: $tableIdentWithDB")
-      }
+    val hoodieCatalogTable = HoodieCatalogTable(sparkSession, tableIdentifier)
 
-      val basePath = hoodieCatalogTable.tableLocation
-      val partCols = table.partitionColumnNames
-      val locations = if (partitionSpec.isEmpty || partCols.isEmpty) {
-        Seq(basePath)
-      } else {
-        val normalizedSpec: Seq[Map[String, String]] = Seq(partitionSpec.map { spec =>
-          normalizePartitionSpec(
-            spec,
-            partCols,
-            table.identifier.quotedString,
-            spark.sessionState.conf.resolver)
-        }.get)
+    val catalog = sparkSession.sessionState.catalog
+    val table = catalog.getTableMetadata(tableIdentifier)
+    val tableIdentWithDB = table.identifier.quotedString

Review Comment:
   I'd suggest avoiding uncommon shortenings; you can just call it `tableId`.
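   A minimal sketch of the rename (hypothetical, assuming the later references such as the view check are updated to match):
   ```scala
   val tableId = table.identifier.quotedString

   if (table.tableType == CatalogTableType.VIEW) {
     throw new AnalysisException(
       s"Operation not allowed: TRUNCATE TABLE on views: $tableId")
   }
   ```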



##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/TruncateHoodieTableCommand.scala:
##########
@@ -18,115 +18,83 @@
 package org.apache.spark.sql.hudi.command
 
 import org.apache.hadoop.fs.Path
+import org.apache.hudi.HoodieSparkSqlWriter
+import org.apache.hudi.client.common.HoodieSparkEngineContext
 import org.apache.hudi.common.fs.FSUtils
 import org.apache.hudi.common.table.HoodieTableMetaClient
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
-import org.apache.spark.sql.catalyst.catalog.{CatalogStatistics, CatalogTableType, HoodieCatalogTable}
+import org.apache.spark.sql.catalyst.catalog.{CatalogTableType, HoodieCatalogTable}
 import org.apache.spark.sql.hudi.HoodieSqlCommonUtils.{getPartitionPathToDrop, normalizePartitionSpec}
-import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
-
-import scala.util.control.NonFatal
+import org.apache.spark.sql.hudi.ProvidesHoodieConfig
+import org.apache.spark.sql.{AnalysisException, Row, SaveMode, SparkSession}
 
 /**
  * Command for truncate hudi table.
  */
 case class TruncateHoodieTableCommand(
    tableIdentifier: TableIdentifier,
-   partitionSpec: Option[TablePartitionSpec])
-  extends HoodieLeafRunnableCommand {
+   specs: Option[TablePartitionSpec])

Review Comment:
   Why plural? I think the previous name was more eloquent.
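   That is, keeping the original parameter name while still mixing in `ProvidesHoodieConfig`, roughly:
   ```scala
   case class TruncateHoodieTableCommand(
      tableIdentifier: TableIdentifier,
      partitionSpec: Option[TablePartitionSpec])
     extends HoodieLeafRunnableCommand with ProvidesHoodieConfig {
   ```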



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@hudi.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org