You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by GitBox <gi...@apache.org> on 2022/04/13 22:21:30 UTC
[GitHub] [hudi] alexeykudinkin commented on a diff in pull request #5272: [HUDI-3826] Commands deleting partitions do so incorrectly
alexeykudinkin commented on code in PR #5272:
URL: https://github.com/apache/hudi/pull/5272#discussion_r849935685
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/AlterHoodieTableDropPartitionCommand.scala:
##########
@@ -57,6 +55,7 @@ case class AlterHoodieTableDropPartitionCommand(
sparkSession.sessionState.conf.resolver)
}
+ // drop partitions to lazy clean
Review Comment:
Can you please expand on that comment? I get the general direction from it, but it isn't clear what exactly it is referring to.
##########
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTableDropPartition.scala:
##########
@@ -265,53 +187,4 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase {
}
}
}
-
- Seq(false, true).foreach { hiveStyle =>
- test(s"Purge drop multi-level partitioned table's partitions, isHiveStylePartitioning: $hiveStyle") {
Review Comment:
Let's make sure there's a ticket for this
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/TruncateHoodieTableCommand.scala:
##########
@@ -18,115 +18,83 @@
package org.apache.spark.sql.hudi.command
import org.apache.hadoop.fs.Path
+import org.apache.hudi.HoodieSparkSqlWriter
+import org.apache.hudi.client.common.HoodieSparkEngineContext
import org.apache.hudi.common.fs.FSUtils
import org.apache.hudi.common.table.HoodieTableMetaClient
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
-import org.apache.spark.sql.catalyst.catalog.{CatalogStatistics, CatalogTableType, HoodieCatalogTable}
+import org.apache.spark.sql.catalyst.catalog.{CatalogTableType, HoodieCatalogTable}
import org.apache.spark.sql.hudi.HoodieSqlCommonUtils.{getPartitionPathToDrop, normalizePartitionSpec}
-import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
-
-import scala.util.control.NonFatal
+import org.apache.spark.sql.hudi.ProvidesHoodieConfig
+import org.apache.spark.sql.{AnalysisException, Row, SaveMode, SparkSession}
/**
* Command for truncate hudi table.
*/
case class TruncateHoodieTableCommand(
tableIdentifier: TableIdentifier,
- partitionSpec: Option[TablePartitionSpec])
- extends HoodieLeafRunnableCommand {
+ specs: Option[TablePartitionSpec])
+ extends HoodieLeafRunnableCommand with ProvidesHoodieConfig {
- override def run(spark: SparkSession): Seq[Row] = {
+ override def run(sparkSession: SparkSession): Seq[Row] = {
val fullTableName = s"${tableIdentifier.database}.${tableIdentifier.table}"
logInfo(s"start execute truncate table command for $fullTableName")
- val hoodieCatalogTable = HoodieCatalogTable(spark, tableIdentifier)
- val properties = hoodieCatalogTable.tableConfig.getProps
-
- try {
- // Delete all data in the table directory
- val catalog = spark.sessionState.catalog
- val table = catalog.getTableMetadata(tableIdentifier)
- val tableIdentWithDB = table.identifier.quotedString
-
- if (table.tableType == CatalogTableType.VIEW) {
- throw new AnalysisException(
- s"Operation not allowed: TRUNCATE TABLE on views: $tableIdentWithDB")
- }
-
- if (table.partitionColumnNames.isEmpty && partitionSpec.isDefined) {
- throw new AnalysisException(
- s"Operation not allowed: TRUNCATE TABLE ... PARTITION is not supported " +
- s"for tables that are not partitioned: $tableIdentWithDB")
- }
+ val hoodieCatalogTable = HoodieCatalogTable(sparkSession, tableIdentifier)
- val basePath = hoodieCatalogTable.tableLocation
- val partCols = table.partitionColumnNames
- val locations = if (partitionSpec.isEmpty || partCols.isEmpty) {
- Seq(basePath)
- } else {
- val normalizedSpec: Seq[Map[String, String]] = Seq(partitionSpec.map { spec =>
- normalizePartitionSpec(
- spec,
- partCols,
- table.identifier.quotedString,
- spark.sessionState.conf.resolver)
- }.get)
+ val catalog = sparkSession.sessionState.catalog
+ val table = catalog.getTableMetadata(tableIdentifier)
+ val tableIdentWithDB = table.identifier.quotedString
Review Comment:
I'd suggest avoiding uncommon shortenings; you can just call it `tableId`.
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/TruncateHoodieTableCommand.scala:
##########
@@ -18,115 +18,83 @@
package org.apache.spark.sql.hudi.command
import org.apache.hadoop.fs.Path
+import org.apache.hudi.HoodieSparkSqlWriter
+import org.apache.hudi.client.common.HoodieSparkEngineContext
import org.apache.hudi.common.fs.FSUtils
import org.apache.hudi.common.table.HoodieTableMetaClient
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
-import org.apache.spark.sql.catalyst.catalog.{CatalogStatistics, CatalogTableType, HoodieCatalogTable}
+import org.apache.spark.sql.catalyst.catalog.{CatalogTableType, HoodieCatalogTable}
import org.apache.spark.sql.hudi.HoodieSqlCommonUtils.{getPartitionPathToDrop, normalizePartitionSpec}
-import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
-
-import scala.util.control.NonFatal
+import org.apache.spark.sql.hudi.ProvidesHoodieConfig
+import org.apache.spark.sql.{AnalysisException, Row, SaveMode, SparkSession}
/**
* Command for truncate hudi table.
*/
case class TruncateHoodieTableCommand(
tableIdentifier: TableIdentifier,
- partitionSpec: Option[TablePartitionSpec])
- extends HoodieLeafRunnableCommand {
+ specs: Option[TablePartitionSpec])
Review Comment:
Why plural? I think the previous name was more eloquent.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@hudi.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org