Posted to commits@hudi.apache.org by xu...@apache.org on 2022/04/14 08:49:08 UTC

[hudi] branch master updated: [HUDI-3845] Fix deleting a MOR table's partition when URL-encoded partitioning is enabled (#5282)

This is an automated email from the ASF dual-hosted git repository.

xushiyan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 6621f3cdbb [HUDI-3845] Fix deleting a MOR table's partition when URL-encoded partitioning is enabled (#5282)
6621f3cdbb is described below

commit 6621f3cdbba1d913a4bc7f534c4837d010f45fd9
Author: ForwardXu <fo...@gmail.com>
AuthorDate: Thu Apr 14 16:49:00 2022 +0800

    [HUDI-3845] Fix deleting a MOR table's partition when URL-encoded partitioning is enabled (#5282)
---
 .../org/apache/hudi/HoodieMergeOnReadRDD.scala     |  2 +-
 .../apache/spark/sql/hudi/TestDeleteTable.scala    | 47 ++++++++++++++++++++++
 2 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala
index c0c47cff42..a7ca60865f 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala
@@ -345,7 +345,7 @@ private object HoodieMergeOnReadRDD {
       val logRecordScannerBuilder = HoodieMergedLogRecordScanner.newBuilder()
         .withFileSystem(fs)
         .withBasePath(tablePath)
-        .withLogFilePaths(logFiles.map(logFile => getFilePath(logFile.getPath)).asJava)
+        .withLogFilePaths(logFiles.map(logFile => logFile.getPath.toString).asJava)
         .withReaderSchema(logSchema)
         .withLatestInstantTime(tableState.latestCommitTimestamp)
         .withReadBlocksLazily(
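
Why the one-line change fixes the bug: with
hoodie.datasource.write.partitionpath.urlencode=true, the partition value
"2021/10/01" is stored on disk as a single percent-encoded directory,
"2021%2F10%2F01". The removed getFilePath helper went through the path's URI
form (roughly path.toUri.toString), and java.net.URI's multi-argument
constructors always re-quote '%', so the already-encoded partition came back
double-encoded ("2021%252F10%252F01") and the merged log record scanner was
handed a path that does not exist. Path#toString instead decodes the internal
URI back to the on-disk string. A minimal sketch of the mismatch, assuming
standard Hadoop Path / java.net.URI semantics (the table path and log file
name below are made up for illustration):

    import org.apache.hadoop.fs.Path

    object EncodingMismatch extends App {
      // A log file under a URL-encoded partition directory (illustrative name).
      val onDisk = "/tmp/tbl/2021%2F10%2F01/part-0001.log"
      val p = new Path(onDisk)

      // Path#toString decodes the internal URI back to the on-disk form:
      println(p.toString)       // /tmp/tbl/2021%2F10%2F01/part-0001.log

      // Path#toUri re-quotes '%' and double-encodes the partition directory:
      println(p.toUri.toString) // /tmp/tbl/2021%252F10%252F01/part-0001.log
    }
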
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDeleteTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDeleteTable.scala
index 9c693f9626..b2e888a5f3 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDeleteTable.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDeleteTable.scala
@@ -17,6 +17,11 @@
 
 package org.apache.spark.sql.hudi
 
+import org.apache.hudi.DataSourceWriteOptions._
+import org.apache.hudi.config.HoodieWriteConfig
+import org.apache.hudi.keygen.SimpleKeyGenerator
+import org.apache.spark.sql.SaveMode
+
 class TestDeleteTable extends TestHoodieSqlBase {
 
   test("Test Delete Table") {
@@ -198,4 +203,46 @@ class TestDeleteTable extends TestHoodieSqlBase {
       }
     }
   }
+
+  Seq(false, true).foreach { urlencode =>
+    test(s"Test Delete single-partition table' partitions, urlencode: $urlencode") {
+      withTempDir { tmp =>
+        val tableName = generateTableName
+        val tablePath = s"${tmp.getCanonicalPath}/$tableName"
+
+        import spark.implicits._
+        val df = Seq((1, "z3", "v1", "2021/10/01"), (2, "l4", "v1", "2021/10/02"))
+          .toDF("id", "name", "ts", "dt")
+
+        df.write.format("hudi")
+          .option(HoodieWriteConfig.TBL_NAME.key, tableName)
+          .option(TABLE_TYPE.key, MOR_TABLE_TYPE_OPT_VAL)
+          .option(RECORDKEY_FIELD.key, "id")
+          .option(PRECOMBINE_FIELD.key, "ts")
+          .option(PARTITIONPATH_FIELD.key, "dt")
+          .option(URL_ENCODE_PARTITIONING.key(), urlencode)
+          .option(KEYGENERATOR_CLASS_NAME.key, classOf[SimpleKeyGenerator].getName)
+          .option(HoodieWriteConfig.INSERT_PARALLELISM_VALUE.key, "1")
+          .option(HoodieWriteConfig.UPSERT_PARALLELISM_VALUE.key, "1")
+          .mode(SaveMode.Overwrite)
+          .save(tablePath)
+
+        // register table metadata with the Spark catalog by creating the table
+        spark.sql(
+          s"""
+             |create table $tableName using hudi
+             |location '$tablePath'
+             |""".stripMargin)
+
+        // delete the 2021/10/01 partition
+        if (urlencode) {
+          spark.sql(s"""delete from $tableName where dt="2021/10/01"""")
+        } else {
+          spark.sql(s"delete from $tableName where dt='2021/10/01'")
+        }
+
+        checkAnswer(s"select dt from $tableName")(Seq(s"2021/10/02"))
+      }
+    }
+  }
 }
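
For context on what the new test exercises: with URL_ENCODE_PARTITIONING
enabled, the writer escapes the partition value so that "2021/10/01" becomes a
single directory level instead of three nested ones. A quick sketch of the
escaping, assuming '/' maps to "%2F" (java.net.URLEncoder happens to produce
the same result for this input as Hudi's partition-path escaping):

    scala> java.net.URLEncoder.encode("2021/10/01", "UTF-8")
    res0: String = 2021%2F10%2F01

The test then issues the DELETE in both layouts and asserts that only the
2021/10/02 rows remain; before this patch, the MOR read path double-encoded
the log file locations under the encoded directory, as sketched above.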