You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by co...@apache.org on 2023/04/14 01:48:56 UTC
[hudi] branch master updated: [HUDI-6048] Check if partition exists before list partition by path prefix (#8402)

This is an automated email from the ASF dual-hosted git repository.

codope pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 657b837aaa6 [HUDI-6048] Check if partition exists before list partition by path prefix (#8402)
657b837aaa6 is described below

commit 657b837aaa6fa825945625579c52ff7365b1ecfd
Author: Zouxxyy <zo...@alibaba-inc.com>
AuthorDate: Fri Apr 14 09:48:48 2023 +0800

    [HUDI-6048] Check if partition exists before list partition by path prefix (#8402)
---
 .../src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala | 4 +++-
 .../src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala       | 7 ++++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala
index a9a20057795..6459c967c56 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala
@@ -300,7 +300,9 @@ class SparkHoodieTableFileIndex(spark: SparkSession,
       // prefix to try to reduce the scope of the required file-listing
       val relativePartitionPathPrefix = composeRelativePartitionPath(staticPartitionColumnNameValuePairs)
 
-      if (staticPartitionColumnNameValuePairs.length == partitionColumnNames.length) {
+      if (!metaClient.getFs.exists(new Path(getBasePath, relativePartitionPathPrefix))) {
+        Seq()
+      } else if (staticPartitionColumnNameValuePairs.length == partitionColumnNames.length) {
         // In case composed partition path is complete, we can return it directly avoiding extra listing operation
         Seq(new PartitionPath(relativePartitionPathPrefix, staticPartitionColumnNameValuePairs.map(_._2._2.asInstanceOf[AnyRef]).toArray))
       } else {
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala
index e69819fb6f4..ed73940186d 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala
@@ -519,7 +519,12 @@ class TestHoodieFileIndex extends HoodieSparkClientTestBase with ScalaAssertionS
         EqualTo(attribute("region_code"), literal("1"))),
         "dt = '2023/01/01' and region_code = '1'",
         enablePartitionPathPrefixAnalysis,
-        Seq(("1", "2023/01/01")))
+        Seq(("1", "2023/01/01"))),
+      // no partition matched
+      (Seq(EqualTo(attribute("region_code"), literal("0"))),
+        "region_code = '0'",
+        enablePartitionPathPrefixAnalysis,
+        Seq())
     )
 
     testCases.foreach(testCase => {