You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by co...@apache.org on 2023/04/14 01:48:56 UTC
[hudi] branch master updated: [HUDI-6048] Check if partition exists before list partition by path prefix (#8402)
This is an automated email from the ASF dual-hosted git repository.
codope pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 657b837aaa6 [HUDI-6048] Check if partition exists before list partition by path prefix (#8402)
657b837aaa6 is described below
commit 657b837aaa6fa825945625579c52ff7365b1ecfd
Author: Zouxxyy <zo...@alibaba-inc.com>
AuthorDate: Fri Apr 14 09:48:48 2023 +0800
[HUDI-6048] Check if partition exists before list partition by path prefix (#8402)
---
.../src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala | 4 +++-
.../src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala | 7 ++++++-
2 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala
index a9a20057795..6459c967c56 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala
@@ -300,7 +300,9 @@ class SparkHoodieTableFileIndex(spark: SparkSession,
// prefix to try to reduce the scope of the required file-listing
val relativePartitionPathPrefix = composeRelativePartitionPath(staticPartitionColumnNameValuePairs)
- if (staticPartitionColumnNameValuePairs.length == partitionColumnNames.length) {
+ if (!metaClient.getFs.exists(new Path(getBasePath, relativePartitionPathPrefix))) {
+ Seq()
+ } else if (staticPartitionColumnNameValuePairs.length == partitionColumnNames.length) {
// In case composed partition path is complete, we can return it directly avoiding extra listing operation
Seq(new PartitionPath(relativePartitionPathPrefix, staticPartitionColumnNameValuePairs.map(_._2._2.asInstanceOf[AnyRef]).toArray))
} else {
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala
index e69819fb6f4..ed73940186d 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala
@@ -519,7 +519,12 @@ class TestHoodieFileIndex extends HoodieSparkClientTestBase with ScalaAssertionS
EqualTo(attribute("region_code"), literal("1"))),
"dt = '2023/01/01' and region_code = '1'",
enablePartitionPathPrefixAnalysis,
- Seq(("1", "2023/01/01")))
+ Seq(("1", "2023/01/01"))),
+ // no partition matched
+ (Seq(EqualTo(attribute("region_code"), literal("0"))),
+ "region_code = '0'",
+ enablePartitionPathPrefixAnalysis,
+ Seq())
)
testCases.foreach(testCase => {