You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2022/12/05 01:51:34 UTC
[impala] 02/02: IMPALA-11767: Ignore exceptions for invalid paths in Hudi search
This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 2e6bbbba7680fe8aba425ed2eb5d861b4b3a371f
Author: Michael Smith <mi...@cloudera.com>
AuthorDate: Fri Dec 2 09:33:37 2022 -0800
IMPALA-11767: Ignore exceptions for invalid paths in Hudi search
When testing potential paths for a Hudi table, Hudi starts from the
filesystem root and looks for a `.hoodie` file. With Ozone,
`ofs:///.hoodie` and `ofs:///<volume>/.hoodie` are invalid paths, and
after HDDS-7132 an exception is thrown when testing for the existence of
those paths. This caused
query_test/test_scanners.py::TestHudiParquet::test_hudiparquet to fail
with Ozone 1.3.0 prerelease builds.
Log the exception and continue searching.
Testing: ran test_hudiparquet with Ozone 1.3.0 prerelease build
Change-Id: Iecc572ce78b13f7ddc177ad371fcf3048241620d
Reviewed-on: http://gerrit.cloudera.org:8080/19312
Reviewed-by: Andrew Sherman <as...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
fe/src/main/java/org/apache/impala/util/HudiUtil.java | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)
diff --git a/fe/src/main/java/org/apache/impala/util/HudiUtil.java b/fe/src/main/java/org/apache/impala/util/HudiUtil.java
index 5d6750f5b..45c0c61a2 100644
--- a/fe/src/main/java/org/apache/impala/util/HudiUtil.java
+++ b/fe/src/main/java/org/apache/impala/util/HudiUtil.java
@@ -17,12 +17,17 @@
package org.apache.impala.util;
import org.apache.hadoop.fs.FileStatus;
+import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.hadoop.HoodieROTablePathFilter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
public class HudiUtil {
+ private final static Logger LOG = LoggerFactory.getLogger(HudiUtil.class);
+
/**
* This method will remove invalid FileStatus from the list based on Hudi's timestamp,
* and return a list of file status contains only the latest version parquet files.
@@ -32,7 +37,16 @@ public class HudiUtil {
public static List<FileStatus> filterFilesForHudiROPath(List<FileStatus> stats) {
List<FileStatus> validStats = new ArrayList<>(stats);
HoodieROTablePathFilter hudiFilter = new HoodieROTablePathFilter();
- validStats.removeIf(f -> !hudiFilter.accept(f.getPath()));
+ validStats.removeIf(f -> {
+ try {
+ // Ozone can throw an exception if it considers the path invalid. Since we're
+ // searching for a valid path, log the exception and continue.
+ return !hudiFilter.accept(f.getPath());
+ } catch (HoodieException e) {
+ LOG.debug("Unable to check Hudi path {}", f.getPath(), e);
+ return false;
+ }
+ });
return validStats;
}
}