You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2022/12/05 01:51:34 UTC

[impala] 02/02: IMPALA-11767: Ignore exceptions for invalid paths in Hudi search

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 2e6bbbba7680fe8aba425ed2eb5d861b4b3a371f
Author: Michael Smith <mi...@cloudera.com>
AuthorDate: Fri Dec 2 09:33:37 2022 -0800

    IMPALA-11767: Ignore exceptions for invalid paths in Hudi search
    
    When testing potential paths for a Hudi table, Hudi starts from the
    filesystem root and looks for a `.hoodie` directory. With Ozone,
    `ofs:///.hoodie` and `ofs:///<volume>/.hoodie` are invalid paths, and
    after HDDS-7132 an exception is thrown when testing for the existence
    of those paths. This caused
    query_test/test_scanners.py::TestHudiParquet::test_hudiparquet to fail
    with Ozone 1.3.0 prerelease builds.
    
    Log the exception and continue searching.
    
    Testing: ran test_hudiparquet with Ozone 1.3.0 prerelease build
    
    Change-Id: Iecc572ce78b13f7ddc177ad371fcf3048241620d
    Reviewed-on: http://gerrit.cloudera.org:8080/19312
    Reviewed-by: Andrew Sherman <as...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 fe/src/main/java/org/apache/impala/util/HudiUtil.java | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/fe/src/main/java/org/apache/impala/util/HudiUtil.java b/fe/src/main/java/org/apache/impala/util/HudiUtil.java
index 5d6750f5b..45c0c61a2 100644
--- a/fe/src/main/java/org/apache/impala/util/HudiUtil.java
+++ b/fe/src/main/java/org/apache/impala/util/HudiUtil.java
@@ -17,12 +17,17 @@
 package org.apache.impala.util;
 
 import org.apache.hadoop.fs.FileStatus;
+import org.apache.hudi.exception.HoodieException;
 import org.apache.hudi.hadoop.HoodieROTablePathFilter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.util.ArrayList;
 import java.util.List;
 
 public class HudiUtil {
+  private final static Logger LOG = LoggerFactory.getLogger(HudiUtil.class);
+
   /**
    * This method will remove invalid FileStatus from the list based on Hudi's timestamp,
    * and return a list of file status contains only the latest version parquet files.
@@ -32,7 +37,16 @@ public class HudiUtil {
   public static List<FileStatus> filterFilesForHudiROPath(List<FileStatus> stats) {
     List<FileStatus> validStats = new ArrayList<>(stats);
     HoodieROTablePathFilter hudiFilter = new HoodieROTablePathFilter();
-    validStats.removeIf(f -> !hudiFilter.accept(f.getPath()));
+    validStats.removeIf(f -> {
+      try {
+        // Ozone can throw an exception if it considers the path invalid. Since we're
+        // searching for a valid path, log the exception and continue.
+        return !hudiFilter.accept(f.getPath());
+      } catch (HoodieException e) {
+        LOG.debug("Unable to check Hudi path {}", f.getPath(), e);
+        return false;
+      }
+    });
     return validStats;
   }
 }