You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ay...@apache.org on 2022/02/22 14:14:19 UTC

[hive] branch master updated: HIVE-25960: Fix S3a recursive listing logic. (#3031). (Ayush Saxena reviewed by Laszlo Bodor)

This is an automated email from the ASF dual-hosted git repository.

ayushsaxena pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 756a8fc  HIVE-25960: Fix S3a recursive listing logic. (#3031). (Ayush Saxena reviewed by Laszlo Bodor)
756a8fc is described below

commit 756a8fce9f75e139fd7c5cdc45ff0eb629ad9504
Author: Ayush Saxena <ay...@apache.org>
AuthorDate: Tue Feb 22 19:44:02 2022 +0530

    HIVE-25960: Fix S3a recursive listing logic. (#3031). (Ayush Saxena reviewed by Laszlo Bodor)
---
 .../java/org/apache/hadoop/hive/common/FileUtils.java   | 16 +++++++++++++++-
 .../org/apache/hadoop/hive/common/TestFileUtils.java    | 17 +++++++++++++++++
 .../apache/hadoop/hive/metastore/utils/FileUtils.java   | 16 +++++++++++++++-
 3 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
index b617623..e92b700 100644
--- a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
+++ b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
@@ -361,13 +361,27 @@ public final class FileUtils {
     RemoteIterator<LocatedFileStatus> remoteIterator = fs.listFiles(base.getPath(), true);
     while (remoteIterator.hasNext()) {
       LocatedFileStatus each = remoteIterator.next();
-      Path relativePath = new Path(each.getPath().toString().replace(base.toString(), ""));
+      Path relativePath = makeRelative(base.getPath(), each.getPath());
       if (org.apache.hadoop.hive.metastore.utils.FileUtils.RemoteIteratorWithFilter.HIDDEN_FILES_FULL_PATH_FILTER.accept(relativePath)) {
         results.add(each);
       }
     }
   }
 
+  /**
+   * Returns childPath relative to parentPath, using a literal prefix match; a non-descendant child is returned as is.
+   * @param parentPath the parent path.
+   * @param childPath the child path.
+   * @return childPath with the leading parentPath removed, or childPath itself when it is not under parentPath.
+   */
+  public static Path makeRelative(Path parentPath, Path childPath) {
+    String parentString =
+        parentPath.toString().endsWith(Path.SEPARATOR) ? parentPath.toString() : parentPath.toString() + Path.SEPARATOR;
+    String childString =
+        childPath.toString().endsWith(Path.SEPARATOR) ? childPath.toString() : childPath.toString() + Path.SEPARATOR;
+    return new Path(childString.startsWith(parentString) ? childString.substring(parentString.length()) : childString);
+  }
+
   public static boolean isS3a(FileSystem fs) {
     try {
       return "s3a".equalsIgnoreCase(fs.getScheme());
diff --git a/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java b/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java
index 15e74db..4f4d604 100644
--- a/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java
+++ b/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java
@@ -264,4 +264,21 @@ public class TestFileUtils {
               equalsIgnoreCase("Distcp is called with doAsUser and delete source set as true"));
     }
   }
+
+  @Test
+  public void testMakeRelative() {
+    // Exercises FileUtils.makeRelative with a descendant, the root as parent, and a non-descendant.
+    // A descendant path is reduced to its components below the parent.
+    Path base = new Path("/user/hive/database");
+    Path descendant = new Path(base, "table/dir/subdir");
+    assertEquals("table/dir/subdir", FileUtils.makeRelative(base, descendant).toString());
+
+    // With the root as parent only the leading separator is removed.
+    Path root = new Path(Path.SEPARATOR);
+    assertEquals("user/hive/database/table/dir/subdir", FileUtils.makeRelative(root, descendant).toString());
+
+    // A path outside the parent is expected to come back unchanged.
+    Path outsider = new Path("/user/hive/database1/table/dir/subdir");
+    assertEquals(outsider.toString(), FileUtils.makeRelative(base, outsider).toString());
+  }
 }
diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/FileUtils.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/FileUtils.java
index dad71b7..b8ab28b 100644
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/FileUtils.java
+++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/FileUtils.java
@@ -404,13 +404,27 @@ public class FileUtils {
     RemoteIterator<LocatedFileStatus> remoteIterator = fs.listFiles(base, true);
     while (remoteIterator.hasNext()) {
       LocatedFileStatus each = remoteIterator.next();
-      Path relativePath = new Path(each.getPath().toString().replace(base.toString(), ""));
+      Path relativePath = makeRelative(base, each.getPath());
       if (RemoteIteratorWithFilter.HIDDEN_FILES_FULL_PATH_FILTER.accept(relativePath)) {
         results.add(each);
       }
     }
   }
 
+  /**
+   * Returns childPath relative to parentPath, using a literal prefix match; a non-descendant child is returned as is.
+   * @param parentPath the parent path.
+   * @param childPath the child path.
+   * @return childPath with the leading parentPath removed, or childPath itself when it is not under parentPath.
+   */
+  public static Path makeRelative(Path parentPath, Path childPath) {
+    String parentString =
+        parentPath.toString().endsWith(Path.SEPARATOR) ? parentPath.toString() : parentPath.toString() + Path.SEPARATOR;
+    String childString =
+        childPath.toString().endsWith(Path.SEPARATOR) ? childPath.toString() : childPath.toString() + Path.SEPARATOR;
+    return new Path(childString.startsWith(parentString) ? childString.substring(parentString.length()) : childString);
+  }
+
   public static boolean isS3a(FileSystem fs) {
     try {
       return "s3a".equalsIgnoreCase(fs.getScheme());