You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by si...@apache.org on 2020/05/28 00:19:05 UTC

[hudi] 22/40: [HUDI-795] Handle auto-deleted empty aux folder (#1515)

This is an automated email from the ASF dual-hosted git repository.

sivabalan pushed a commit to branch release-0.5.3
in repository https://gitbox.apache.org/repos/asf/hudi.git

commit 500b3907a5057b3945115132beb97ef6cae37687
Author: Alexander Filipchik <af...@gmail.com>
AuthorDate: Wed Apr 22 09:47:32 2020 -0700

    [HUDI-795] Handle auto-deleted empty aux folder (#1515)
    
    Co-authored-by: Alex Filipchik <al...@csscompany.com>
---
 .../apache/hudi/table/HoodieCommitArchiveLog.java  | 24 +++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/hudi-client/src/main/java/org/apache/hudi/table/HoodieCommitArchiveLog.java b/hudi-client/src/main/java/org/apache/hudi/table/HoodieCommitArchiveLog.java
index fcc9673..aa2465d 100644
--- a/hudi-client/src/main/java/org/apache/hudi/table/HoodieCommitArchiveLog.java
+++ b/hudi-client/src/main/java/org/apache/hudi/table/HoodieCommitArchiveLog.java
@@ -55,6 +55,7 @@ import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.apache.spark.api.java.JavaSparkContext;
 
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -219,14 +220,31 @@ public class HoodieCommitArchiveLog {
    * @throws IOException in case of error
    */
   private boolean deleteAllInstantsOlderorEqualsInAuxMetaFolder(HoodieInstant thresholdInstant) throws IOException {
-    List<HoodieInstant> instants = metaClient.scanHoodieInstantsFromFileSystem(
-        new Path(metaClient.getMetaAuxiliaryPath()), HoodieActiveTimeline.VALID_EXTENSIONS_IN_ACTIVE_TIMELINE, false);
+    List<HoodieInstant> instants = null;
+    boolean success = true;
+    try {
+      instants =
+          metaClient.scanHoodieInstantsFromFileSystem(
+              new Path(metaClient.getMetaAuxiliaryPath()),
+              HoodieActiveTimeline.VALID_EXTENSIONS_IN_ACTIVE_TIMELINE,
+              false);
+    } catch (FileNotFoundException e) {
+      /*
+       * On some FSs deletion of all files in the directory can auto remove the directory itself.
+       * GCS is one example, as it doesn't have real directories and subdirectories. When a client
+       * removes all the files from a "folder" on GCS, it has to create a special "/" to keep the folder
+       * around. If this doesn't happen (timeout, misconfigured client, ...), the folder will be deleted,
+       * and in this case we should not break when the aux folder is not found.
+       * GCS information: (https://cloud.google.com/storage/docs/gsutil/addlhelp/HowSubdirectoriesWork)
+       */
+      LOG.warn("Aux path not found. Skipping: " + metaClient.getMetaAuxiliaryPath());
+      return success;
+    }
 
     List<HoodieInstant> instantsToBeDeleted =
         instants.stream().filter(instant1 -> HoodieTimeline.compareTimestamps(instant1.getTimestamp(),
             thresholdInstant.getTimestamp(), HoodieTimeline.LESSER_OR_EQUAL)).collect(Collectors.toList());
 
-    boolean success = true;
     for (HoodieInstant deleteInstant : instantsToBeDeleted) {
       LOG.info("Deleting instant " + deleteInstant + " in auxiliary meta path " + metaClient.getMetaAuxiliaryPath());
       Path metaFile = new Path(metaClient.getMetaAuxiliaryPath(), deleteInstant.getFileName());