You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by si...@apache.org on 2020/05/28 00:19:05 UTC
[hudi] 22/40: [HUDI-795] Handle auto-deleted empty aux folder
(#1515)
This is an automated email from the ASF dual-hosted git repository.
sivabalan pushed a commit to branch release-0.5.3
in repository https://gitbox.apache.org/repos/asf/hudi.git
commit 500b3907a5057b3945115132beb97ef6cae37687
Author: Alexander Filipchik <af...@gmail.com>
AuthorDate: Wed Apr 22 09:47:32 2020 -0700
[HUDI-795] Handle auto-deleted empty aux folder (#1515)
Co-authored-by: Alex Filipchik <al...@csscompany.com>
---
.../apache/hudi/table/HoodieCommitArchiveLog.java | 24 +++++++++++++++++++---
1 file changed, 21 insertions(+), 3 deletions(-)
diff --git a/hudi-client/src/main/java/org/apache/hudi/table/HoodieCommitArchiveLog.java b/hudi-client/src/main/java/org/apache/hudi/table/HoodieCommitArchiveLog.java
index fcc9673..aa2465d 100644
--- a/hudi-client/src/main/java/org/apache/hudi/table/HoodieCommitArchiveLog.java
+++ b/hudi-client/src/main/java/org/apache/hudi/table/HoodieCommitArchiveLog.java
@@ -55,6 +55,7 @@ import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaSparkContext;
+import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
@@ -219,14 +220,31 @@ public class HoodieCommitArchiveLog {
* @throws IOException in case of error
*/
private boolean deleteAllInstantsOlderorEqualsInAuxMetaFolder(HoodieInstant thresholdInstant) throws IOException {
- List<HoodieInstant> instants = metaClient.scanHoodieInstantsFromFileSystem(
- new Path(metaClient.getMetaAuxiliaryPath()), HoodieActiveTimeline.VALID_EXTENSIONS_IN_ACTIVE_TIMELINE, false);
+ List<HoodieInstant> instants = null;
+ boolean success = true;
+ try {
+ instants =
+ metaClient.scanHoodieInstantsFromFileSystem(
+ new Path(metaClient.getMetaAuxiliaryPath()),
+ HoodieActiveTimeline.VALID_EXTENSIONS_IN_ACTIVE_TIMELINE,
+ false);
+ } catch (FileNotFoundException e) {
+ /*
+ * On some FSs deletion of all files in the directory can auto remove the directory itself.
+ * GCS is one example, as it doesn't have real directories and subdirectories. When client
+ * removes all the files from a "folder" on GCS it has to create a special "/" to keep the folder
+ * around. If this doesn't happen (timeout, misconfigured client, ...) folder will be deleted and
+ * in this case we should not break when aux folder is not found.
+ * GCS information: (https://cloud.google.com/storage/docs/gsutil/addlhelp/HowSubdirectoriesWork)
+ */
+ LOG.warn("Aux path not found. Skipping: " + metaClient.getMetaAuxiliaryPath());
+ return success;
+ }
List<HoodieInstant> instantsToBeDeleted =
instants.stream().filter(instant1 -> HoodieTimeline.compareTimestamps(instant1.getTimestamp(),
thresholdInstant.getTimestamp(), HoodieTimeline.LESSER_OR_EQUAL)).collect(Collectors.toList());
- boolean success = true;
for (HoodieInstant deleteInstant : instantsToBeDeleted) {
LOG.info("Deleting instant " + deleteInstant + " in auxiliary meta path " + metaClient.getMetaAuxiliaryPath());
Path metaFile = new Path(metaClient.getMetaAuxiliaryPath(), deleteInstant.getFileName());