You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by yi...@apache.org on 2023/01/30 04:56:40 UTC

[hudi] 07/19: [HUDI-5629] Clean CDC log files for enable/disable scenario (#7767)

This is an automated email from the ASF dual-hosted git repository.

yihua pushed a commit to branch release-0.13.0
in repository https://gitbox.apache.org/repos/asf/hudi.git

commit 281b29eb7b3e6a48a0598f6b01b5148fe4afc541
Author: Yann Byron <bi...@gmail.com>
AuthorDate: Sat Jan 28 14:24:33 2023 +0800

    [HUDI-5629] Clean CDC log files for enable/disable scenario (#7767)
---
 .../hudi/table/action/clean/CleanPlanner.java      | 26 +++++-----------------
 1 file changed, 6 insertions(+), 20 deletions(-)

diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java
index 982800cc246..c6ff62ee764 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java
@@ -29,10 +29,8 @@ import org.apache.hudi.common.model.HoodieCleaningPolicy;
 import org.apache.hudi.common.model.HoodieCommitMetadata;
 import org.apache.hudi.common.model.HoodieFileGroup;
 import org.apache.hudi.common.model.HoodieFileGroupId;
-import org.apache.hudi.common.model.HoodieLogFile;
 import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
 import org.apache.hudi.common.model.HoodieTableType;
-import org.apache.hudi.common.table.cdc.HoodieCDCUtils;
 import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
 import org.apache.hudi.common.table.timeline.HoodieInstant;
 import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -62,7 +60,6 @@ import java.util.Date;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
-import java.util.function.Predicate;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
@@ -436,23 +433,12 @@ public class CleanPlanner<T, I, K, O> implements Serializable {
         cleanPaths.add(new CleanFileInfo(dataFile.getBootstrapBaseFile().get().getPath(), true));
       }
     }
-    if (hoodieTable.getMetaClient().getTableType() == HoodieTableType.MERGE_ON_READ) {
-      // If merge on read, then clean the log files for the commits as well
-      Predicate<HoodieLogFile> notCDCLogFile =
-          hoodieLogFile -> !hoodieLogFile.getFileName().endsWith(HoodieCDCUtils.CDC_LOGFILE_SUFFIX);
-      cleanPaths.addAll(
-          nextSlice.getLogFiles().filter(notCDCLogFile).map(lf -> new CleanFileInfo(lf.getPath().toString(), false))
-              .collect(Collectors.toList()));
-    }
-    if (hoodieTable.getMetaClient().getTableConfig().isCDCEnabled()) {
-      // The cdc log files will be written out in cdc scenario, no matter the table type is mor or cow.
-      // Here we need to clean uo these cdc log files.
-      Predicate<HoodieLogFile> isCDCLogFile =
-          hoodieLogFile -> hoodieLogFile.getFileName().endsWith(HoodieCDCUtils.CDC_LOGFILE_SUFFIX);
-      cleanPaths.addAll(
-          nextSlice.getLogFiles().filter(isCDCLogFile).map(lf -> new CleanFileInfo(lf.getPath().toString(), false))
-              .collect(Collectors.toList()));
-    }
+
+    // clean the log files for the commits, which contain cdc log files in cdc scenario
+    // and normal log files for mor tables.
+    cleanPaths.addAll(
+        nextSlice.getLogFiles().map(lf -> new CleanFileInfo(lf.getPath().toString(), false))
+            .collect(Collectors.toList()));
     return cleanPaths;
   }