You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by yi...@apache.org on 2023/01/30 04:56:40 UTC
[hudi] 07/19: [HUDI-5629] Clean CDC log files for enable/disable scenario (#7767)
This is an automated email from the ASF dual-hosted git repository.
yihua pushed a commit to branch release-0.13.0
in repository https://gitbox.apache.org/repos/asf/hudi.git
commit 281b29eb7b3e6a48a0598f6b01b5148fe4afc541
Author: Yann Byron <bi...@gmail.com>
AuthorDate: Sat Jan 28 14:24:33 2023 +0800
[HUDI-5629] Clean CDC log files for enable/disable scenario (#7767)
---
.../hudi/table/action/clean/CleanPlanner.java | 26 +++++-----------------
1 file changed, 6 insertions(+), 20 deletions(-)
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java
index 982800cc246..c6ff62ee764 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java
@@ -29,10 +29,8 @@ import org.apache.hudi.common.model.HoodieCleaningPolicy;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieFileGroup;
import org.apache.hudi.common.model.HoodieFileGroupId;
-import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
import org.apache.hudi.common.model.HoodieTableType;
-import org.apache.hudi.common.table.cdc.HoodieCDCUtils;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -62,7 +60,6 @@ import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
-import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@@ -436,23 +433,12 @@ public class CleanPlanner<T, I, K, O> implements Serializable {
cleanPaths.add(new CleanFileInfo(dataFile.getBootstrapBaseFile().get().getPath(), true));
}
}
- if (hoodieTable.getMetaClient().getTableType() == HoodieTableType.MERGE_ON_READ) {
- // If merge on read, then clean the log files for the commits as well
- Predicate<HoodieLogFile> notCDCLogFile =
- hoodieLogFile -> !hoodieLogFile.getFileName().endsWith(HoodieCDCUtils.CDC_LOGFILE_SUFFIX);
- cleanPaths.addAll(
- nextSlice.getLogFiles().filter(notCDCLogFile).map(lf -> new CleanFileInfo(lf.getPath().toString(), false))
- .collect(Collectors.toList()));
- }
- if (hoodieTable.getMetaClient().getTableConfig().isCDCEnabled()) {
- // The cdc log files will be written out in cdc scenario, no matter the table type is mor or cow.
- // Here we need to clean uo these cdc log files.
- Predicate<HoodieLogFile> isCDCLogFile =
- hoodieLogFile -> hoodieLogFile.getFileName().endsWith(HoodieCDCUtils.CDC_LOGFILE_SUFFIX);
- cleanPaths.addAll(
- nextSlice.getLogFiles().filter(isCDCLogFile).map(lf -> new CleanFileInfo(lf.getPath().toString(), false))
- .collect(Collectors.toList()));
- }
+
+ // Clean all log files of this file slice: CDC log files (written when CDC is enabled,
+ // for both COW and MOR tables) as well as the regular log files of MOR tables.
+ cleanPaths.addAll(
+ nextSlice.getLogFiles().map(lf -> new CleanFileInfo(lf.getPath().toString(), false))
+ .collect(Collectors.toList()));
return cleanPaths;
}