You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by GitBox <gi...@apache.org> on 2022/03/11 23:29:56 UTC

[GitHub] [hudi] nsivabalan commented on a change in pull request #4385: [HUDI-1436]: provided option to trigger clean every nth commit

nsivabalan commented on a change in pull request #4385:
URL: https://github.com/apache/hudi/pull/4385#discussion_r825189138



##########
File path: hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java
##########
@@ -129,6 +130,17 @@
       .withDocumentation("Controls how compaction scheduling is triggered, by time or num delta commits or combination of both. "
           + "Valid options: " + Arrays.stream(CompactionTriggerStrategy.values()).map(Enum::name).collect(Collectors.joining(",")));
 
+  public static final ConfigProperty<String> CLEAN_TRIGGER_STRATEGY = ConfigProperty
+          .key("hoodie.clean.inline.trigger.strategy")

Review comment:
       can we remove "inline" from the config. this could be applicable to both inline and async cleaning.

##########
File path: hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
##########
@@ -1135,6 +1136,18 @@ public int getCleanerParallelism() {
     return getInt(HoodieCompactionConfig.CLEANER_PARALLELISM_VALUE);
   }
 
+  public int getInlineCleaningMaxCommits() {

Review comment:
       getCleaning**Min**Commits

##########
File path: hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestCleanPlanExecutor.java
##########
@@ -18,42 +18,437 @@
 
 package org.apache.hudi.table.functional;
 
+import org.apache.hudi.avro.model.HoodieFileStatus;
 import org.apache.hudi.common.HoodieCleanStat;
 import org.apache.hudi.common.config.HoodieMetadataConfig;
 import org.apache.hudi.common.model.BootstrapFileMapping;
 import org.apache.hudi.common.model.HoodieCleaningPolicy;
 import org.apache.hudi.common.model.HoodieCommitMetadata;
 import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy;
+import org.apache.hudi.common.model.HoodieTableType;
+import org.apache.hudi.common.model.WriteOperationType;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
 import org.apache.hudi.common.table.timeline.HoodieInstant;
 import org.apache.hudi.common.table.timeline.HoodieTimeline;
+import org.apache.hudi.common.testutils.HoodieMetadataTestTable;
 import org.apache.hudi.common.testutils.HoodieTestTable;
+import org.apache.hudi.common.testutils.HoodieTestUtils;
 import org.apache.hudi.common.util.CollectionUtils;
 import org.apache.hudi.common.util.Option;
+import org.apache.hudi.common.util.collection.Pair;
 import org.apache.hudi.config.HoodieCompactionConfig;
 import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.metadata.HoodieTableMetadataWriter;
+import org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter;
 import org.apache.hudi.table.TestCleaner;
+import org.junit.jupiter.api.Test;
 import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
 import org.junit.jupiter.params.provider.MethodSource;
+import org.junit.jupiter.params.provider.ValueSource;
 
 import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Paths;
 import java.time.Instant;
 import java.time.ZoneId;
 import java.time.ZonedDateTime;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.Date;
 import java.util.HashMap;
-import java.util.Map;
 import java.util.List;
+import java.util.Map;
 import java.util.UUID;
+import java.util.stream.Stream;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertThrows;
 import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertNull;
 
+/**
+ * Tests covering different clean plan policies/strategies.
+ */
 public class TestCleanPlanExecutor extends TestCleaner {
 
+  @Test
+  public void testInvalidCleaningTriggerStrategy() {

Review comment:
       did you add any tests for the feature added in this patch.
   i.e. a test where we test different values for
   hoodie.clean.min.commits like 1, 4, 8 etc. 
   

##########
File path: hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java
##########
@@ -129,6 +130,17 @@
       .withDocumentation("Controls how compaction scheduling is triggered, by time or num delta commits or combination of both. "
           + "Valid options: " + Arrays.stream(CompactionTriggerStrategy.values()).map(Enum::name).collect(Collectors.joining(",")));
 
+  public static final ConfigProperty<String> CLEAN_TRIGGER_STRATEGY = ConfigProperty
+          .key("hoodie.clean.inline.trigger.strategy")
+          .defaultValue(CleaningTriggerStrategy.NUM_COMMITS.name())
+          .withDocumentation("Controls how cleaning is scheduled. Valid options: "
+                  + Arrays.stream(CleaningTriggerStrategy.values()).map(Enum::name).collect(Collectors.joining(",")));
+
+  public static final ConfigProperty<String> INLINE_CLEAN_NUM_COMMITS = ConfigProperty
+          .key("hoodie.clean.inline.min.commits")

Review comment:
       same here

##########
File path: hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java
##########
@@ -129,6 +130,17 @@
       .withDocumentation("Controls how compaction scheduling is triggered, by time or num delta commits or combination of both. "
           + "Valid options: " + Arrays.stream(CompactionTriggerStrategy.values()).map(Enum::name).collect(Collectors.joining(",")));
 
+  public static final ConfigProperty<String> CLEAN_TRIGGER_STRATEGY = ConfigProperty
+          .key("hoodie.clean.inline.trigger.strategy")
+          .defaultValue(CleaningTriggerStrategy.NUM_COMMITS.name())
+          .withDocumentation("Controls how cleaning is scheduled. Valid options: "
+                  + Arrays.stream(CleaningTriggerStrategy.values()).map(Enum::name).collect(Collectors.joining(",")));
+
+  public static final ConfigProperty<String> INLINE_CLEAN_NUM_COMMITS = ConfigProperty
+          .key("hoodie.clean.inline.min.commits")

Review comment:
       should we call this as 
   "hoodie.clean.max.commits" similar to max delta commits for compaction




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@hudi.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org