You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by fo...@apache.org on 2023/02/11 02:33:44 UTC

[hudi] 17/20: [HUDI-4968] Update misleading read.streaming.skip_compaction/skip_clustering config (#6856)

This is an automated email from the ASF dual-hosted git repository.

forwardxu pushed a commit to branch release-0.12.1
in repository https://gitbox.apache.org/repos/asf/hudi.git

commit 108c6afd308421f79ead6fc2524704db6b33161e
Author: voonhous <vo...@gmail.com>
AuthorDate: Wed Feb 8 14:40:55 2023 +0800

    [HUDI-4968] Update misleading read.streaming.skip_compaction/skip_clustering config (#6856)
    
    (cherry picked from commit 0dbc3450a95ae84985ecfccee76afc9c2d64b536)
---
 .../org/apache/hudi/configuration/FlinkOptions.java     | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java
index b885791daeb..e5f11444452 100644
--- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java
+++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java
@@ -27,6 +27,8 @@ import org.apache.hudi.common.model.HoodieCleaningPolicy;
 import org.apache.hudi.common.model.HoodiePayloadProps;
 import org.apache.hudi.common.model.HoodieTableType;
 import org.apache.hudi.common.model.WriteOperationType;
+import org.apache.hudi.config.HoodieClusteringConfig;
+import org.apache.hudi.config.HoodieCompactionConfig;
 import org.apache.hudi.config.HoodieIndexConfig;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.exception.HoodieException;
@@ -273,10 +275,10 @@ public class FlinkOptions extends HoodieConfig {
       .key("read.streaming.skip_compaction")
       .booleanType()
       .defaultValue(false)// default read as batch
-      .withDescription("Whether to skip compaction instants for streaming read,\n"
-          + "there are two cases that this option can be used to avoid reading duplicates:\n"
-          + "1) you are definitely sure that the consumer reads faster than any compaction instants, "
-          + "usually with delta time compaction strategy that is long enough, for e.g, one week;\n"
+      .withDescription("Whether to skip compaction instants and avoid reading compacted base files for streaming read to improve read performance.\n"
+          + "There are two cases that this option can be used to avoid reading duplicates:\n"
+          + "1) you are definitely sure that the consumer reads [faster than/completes before] any compaction instants "
+          + "when " + HoodieCompactionConfig.PRESERVE_COMMIT_METADATA.key() + " is set to false.\n"
           + "2) changelog mode is enabled, this option is a solution to keep data integrity");
 
   // this option is experimental
@@ -284,8 +286,11 @@ public class FlinkOptions extends HoodieConfig {
       .key("read.streaming.skip_clustering")
       .booleanType()
       .defaultValue(false)
-      .withDescription("Whether to skip clustering instants for streaming read,\n"
-          + "to avoid reading duplicates");
+      .withDescription("Whether to skip clustering instants to avoid reading base files of clustering operations for streaming read "
+          + "to improve read performance.\n"
+          + "This option toggled to true to avoid duplicates when: \n"
+          + "1) you are definitely sure that the consumer reads [faster than/completes before] any clustering instants "
+          + "when " + HoodieClusteringConfig.PRESERVE_COMMIT_METADATA.key() + " is set to false.\n");
 
   public static final String START_COMMIT_EARLIEST = "earliest";
   public static final ConfigOption<String> READ_START_COMMIT = ConfigOptions