You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by da...@apache.org on 2023/02/08 06:41:05 UTC

[hudi] branch master updated: [HUDI-4968] Update misleading read.streaming.skip_compaction/skip_clustering config (#6856)

This is an automated email from the ASF dual-hosted git repository.

danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 0dbc3450a95 [HUDI-4968] Update misleading read.streaming.skip_compaction/skip_clustering config (#6856)
0dbc3450a95 is described below

commit 0dbc3450a95ae84985ecfccee76afc9c2d64b536
Author: voonhous <vo...@gmail.com>
AuthorDate: Wed Feb 8 14:40:55 2023 +0800

    [HUDI-4968] Update misleading read.streaming.skip_compaction/skip_clustering config (#6856)
---
 .../org/apache/hudi/configuration/FlinkOptions.java     | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java
index e3ca60d1405..b9e5f43efce 100644
--- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java
+++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java
@@ -30,6 +30,8 @@ import org.apache.hudi.common.model.HoodieSyncTableStrategy;
 import org.apache.hudi.common.model.HoodieTableType;
 import org.apache.hudi.common.model.WriteOperationType;
 import org.apache.hudi.common.table.HoodieTableConfig;
+import org.apache.hudi.config.HoodieClusteringConfig;
+import org.apache.hudi.config.HoodieCompactionConfig;
 import org.apache.hudi.config.HoodieIndexConfig;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.hive.MultiPartKeysValueExtractor;
@@ -297,10 +299,10 @@ public class FlinkOptions extends HoodieConfig {
       .key("read.streaming.skip_compaction")
       .booleanType()
       .defaultValue(false)// default read as batch
-      .withDescription("Whether to skip compaction instants for streaming read,\n"
-          + "there are two cases that this option can be used to avoid reading duplicates:\n"
-          + "1) you are definitely sure that the consumer reads faster than any compaction instants, "
-          + "usually with delta time compaction strategy that is long enough, for e.g, one week;\n"
+      .withDescription("Whether to skip compaction instants and avoid reading compacted base files for streaming read to improve read performance.\n"
+          + "There are two cases that this option can be used to avoid reading duplicates:\n"
+          + "1) you are definitely sure that the consumer reads [faster than/completes before] any compaction instants "
+          + "when " + HoodieCompactionConfig.PRESERVE_COMMIT_METADATA.key() + " is set to false.\n"
           + "2) changelog mode is enabled, this option is a solution to keep data integrity");
 
   // this option is experimental
@@ -308,8 +310,11 @@ public class FlinkOptions extends HoodieConfig {
           .key("read.streaming.skip_clustering")
           .booleanType()
           .defaultValue(false)
-          .withDescription("Whether to skip clustering instants for streaming read,\n"
-              + "to avoid reading duplicates");
+          .withDescription("Whether to skip clustering instants to avoid reading base files of clustering operations for streaming read "
+              + "to improve read performance.\n"
+              + "This option toggled to true to avoid duplicates when: \n"
+              + "1) you are definitely sure that the consumer reads [faster than/completes before] any clustering instants "
+              + "when " + HoodieClusteringConfig.PRESERVE_COMMIT_METADATA.key() + " is set to false.\n");
 
   public static final String START_COMMIT_EARLIEST = "earliest";
   public static final ConfigOption<String> READ_START_COMMIT = ConfigOptions