You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by GitBox <gi...@apache.org> on 2021/05/04 03:12:13 UTC

[GitHub] [hudi] umehrot2 commented on a change in pull request #2833: [HUDI-89] Add configOption & refactor Hudi configuration framework

umehrot2 commented on a change in pull request #2833:
URL: https://github.com/apache/hudi/pull/2833#discussion_r625483857



##########
File path: hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieClusteringConfig.java
##########
@@ -30,59 +31,70 @@
  */
 public class HoodieClusteringConfig extends DefaultHoodieConfig {
 
-  // Config to provide a strategy class to create ClusteringPlan. Class has to be subclass of ClusteringPlanStrategy
-  public static final String CLUSTERING_PLAN_STRATEGY_CLASS = "hoodie.clustering.plan.strategy.class";
-  public static final String DEFAULT_CLUSTERING_PLAN_STRATEGY_CLASS =
-      "org.apache.hudi.client.clustering.plan.strategy.SparkRecentDaysClusteringPlanStrategy";
+  public static final ConfigOption<String> CLUSTERING_PLAN_STRATEGY_CLASS = ConfigOption
+      .key("hoodie.clustering.plan.strategy.class")
+      .defaultValue("org.apache.hudi.client.clustering.plan.strategy.SparkRecentDaysClusteringPlanStrategy")
+      .withDescription("Config to provide a strategy class to create ClusteringPlan. Class has to be subclass of ClusteringPlanStrategy");
 
-  // Config to provide a strategy class to execute a ClusteringPlan. Class has to be subclass of RunClusteringStrategy
-  public static final String CLUSTERING_EXECUTION_STRATEGY_CLASS = "hoodie.clustering.execution.strategy.class";
-  public static final String DEFAULT_CLUSTERING_EXECUTION_STRATEGY_CLASS =
-      "org.apache.hudi.client.clustering.run.strategy.SparkSortAndSizeExecutionStrategy";
+  public static final ConfigOption<String> CLUSTERING_EXECUTION_STRATEGY_CLASS = ConfigOption
+      .key("hoodie.clustering.execution.strategy.class")
+      .defaultValue("org.apache.hudi.client.clustering.run.strategy.SparkSortAndSizeExecutionStrategy")
+      .withDescription("Config to provide a strategy class to execute a ClusteringPlan. Class has to be subclass of RunClusteringStrategy");
 
-  // Turn on inline clustering - clustering will be run after write operation is complete.
-  public static final String INLINE_CLUSTERING_PROP = "hoodie.clustering.inline";
-  private static final String DEFAULT_INLINE_CLUSTERING = "false";
+  public static final ConfigOption<String> INLINE_CLUSTERING_PROP = ConfigOption
+      .key("hoodie.clustering.inline")
+      .defaultValue("false")
+      .withDescription("Turn on inline clustering - clustering will be run after write operation is complete");
+
+  public static final ConfigOption<String> INLINE_CLUSTERING_MAX_COMMIT_PROP = ConfigOption
+      .key("hoodie.clustering.inline.max.commits")
+      .defaultValue("4")
+      .withDescription("Config to control frequency of clustering");
 
-  // Config to control frequency of clustering
-  public static final String INLINE_CLUSTERING_MAX_COMMIT_PROP = "hoodie.clustering.inline.max.commits";
-  private static final String DEFAULT_INLINE_CLUSTERING_NUM_COMMITS = "4";
-  
   // Any strategy specific params can be saved with this prefix
   public static final String CLUSTERING_STRATEGY_PARAM_PREFIX = "hoodie.clustering.plan.strategy.";
 
-  // Number of partitions to list to create ClusteringPlan.
-  public static final String CLUSTERING_TARGET_PARTITIONS = CLUSTERING_STRATEGY_PARAM_PREFIX + "daybased.lookback.partitions";
-  public static final String DEFAULT_CLUSTERING_TARGET_PARTITIONS = String.valueOf(2);
-
-  // Files smaller than the size specified here are candidates for clustering.
-  public static final String CLUSTERING_PLAN_SMALL_FILE_LIMIT = CLUSTERING_STRATEGY_PARAM_PREFIX + "small.file.limit";
-  public static final String DEFAULT_CLUSTERING_PLAN_SMALL_FILE_LIMIT = String.valueOf(600 * 1024 * 1024L); // 600MB
-
-  // Each clustering operation can create multiple groups. Total amount of data processed by clustering operation
-  // is defined by below two properties (CLUSTERING_MAX_BYTES_PER_GROUP * CLUSTERING_MAX_NUM_GROUPS).
-  // Max amount of data to be included in one group
-  public static final String CLUSTERING_MAX_BYTES_PER_GROUP = CLUSTERING_STRATEGY_PARAM_PREFIX + "max.bytes.per.group";
-  public static final String DEFAULT_CLUSTERING_MAX_GROUP_SIZE = String.valueOf(2 * 1024 * 1024 * 1024L);
-
-  // Maximum number of groups to create as part of ClusteringPlan. Increasing groups will increase parallelism.
-  public static final String CLUSTERING_MAX_NUM_GROUPS = CLUSTERING_STRATEGY_PARAM_PREFIX + "max.num.groups";
-  public static final String DEFAULT_CLUSTERING_MAX_NUM_GROUPS = "30";
-
-  // Each group can produce 'N' (CLUSTERING_MAX_GROUP_SIZE/CLUSTERING_TARGET_FILE_SIZE) output file groups.
-  public static final String CLUSTERING_TARGET_FILE_MAX_BYTES = CLUSTERING_STRATEGY_PARAM_PREFIX + "target.file.max.bytes";
-  public static final String DEFAULT_CLUSTERING_TARGET_FILE_MAX_BYTES = String.valueOf(1 * 1024 * 1024 * 1024L); // 1GB
-  
-  // Constants related to clustering that may be used by more than 1 strategy.
-  public static final String CLUSTERING_SORT_COLUMNS_PROPERTY = HoodieClusteringConfig.CLUSTERING_STRATEGY_PARAM_PREFIX + "sort.columns";
-
-  // When file groups is in clustering, need to handle the update to these file groups. Default strategy just reject the update
-  public static final String CLUSTERING_UPDATES_STRATEGY_PROP = "hoodie.clustering.updates.strategy";
-  public static final String DEFAULT_CLUSTERING_UPDATES_STRATEGY = "org.apache.hudi.client.clustering.update.strategy.SparkRejectUpdateStrategy";
-
-  // Async clustering
-  public static final String ASYNC_CLUSTERING_ENABLE_OPT_KEY = "hoodie.clustering.async.enabled";
-  public static final String DEFAULT_ASYNC_CLUSTERING_ENABLE_OPT_VAL = "false";
+  public static final ConfigOption<String> CLUSTERING_TARGET_PARTITIONS = ConfigOption
+      .key(CLUSTERING_STRATEGY_PARAM_PREFIX + "daybased.lookback.partitions")
+      .defaultValue("2")
+      .withDescription("Number of partitions to list to create ClusteringPlan");
+
+  public static final ConfigOption<String> CLUSTERING_PLAN_SMALL_FILE_LIMIT = ConfigOption
+      .key(CLUSTERING_STRATEGY_PARAM_PREFIX + "small.file.limit")
+      .defaultValue(String.valueOf(600 * 1024 * 1024L))
+      .withDescription("Files smaller than the size specified here are candidates for clustering");
+
+  public static final ConfigOption<String> CLUSTERING_MAX_BYTES_PER_GROUP = ConfigOption
+      .key(CLUSTERING_STRATEGY_PARAM_PREFIX + "max.bytes.per.group")
+      .defaultValue(String.valueOf(2 * 1024 * 1024 * 1024L))
+      .withDescription("Each clustering operation can create multiple groups. Total amount of data processed by clustering operation"
+          + " is defined by below two properties (CLUSTERING_MAX_BYTES_PER_GROUP * CLUSTERING_MAX_NUM_GROUPS)."
+          + " Max amount of data to be included in one group");
+
+  public static final ConfigOption<String> CLUSTERING_MAX_NUM_GROUPS = ConfigOption
+      .key(CLUSTERING_STRATEGY_PARAM_PREFIX + "max.num.groups")
+      .defaultValue("30")
+      .withDescription("Maximum number of groups to create as part of ClusteringPlan. Increasing groups will increase parallelism");
+
+  public static final ConfigOption<String> CLUSTERING_TARGET_FILE_MAX_BYTES = ConfigOption
+      .key(CLUSTERING_STRATEGY_PARAM_PREFIX + "target.file.max.bytes")
+      .defaultValue(String.valueOf(1 * 1024 * 1024 * 1024L))
+      .withDescription("Each group can produce 'N' (CLUSTERING_MAX_GROUP_SIZE/CLUSTERING_TARGET_FILE_SIZE) output file groups");
+
+  public static final ConfigOption<String> CLUSTERING_SORT_COLUMNS_PROPERTY = ConfigOption
+      .key(CLUSTERING_STRATEGY_PARAM_PREFIX + "sort.columns")
+      .noDefaultValue()
+      .withDescription("Constants related to clustering that may be used by more than 1 strategy");

Review comment:
       Description: `Columns to sort the data by when clustering.`

##########
File path: hudi-common/src/main/java/org/apache/hudi/common/config/LockConfiguration.java
##########
@@ -28,36 +28,97 @@
 public class LockConfiguration implements Serializable {
 
   public static final String LOCK_PREFIX = "hoodie.write.lock.";
-  public static final String LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS_PROP = LOCK_PREFIX + "wait_time_ms_between_retry";
-  public static final String DEFAULT_LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS = String.valueOf(5000L);
-  public static final String LOCK_ACQUIRE_RETRY_MAX_WAIT_TIME_IN_MILLIS_PROP = LOCK_PREFIX + "max_wait_time_ms_between_retry";
-  public static final String DEFAULT_LOCK_ACQUIRE_MAX_RETRY_WAIT_TIME_IN_MILLIS = String.valueOf(5000L);
-  public static final String LOCK_ACQUIRE_CLIENT_RETRY_WAIT_TIME_IN_MILLIS_PROP = LOCK_PREFIX + "client.wait_time_ms_between_retry";
-  public static final String DEFAULT_LOCK_ACQUIRE_CLIENT_RETRY_WAIT_TIME_IN_MILLIS = String.valueOf(10000L);
-  public static final String LOCK_ACQUIRE_NUM_RETRIES_PROP = LOCK_PREFIX + "num_retries";
-  public static final String DEFAULT_LOCK_ACQUIRE_NUM_RETRIES = String.valueOf(3);
-  public static final String LOCK_ACQUIRE_CLIENT_NUM_RETRIES_PROP = LOCK_PREFIX + "client.num_retries";
-  public static final String DEFAULT_LOCK_ACQUIRE_CLIENT_NUM_RETRIES = String.valueOf(0);
-  public static final String LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP = LOCK_PREFIX + "wait_time_ms";
-  public static final int DEFAULT_ACQUIRE_LOCK_WAIT_TIMEOUT_MS = 60 * 1000;
+
+  public static final ConfigOption<String> LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS_PROP = ConfigOption
+      .key(LOCK_PREFIX + "wait_time_ms_between_retry")
+      .defaultValue(String.valueOf(5000L))
+      .withDescription("");
+
+  public static final ConfigOption<String> LOCK_ACQUIRE_RETRY_MAX_WAIT_TIME_IN_MILLIS_PROP = ConfigOption
+      .key(LOCK_PREFIX + "max_wait_time_ms_between_retry")
+      .defaultValue(String.valueOf(5000L))
+      .withDescription("Initial amount of time to wait between retries by lock provider client");

Review comment:
       Is this statement true ? Looking at the config name it seems like the max time it can wait between retries.

##########
File path: hudi-common/src/main/java/org/apache/hudi/common/config/DefaultHoodieConfig.java
##########
@@ -44,6 +56,53 @@ public static void setDefaultOnCondition(Properties props, boolean condition, De
     }
   }
 
+  public static void setDefaultValue(Properties props, ConfigOption configOption) {
+    if (!contains(props, configOption)) {
+      Option<String> inferValue = Option.empty();
+      if (configOption.getInferFunc() != null) {
+        inferValue = (Option<String>) configOption.getInferFunc().apply(props);
+      }
+      props.setProperty(configOption.key(), inferValue.isPresent() ? inferValue.get() : configOption.defaultValue().toString());
+    }
+  }
+
+  public static void setDefaultValue(Map<String, String> props, ConfigOption configOption) {
+    if (!contains(props, configOption)) {
+      Option<String> inferValue = Option.empty();
+      if (configOption.getInferFunc() != null) {
+        inferValue = (Option<String>) configOption.getInferFunc().apply(props);
+      }
+      props.put(configOption.key(), inferValue.isPresent() ? inferValue.get() : configOption.defaultValue().toString());
+    }
+  }
+
+  public static boolean contains(Map props, ConfigOption configOption) {
+    if (props.containsKey(configOption.key())) {
+      return true;
+    }
+    return Arrays.stream(configOption.getDeprecatedNames()).anyMatch(props::containsKey);
+  }
+
+  public static Option<Object> getRawValue(Properties props, ConfigOption configOption) {
+    if (props.containsKey(configOption.key())) {
+      return Option.of(props.get(configOption.key()));
+    }
+    for (String deprecateName : configOption.getDeprecatedNames()) {
+      if (props.containsKey(deprecateName)) {
+        return Option.of(props.get(deprecateName));
+      }
+    }

Review comment:
       - Shouldn't you be using Option.ofNullable() to avoid null pointer exceptions here ?
   - nit : deprecateName => deprecatedName

##########
File path: hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java
##########
@@ -33,53 +33,82 @@
   public static final String METADATA_PREFIX = "hoodie.metadata";
 
   // Enable the internal Metadata Table which saves file listings
-  public static final String METADATA_ENABLE_PROP = METADATA_PREFIX + ".enable";
-  public static final boolean DEFAULT_METADATA_ENABLE = false;
+  public static final ConfigOption<Boolean> METADATA_ENABLE_PROP = ConfigOption
+      .key(METADATA_PREFIX + ".enable")
+      .defaultValue(false)
+      .withDescription("Enable the internal Metadata Table which stores table level metadata such as file listings");

Review comment:
       Lets just say in the description that it `stores file listings` to avoid confusion. When later its extended for other kinds of metadata we can change.

##########
File path: hudi-common/src/main/java/org/apache/hudi/common/config/DefaultHoodieConfig.java
##########
@@ -44,6 +56,53 @@ public static void setDefaultOnCondition(Properties props, boolean condition, De
     }
   }
 
+  public static void setDefaultValue(Properties props, ConfigOption configOption) {
+    if (!contains(props, configOption)) {
+      Option<String> inferValue = Option.empty();
+      if (configOption.getInferFunc() != null) {
+        inferValue = (Option<String>) configOption.getInferFunc().apply(props);
+      }
+      props.setProperty(configOption.key(), inferValue.isPresent() ? inferValue.get() : configOption.defaultValue().toString());
+    }
+  }
+
+  public static void setDefaultValue(Map<String, String> props, ConfigOption configOption) {
+    if (!contains(props, configOption)) {
+      Option<String> inferValue = Option.empty();
+      if (configOption.getInferFunc() != null) {
+        inferValue = (Option<String>) configOption.getInferFunc().apply(props);
+      }
+      props.put(configOption.key(), inferValue.isPresent() ? inferValue.get() : configOption.defaultValue().toString());
+    }
+  }
+
+  public static boolean contains(Map props, ConfigOption configOption) {
+    if (props.containsKey(configOption.key())) {
+      return true;
+    }
+    return Arrays.stream(configOption.getDeprecatedNames()).anyMatch(props::containsKey);
+  }
+
+  public static Option<Object> getRawValue(Properties props, ConfigOption configOption) {
+    if (props.containsKey(configOption.key())) {
+      return Option.of(props.get(configOption.key()));
+    }
+    for (String deprecateName : configOption.getDeprecatedNames()) {
+      if (props.containsKey(deprecateName)) {
+        return Option.of(props.get(deprecateName));
+      }
+    }
+    return Option.empty();
+  }
+
+  public static String getString(Properties props, ConfigOption configOption) {

Review comment:
       When you have generics in a Class it is not advisable to use the class in its raw form. Perhaps you need to use something like:
   ```
   public static <T> String getString(Properties props, ConfigOption<T> configOption)
   ```
   Inspect other places in the code as well for such usage.

##########
File path: hudi-common/src/main/java/org/apache/hudi/common/config/LockConfiguration.java
##########
@@ -28,36 +28,97 @@
 public class LockConfiguration implements Serializable {
 
   public static final String LOCK_PREFIX = "hoodie.write.lock.";
-  public static final String LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS_PROP = LOCK_PREFIX + "wait_time_ms_between_retry";
-  public static final String DEFAULT_LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS = String.valueOf(5000L);
-  public static final String LOCK_ACQUIRE_RETRY_MAX_WAIT_TIME_IN_MILLIS_PROP = LOCK_PREFIX + "max_wait_time_ms_between_retry";
-  public static final String DEFAULT_LOCK_ACQUIRE_MAX_RETRY_WAIT_TIME_IN_MILLIS = String.valueOf(5000L);
-  public static final String LOCK_ACQUIRE_CLIENT_RETRY_WAIT_TIME_IN_MILLIS_PROP = LOCK_PREFIX + "client.wait_time_ms_between_retry";
-  public static final String DEFAULT_LOCK_ACQUIRE_CLIENT_RETRY_WAIT_TIME_IN_MILLIS = String.valueOf(10000L);
-  public static final String LOCK_ACQUIRE_NUM_RETRIES_PROP = LOCK_PREFIX + "num_retries";
-  public static final String DEFAULT_LOCK_ACQUIRE_NUM_RETRIES = String.valueOf(3);
-  public static final String LOCK_ACQUIRE_CLIENT_NUM_RETRIES_PROP = LOCK_PREFIX + "client.num_retries";
-  public static final String DEFAULT_LOCK_ACQUIRE_CLIENT_NUM_RETRIES = String.valueOf(0);
-  public static final String LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP = LOCK_PREFIX + "wait_time_ms";
-  public static final int DEFAULT_ACQUIRE_LOCK_WAIT_TIMEOUT_MS = 60 * 1000;
+
+  public static final ConfigOption<String> LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS_PROP = ConfigOption
+      .key(LOCK_PREFIX + "wait_time_ms_between_retry")
+      .defaultValue(String.valueOf(5000L))
+      .withDescription("");

Review comment:
       We can probably add some description here. Seems like something understandable. Refer to: https://hudi.apache.org/docs/concurrency_control.html

##########
File path: hudi-common/src/main/java/org/apache/hudi/common/config/DefaultHoodieConfig.java
##########
@@ -44,6 +56,53 @@ public static void setDefaultOnCondition(Properties props, boolean condition, De
     }
   }
 
+  public static void setDefaultValue(Properties props, ConfigOption configOption) {
+    if (!contains(props, configOption)) {
+      Option<String> inferValue = Option.empty();
+      if (configOption.getInferFunc() != null) {
+        inferValue = (Option<String>) configOption.getInferFunc().apply(props);
+      }
+      props.setProperty(configOption.key(), inferValue.isPresent() ? inferValue.get() : configOption.defaultValue().toString());
+    }
+  }
+
+  public static void setDefaultValue(Map<String, String> props, ConfigOption configOption) {
+    if (!contains(props, configOption)) {
+      Option<String> inferValue = Option.empty();
+      if (configOption.getInferFunc() != null) {
+        inferValue = (Option<String>) configOption.getInferFunc().apply(props);
+      }
+      props.put(configOption.key(), inferValue.isPresent() ? inferValue.get() : configOption.defaultValue().toString());
+    }
+  }
+
+  public static boolean contains(Map props, ConfigOption configOption) {
+    if (props.containsKey(configOption.key())) {
+      return true;
+    }
+    return Arrays.stream(configOption.getDeprecatedNames()).anyMatch(props::containsKey);
+  }
+
+  public static Option<Object> getRawValue(Properties props, ConfigOption configOption) {
+    if (props.containsKey(configOption.key())) {
+      return Option.of(props.get(configOption.key()));
+    }
+    for (String deprecateName : configOption.getDeprecatedNames()) {
+      if (props.containsKey(deprecateName)) {
+        return Option.of(props.get(deprecateName));
+      }
+    }
+    return Option.empty();
+  }
+
+  public static String getString(Properties props, ConfigOption configOption) {

Review comment:
       When you have generics in a Class it is not advisable to use the class in its raw form. Perhaps you need to use something like:
   ```
   public static <T> String getString(Properties props, ConfigOption<T> configOption)
   ```
   Inspect other places in the code as well for such usage.

##########
File path: hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java
##########
@@ -33,53 +33,82 @@
   public static final String METADATA_PREFIX = "hoodie.metadata";
 
   // Enable the internal Metadata Table which saves file listings
-  public static final String METADATA_ENABLE_PROP = METADATA_PREFIX + ".enable";
-  public static final boolean DEFAULT_METADATA_ENABLE = false;
+  public static final ConfigOption<Boolean> METADATA_ENABLE_PROP = ConfigOption
+      .key(METADATA_PREFIX + ".enable")
+      .defaultValue(false)
+      .withDescription("Enable the internal Metadata Table which stores table level metadata such as file listings");
 
   // Validate contents of Metadata Table on each access against the actual filesystem
-  public static final String METADATA_VALIDATE_PROP = METADATA_PREFIX + ".validate";
-  public static final boolean DEFAULT_METADATA_VALIDATE = false;
+  public static final ConfigOption<Boolean> METADATA_VALIDATE_PROP = ConfigOption
+      .key(METADATA_PREFIX + ".validate")
+      .defaultValue(false)
+      .withDescription("Validate contents of Metadata Table on each access against the actual listings from DFS");
+
   public static final boolean DEFAULT_METADATA_ENABLE_FOR_READERS = false;
 
   // Enable metrics for internal Metadata Table
-  public static final String METADATA_METRICS_ENABLE_PROP = METADATA_PREFIX + ".metrics.enable";
-  public static final boolean DEFAULT_METADATA_METRICS_ENABLE = false;
+  public static final ConfigOption<Boolean> METADATA_METRICS_ENABLE_PROP = ConfigOption
+      .key(METADATA_PREFIX + ".metrics.enable")
+      .defaultValue(false)
+      .withDescription("");
 
   // Parallelism for inserts
-  public static final String METADATA_INSERT_PARALLELISM_PROP = METADATA_PREFIX + ".insert.parallelism";
-  public static final int DEFAULT_METADATA_INSERT_PARALLELISM = 1;
+  public static final ConfigOption<Integer> METADATA_INSERT_PARALLELISM_PROP = ConfigOption
+      .key(METADATA_PREFIX + ".insert.parallelism")
+      .defaultValue(1)
+      .withDescription("Parallelism to use when writing to the metadata table");
 
   // Async clean
-  public static final String METADATA_ASYNC_CLEAN_PROP = METADATA_PREFIX + ".clean.async";
-  public static final boolean DEFAULT_METADATA_ASYNC_CLEAN = false;
+  public static final ConfigOption<Boolean> METADATA_ASYNC_CLEAN_PROP = ConfigOption
+      .key(METADATA_PREFIX + ".clean.async")
+      .defaultValue(false)
+      .withDescription("");
 
   // Maximum delta commits before compaction occurs
-  public static final String METADATA_COMPACT_NUM_DELTA_COMMITS_PROP = METADATA_PREFIX + ".compact.max.delta.commits";
-  public static final int DEFAULT_METADATA_COMPACT_NUM_DELTA_COMMITS = 24;
+  public static final ConfigOption<Integer> METADATA_COMPACT_NUM_DELTA_COMMITS_PROP = ConfigOption
+      .key(METADATA_PREFIX + ".compact.max.delta.commits")
+      .defaultValue(24)
+      .withDescription("Controls how often the metadata table is compacted.");
 
   // Archival settings
-  public static final String MIN_COMMITS_TO_KEEP_PROP = METADATA_PREFIX + ".keep.min.commits";
-  public static final int DEFAULT_MIN_COMMITS_TO_KEEP = 20;
-  public static final String MAX_COMMITS_TO_KEEP_PROP = METADATA_PREFIX + ".keep.max.commits";
-  public static final int DEFAULT_MAX_COMMITS_TO_KEEP = 30;
+  public static final ConfigOption<Integer> MIN_COMMITS_TO_KEEP_PROP = ConfigOption
+      .key(METADATA_PREFIX + ".keep.min.commits")
+      .defaultValue(20)
+      .withDescription("Controls the archival of the metadata table’s timeline");
+
+  public static final ConfigOption<Integer> MAX_COMMITS_TO_KEEP_PROP = ConfigOption
+      .key(METADATA_PREFIX + ".keep.max.commits")
+      .defaultValue(30)
+      .withDescription("Controls the archival of the metadata table’s timeline");
 
   // Cleaner commits retained
-  public static final String CLEANER_COMMITS_RETAINED_PROP = METADATA_PREFIX + ".cleaner.commits.retained";
-  public static final int DEFAULT_CLEANER_COMMITS_RETAINED = 3;
+  public static final ConfigOption<Integer> CLEANER_COMMITS_RETAINED_PROP = ConfigOption
+      .key(METADATA_PREFIX + ".cleaner.commits.retained")
+      .defaultValue(3)
+      .withDescription("");
 
   // Controls whether or not, upon failure to fetch from metadata table, should fallback to listing.
-  public static final String ENABLE_FALLBACK_PROP = METADATA_PREFIX + ".fallback.enable";
-  public static final String DEFAULT_ENABLE_FALLBACK = "true";
+  public static final ConfigOption<String> ENABLE_FALLBACK_PROP = ConfigOption
+      .key(METADATA_PREFIX + ".fallback.enable")
+      .defaultValue("true")
+      .withDescription("Fallback to listing from DFS, if there are any errors in fetching from metadata table");
 
   // Regex to filter out matching directories during bootstrap
-  public static final String DIRECTORY_FILTER_REGEX = METADATA_PREFIX + ".dir.filter.regex";
-  public static final String DEFAULT_DIRECTORY_FILTER_REGEX = "";
-
-  public static final String HOODIE_ASSUME_DATE_PARTITIONING_PROP = "hoodie.assume.date.partitioning";
-  public static final String DEFAULT_ASSUME_DATE_PARTITIONING = "false";
-
-  public static final String FILE_LISTING_PARALLELISM_PROP = "hoodie.file.listing.parallelism";
-  public static final int DEFAULT_FILE_LISTING_PARALLELISM = 1500;
+  public static final ConfigOption<String> DIRECTORY_FILTER_REGEX = ConfigOption
+      .key(METADATA_PREFIX + ".dir.filter.regex")
+      .defaultValue("")
+      .withDescription("");
+
+  public static final ConfigOption<String> HOODIE_ASSUME_DATE_PARTITIONING_PROP = ConfigOption
+      .key("hoodie.assume.date.partitioning")
+      .defaultValue("false")
+      .withDescription("Should HoodieWriteClient assume the data is partitioned by dates, i.e three levels from base path. "
+          + "This is a stop-gap to support tables created by versions < 0.3.1. Will be removed eventually");

Review comment:
       Can you explain what this means ?

##########
File path: hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java
##########
@@ -33,53 +33,82 @@
   public static final String METADATA_PREFIX = "hoodie.metadata";
 
   // Enable the internal Metadata Table which saves file listings
-  public static final String METADATA_ENABLE_PROP = METADATA_PREFIX + ".enable";
-  public static final boolean DEFAULT_METADATA_ENABLE = false;
+  public static final ConfigOption<Boolean> METADATA_ENABLE_PROP = ConfigOption
+      .key(METADATA_PREFIX + ".enable")
+      .defaultValue(false)
+      .withDescription("Enable the internal Metadata Table which stores table level metadata such as file listings");
 
   // Validate contents of Metadata Table on each access against the actual filesystem
-  public static final String METADATA_VALIDATE_PROP = METADATA_PREFIX + ".validate";
-  public static final boolean DEFAULT_METADATA_VALIDATE = false;
+  public static final ConfigOption<Boolean> METADATA_VALIDATE_PROP = ConfigOption
+      .key(METADATA_PREFIX + ".validate")
+      .defaultValue(false)
+      .withDescription("Validate contents of Metadata Table on each access against the actual listings from DFS");
+
   public static final boolean DEFAULT_METADATA_ENABLE_FOR_READERS = false;
 
   // Enable metrics for internal Metadata Table
-  public static final String METADATA_METRICS_ENABLE_PROP = METADATA_PREFIX + ".metrics.enable";
-  public static final boolean DEFAULT_METADATA_METRICS_ENABLE = false;
+  public static final ConfigOption<Boolean> METADATA_METRICS_ENABLE_PROP = ConfigOption
+      .key(METADATA_PREFIX + ".metrics.enable")
+      .defaultValue(false)
+      .withDescription("");
 
   // Parallelism for inserts
-  public static final String METADATA_INSERT_PARALLELISM_PROP = METADATA_PREFIX + ".insert.parallelism";
-  public static final int DEFAULT_METADATA_INSERT_PARALLELISM = 1;
+  public static final ConfigOption<Integer> METADATA_INSERT_PARALLELISM_PROP = ConfigOption
+      .key(METADATA_PREFIX + ".insert.parallelism")
+      .defaultValue(1)
+      .withDescription("Parallelism to use when writing to the metadata table");
 
   // Async clean
-  public static final String METADATA_ASYNC_CLEAN_PROP = METADATA_PREFIX + ".clean.async";
-  public static final boolean DEFAULT_METADATA_ASYNC_CLEAN = false;
+  public static final ConfigOption<Boolean> METADATA_ASYNC_CLEAN_PROP = ConfigOption
+      .key(METADATA_PREFIX + ".clean.async")
+      .defaultValue(false)
+      .withDescription("");

Review comment:
       `Enable asynchronous cleaning for metadata table`

##########
File path: hudi-common/src/main/java/org/apache/hudi/common/fs/ConsistencyGuardConfig.java
##########
@@ -30,29 +31,42 @@
  */
 public class ConsistencyGuardConfig extends DefaultHoodieConfig {
 
-  private static final String CONSISTENCY_CHECK_ENABLED_PROP = "hoodie.consistency.check.enabled";
-  private static final String DEFAULT_CONSISTENCY_CHECK_ENABLED = "false";
-
   // time between successive attempts to ensure written data's metadata is consistent on storage
-  private static final String INITIAL_CONSISTENCY_CHECK_INTERVAL_MS_PROP =
-      "hoodie.consistency.check.initial_interval_ms";
-  private static long DEFAULT_INITIAL_CONSISTENCY_CHECK_INTERVAL_MS = 400L;
+  public static final ConfigOption<String> CONSISTENCY_CHECK_ENABLED_PROP = ConfigOption
+      .key("hoodie.consistency.check.enabled")
+      .defaultValue("false")
+      .withDescription("Should HoodieWriteClient perform additional checks to ensure written files’ are listable "
+          + "on the underlying filesystem/storage. Set this to true, to workaround S3’s eventual consistency model "
+          + "and ensure all data written as a part of a commit is faithfully available for queries.");

Review comment:
       Just mention that this property is to handle S3 eventual consistency, and say that this property is no longer required to be used since S3 is now strongly consistent. Also mention that this will be removed in future releases.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org