You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by si...@apache.org on 2023/01/25 05:11:11 UTC

[hudi] branch master updated: [HUDI-5582] Do not let users override internal metadata configs (#7709)

This is an automated email from the ASF dual-hosted git repository.

sivabalan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new a79f8093755 [HUDI-5582] Do not let users override internal metadata configs (#7709)
a79f8093755 is described below

commit a79f80937550a1e7164b197d46ce012ed4c4831b
Author: Jon Vexler <jb...@gmail.com>
AuthorDate: Wed Jan 25 00:11:01 2023 -0500

    [HUDI-5582] Do not let users override internal metadata configs (#7709)
    
    Some of the metadata configs are not meant to be overridden by the user. Fixing them in this patch
    
    hoodie.metadata.clean.async
    hoodie.metadata.cleaner.commits.retained
    hoodie.metadata.enable.full.scan.log.files
    hoodie.metadata.populate.meta.fields
    
    Co-authored-by: Jonathan Vexler <=>
    Co-authored-by: sivabalan <n....@gmail.com>
---
 .../hudi/metadata/HoodieBackedTableMetadataWriter.java |  6 +++---
 .../client/functional/TestHoodieBackedMetadata.java    | 18 ++++++++----------
 .../org/apache/hudi/io/TestHoodieTimelineArchiver.java |  2 +-
 .../hudi/common/config/HoodieMetadataConfig.java       | 11 +++++++----
 .../hudi/metadata/HoodieBackedTableMetadata.java       |  2 +-
 5 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
index fd83d24ea56..bbfa4460af4 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
@@ -264,12 +264,12 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
         .forTable(tableName)
         // we will trigger cleaning manually, to control the instant times
         .withCleanConfig(HoodieCleanConfig.newBuilder()
-            .withAsyncClean(writeConfig.isMetadataAsyncClean())
+            .withAsyncClean(HoodieMetadataConfig.ASYNC_CLEAN_ENABLE.defaultValue())
             .withAutoClean(false)
             .withCleanerParallelism(parallelism)
             .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS)
             .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.EAGER)
-            .retainCommits(writeConfig.getMetadataCleanerCommitsRetained())
+            .retainCommits(HoodieMetadataConfig.CLEANER_COMMITS_RETAINED.defaultValue())
             .build())
         // we will trigger archive manually, to ensure only regular writer invokes it
         .withArchivalConfig(HoodieArchivalConfig.newBuilder()
@@ -291,7 +291,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
         .withFinalizeWriteParallelism(parallelism)
         .withAllowMultiWriteOnSameInstant(true)
         .withKeyGenerator(HoodieTableMetadataKeyGenerator.class.getCanonicalName())
-        .withPopulateMetaFields(dataWriteConfig.getMetadataConfig().populateMetaFields());
+        .withPopulateMetaFields(HoodieMetadataConfig.POPULATE_META_FIELDS.defaultValue());
 
     // RecordKey properties are needed for the metadata table records
     final Properties properties = new Properties();
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java
index d46e02cf39f..8bfe13db9ad 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java
@@ -497,8 +497,8 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
             .enableFullScan(true)
             .enableMetrics(false)
             .withMaxNumDeltaCommitsBeforeCompaction(3)
-            .archiveCommitsWith(3, 4)
-            .retainCommits(1)
+            .archiveCommitsWith(4, 5)
+            .retainCommits(3)
             .build())
         .withCleanConfig(HoodieCleanConfig.newBuilder()
             .retainCommits(1)
@@ -732,9 +732,9 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
    *
    * @throws Exception
    */
-  @ParameterizedTest
-  @ValueSource(booleans = {true, false})
-  public void testVirtualKeysInBaseFiles(boolean populateMetaFields) throws Exception {
+  @Test
+  public void testVirtualKeysInBaseFiles() throws Exception {
+    boolean populateMetaFields = false;
     init(MERGE_ON_READ, false);
     writeConfig = getWriteConfigBuilder(true, true, false)
         .withMetadataConfig(HoodieMetadataConfig.newBuilder()
@@ -970,9 +970,7 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
    */
   public static List<Arguments> testMetadataRecordKeyExcludeFromPayloadArgs() {
     return asList(
-        Arguments.of(COPY_ON_WRITE, true),
         Arguments.of(COPY_ON_WRITE, false),
-        Arguments.of(MERGE_ON_READ, true),
         Arguments.of(MERGE_ON_READ, false)
     );
   }
@@ -1285,9 +1283,9 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
    * Test that manual rollbacks work correctly and enough timeline history is maintained on the metadata table
    * timeline.
    */
-  @ParameterizedTest
-  @ValueSource(booleans = {true, false})
-  public void testManualRollbacks(final boolean populateMateFields) throws Exception {
+  @Test
+  public void testManualRollbacks() throws Exception {
+    boolean populateMateFields = false;
     init(COPY_ON_WRITE, false);
     // Setting to archive more aggressively on the Metadata Table than the Dataset
     final int maxDeltaCommitsBeforeCompaction = 4;
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java
index 05c854b1b83..6ac0a490732 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java
@@ -1296,7 +1296,7 @@ public class TestHoodieTimelineArchiver extends HoodieClientTestHarness {
             .withRemoteServerPort(timelineServicePort).build())
         .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true)
             .withMaxNumDeltaCommitsBeforeCompaction(8)
-            .retainCommits(1).archiveCommitsWith(2, 4).build())
+            .retainCommits(3).archiveCommitsWith(4, 5).build())
         .forTable("test-trip-table").build();
     initWriteConfigAndMetatableWriter(writeConfig, true);
 
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java
index 9979be38cfc..81f2c1daeff 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java
@@ -71,7 +71,7 @@ public final class HoodieMetadataConfig extends HoodieConfig {
       .key(METADATA_PREFIX + ".clean.async")
       .defaultValue(false)
       .sinceVersion("0.7.0")
-      .withDocumentation("Enable asynchronous cleaning for metadata table");
+      .withDocumentation("Enable asynchronous cleaning for metadata table. This is an internal config and setting this will not overwrite the value actually used.");
 
   // Async index
   public static final ConfigProperty<Boolean> ASYNC_INDEX_ENABLE = ConfigProperty
@@ -109,7 +109,8 @@ public final class HoodieMetadataConfig extends HoodieConfig {
       .key(METADATA_PREFIX + ".cleaner.commits.retained")
       .defaultValue(3)
       .sinceVersion("0.7.0")
-      .withDocumentation("Number of commits to retain, without cleaning, on metadata table.");
+      .withDocumentation("Number of commits to retain, without cleaning, on metadata table. "
+          + "This is an internal config and setting this will not overwrite the actual value used.");
 
   // Regex to filter out matching directories during bootstrap
   public static final ConfigProperty<String> DIR_FILTER_REGEX = ConfigProperty
@@ -135,7 +136,8 @@ public final class HoodieMetadataConfig extends HoodieConfig {
       .key(METADATA_PREFIX + ".enable.full.scan.log.files")
       .defaultValue(true)
       .sinceVersion("0.10.0")
-      .withDocumentation("Enable full scanning of log files while reading log records. If disabled, Hudi does look up of only interested entries.");
+      .withDocumentation("Enable full scanning of log files while reading log records. If disabled, Hudi does look up of only interested entries. "
+          + "This is an internal config and setting this will not overwrite the actual value used.");
 
   public static final ConfigProperty<Boolean> ENABLE_METADATA_INDEX_BLOOM_FILTER = ConfigProperty
       .key(METADATA_PREFIX + ".index.bloom.filter.enable")
@@ -224,7 +226,8 @@ public final class HoodieMetadataConfig extends HoodieConfig {
       .key(METADATA_PREFIX + ".populate.meta.fields")
       .defaultValue(false)
       .sinceVersion("0.10.0")
-      .withDocumentation("When enabled, populates all meta fields. When disabled, no meta fields are populated.");
+      .withDocumentation("When enabled, populates all meta fields. When disabled, no meta fields are populated. "
+          + "This is an internal config and setting this will not overwrite the actual value used.");
 
   public static final ConfigProperty<Boolean> IGNORE_SPURIOUS_DELETES = ConfigProperty
       .key("_" + METADATA_PREFIX + ".ignore.spurious.deletes")
diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
index 26780265d03..23399578268 100644
--- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
+++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
@@ -536,7 +536,7 @@ public class HoodieBackedTableMetadata extends BaseTableMetadata {
   private boolean isFullScanAllowedForPartition(String partitionName) {
     switch (partitionName) {
       case PARTITION_NAME_FILES:
-        return metadataConfig.allowFullScan();
+        return HoodieMetadataConfig.ENABLE_FULL_SCAN_LOG_FILES.defaultValue();
 
       case PARTITION_NAME_COLUMN_STATS:
       case PARTITION_NAME_BLOOM_FILTERS: