You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by si...@apache.org on 2023/01/25 05:11:11 UTC
[hudi] branch master updated: [HUDI-5582] Do not let users override internal metadata configs (#7709)
This is an automated email from the ASF dual-hosted git repository.
sivabalan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new a79f8093755 [HUDI-5582] Do not let users override internal metadata configs (#7709)
a79f8093755 is described below
commit a79f80937550a1e7164b197d46ce012ed4c4831b
Author: Jon Vexler <jb...@gmail.com>
AuthorDate: Wed Jan 25 00:11:01 2023 -0500
[HUDI-5582] Do not let users override internal metadata configs (#7709)
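Some of the metadata configs are internal and are not meant to be overridden by the user. This patch makes the metadata table always use the default values of the following configs, ignoring any user-supplied setting: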
hoodie.metadata.clean.async
hoodie.metadata.cleaner.commits.retained
hoodie.metadata.enable.full.scan.log.files
hoodie.metadata.populate.meta.fields
Co-authored-by: Jonathan Vexler <=>
Co-authored-by: sivabalan <n....@gmail.com>
---
.../hudi/metadata/HoodieBackedTableMetadataWriter.java | 6 +++---
.../client/functional/TestHoodieBackedMetadata.java | 18 ++++++++----------
.../org/apache/hudi/io/TestHoodieTimelineArchiver.java | 2 +-
.../hudi/common/config/HoodieMetadataConfig.java | 11 +++++++----
.../hudi/metadata/HoodieBackedTableMetadata.java | 2 +-
5 files changed, 20 insertions(+), 19 deletions(-)
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
index fd83d24ea56..bbfa4460af4 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
@@ -264,12 +264,12 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
.forTable(tableName)
// we will trigger cleaning manually, to control the instant times
.withCleanConfig(HoodieCleanConfig.newBuilder()
- .withAsyncClean(writeConfig.isMetadataAsyncClean())
+ .withAsyncClean(HoodieMetadataConfig.ASYNC_CLEAN_ENABLE.defaultValue())
.withAutoClean(false)
.withCleanerParallelism(parallelism)
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS)
.withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.EAGER)
- .retainCommits(writeConfig.getMetadataCleanerCommitsRetained())
+ .retainCommits(HoodieMetadataConfig.CLEANER_COMMITS_RETAINED.defaultValue())
.build())
// we will trigger archive manually, to ensure only regular writer invokes it
.withArchivalConfig(HoodieArchivalConfig.newBuilder()
@@ -291,7 +291,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
.withFinalizeWriteParallelism(parallelism)
.withAllowMultiWriteOnSameInstant(true)
.withKeyGenerator(HoodieTableMetadataKeyGenerator.class.getCanonicalName())
- .withPopulateMetaFields(dataWriteConfig.getMetadataConfig().populateMetaFields());
+ .withPopulateMetaFields(HoodieMetadataConfig.POPULATE_META_FIELDS.defaultValue());
// RecordKey properties are needed for the metadata table records
final Properties properties = new Properties();
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java
index d46e02cf39f..8bfe13db9ad 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java
@@ -497,8 +497,8 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
.enableFullScan(true)
.enableMetrics(false)
.withMaxNumDeltaCommitsBeforeCompaction(3)
- .archiveCommitsWith(3, 4)
- .retainCommits(1)
+ .archiveCommitsWith(4, 5)
+ .retainCommits(3)
.build())
.withCleanConfig(HoodieCleanConfig.newBuilder()
.retainCommits(1)
@@ -732,9 +732,9 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
*
* @throws Exception
*/
- @ParameterizedTest
- @ValueSource(booleans = {true, false})
- public void testVirtualKeysInBaseFiles(boolean populateMetaFields) throws Exception {
+ @Test
+ public void testVirtualKeysInBaseFiles() throws Exception {
+ boolean populateMetaFields = false;
init(MERGE_ON_READ, false);
writeConfig = getWriteConfigBuilder(true, true, false)
.withMetadataConfig(HoodieMetadataConfig.newBuilder()
@@ -970,9 +970,7 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
*/
public static List<Arguments> testMetadataRecordKeyExcludeFromPayloadArgs() {
return asList(
- Arguments.of(COPY_ON_WRITE, true),
Arguments.of(COPY_ON_WRITE, false),
- Arguments.of(MERGE_ON_READ, true),
Arguments.of(MERGE_ON_READ, false)
);
}
@@ -1285,9 +1283,9 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
* Test that manual rollbacks work correctly and enough timeline history is maintained on the metadata table
* timeline.
*/
- @ParameterizedTest
- @ValueSource(booleans = {true, false})
- public void testManualRollbacks(final boolean populateMateFields) throws Exception {
+ @Test
+ public void testManualRollbacks() throws Exception {
+ boolean populateMateFields = false;
init(COPY_ON_WRITE, false);
// Setting to archive more aggressively on the Metadata Table than the Dataset
final int maxDeltaCommitsBeforeCompaction = 4;
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java
index 05c854b1b83..6ac0a490732 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java
@@ -1296,7 +1296,7 @@ public class TestHoodieTimelineArchiver extends HoodieClientTestHarness {
.withRemoteServerPort(timelineServicePort).build())
.withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true)
.withMaxNumDeltaCommitsBeforeCompaction(8)
- .retainCommits(1).archiveCommitsWith(2, 4).build())
+ .retainCommits(3).archiveCommitsWith(4, 5).build())
.forTable("test-trip-table").build();
initWriteConfigAndMetatableWriter(writeConfig, true);
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java
index 9979be38cfc..81f2c1daeff 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java
@@ -71,7 +71,7 @@ public final class HoodieMetadataConfig extends HoodieConfig {
.key(METADATA_PREFIX + ".clean.async")
.defaultValue(false)
.sinceVersion("0.7.0")
- .withDocumentation("Enable asynchronous cleaning for metadata table");
+ .withDocumentation("Enable asynchronous cleaning for metadata table. This is an internal config and setting this will not overwrite the value actually used.");
// Async index
public static final ConfigProperty<Boolean> ASYNC_INDEX_ENABLE = ConfigProperty
@@ -109,7 +109,8 @@ public final class HoodieMetadataConfig extends HoodieConfig {
.key(METADATA_PREFIX + ".cleaner.commits.retained")
.defaultValue(3)
.sinceVersion("0.7.0")
- .withDocumentation("Number of commits to retain, without cleaning, on metadata table.");
+ .withDocumentation("Number of commits to retain, without cleaning, on metadata table. "
+ + "This is an internal config and setting this will not overwrite the actual value used.");
// Regex to filter out matching directories during bootstrap
public static final ConfigProperty<String> DIR_FILTER_REGEX = ConfigProperty
@@ -135,7 +136,8 @@ public final class HoodieMetadataConfig extends HoodieConfig {
.key(METADATA_PREFIX + ".enable.full.scan.log.files")
.defaultValue(true)
.sinceVersion("0.10.0")
- .withDocumentation("Enable full scanning of log files while reading log records. If disabled, Hudi does look up of only interested entries.");
+ .withDocumentation("Enable full scanning of log files while reading log records. If disabled, Hudi does look up of only interested entries. "
+ + "This is an internal config and setting this will not overwrite the actual value used.");
public static final ConfigProperty<Boolean> ENABLE_METADATA_INDEX_BLOOM_FILTER = ConfigProperty
.key(METADATA_PREFIX + ".index.bloom.filter.enable")
@@ -224,7 +226,8 @@ public final class HoodieMetadataConfig extends HoodieConfig {
.key(METADATA_PREFIX + ".populate.meta.fields")
.defaultValue(false)
.sinceVersion("0.10.0")
- .withDocumentation("When enabled, populates all meta fields. When disabled, no meta fields are populated.");
+ .withDocumentation("When enabled, populates all meta fields. When disabled, no meta fields are populated. "
+ + "This is an internal config and setting this will not overwrite the actual value used.");
public static final ConfigProperty<Boolean> IGNORE_SPURIOUS_DELETES = ConfigProperty
.key("_" + METADATA_PREFIX + ".ignore.spurious.deletes")
diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
index 26780265d03..23399578268 100644
--- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
+++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
@@ -536,7 +536,7 @@ public class HoodieBackedTableMetadata extends BaseTableMetadata {
private boolean isFullScanAllowedForPartition(String partitionName) {
switch (partitionName) {
case PARTITION_NAME_FILES:
- return metadataConfig.allowFullScan();
+ return HoodieMetadataConfig.ENABLE_FULL_SCAN_LOG_FILES.defaultValue();
case PARTITION_NAME_COLUMN_STATS:
case PARTITION_NAME_BLOOM_FILTERS: