Posted to commits@hudi.apache.org by yi...@apache.org on 2023/03/31 20:30:44 UTC
[hudi] branch master updated: [HUDI-5900] Clean up unused metadata configs (#8125)
This is an automated email from the ASF dual-hosted git repository.
yihua pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new c53d9fbe019 [HUDI-5900] Clean up unused metadata configs (#8125)
c53d9fbe019 is described below
commit c53d9fbe019a43f31b3eb7556ff109d71287cf6c
Author: Lokesh Jain <lj...@apache.org>
AuthorDate: Sat Apr 1 02:00:34 2023 +0530
[HUDI-5900] Clean up unused metadata configs (#8125)
This commit removes the metadata table configs that can no longer be overridden by users (#7709), as illustrated by the sketch after the file-change summary below:
hoodie.metadata.clean.async
hoodie.metadata.cleaner.commits.retained
hoodie.metadata.enable.full.scan.log.files
hoodie.metadata.populate.meta.fields
Co-authored-by: Y Ethan Guo <et...@gmail.com>
---
.../org/apache/hudi/config/HoodieWriteConfig.java | 8 --
.../metadata/HoodieBackedTableMetadataWriter.java | 15 ++--
.../functional/TestHoodieBackedMetadata.java | 53 ++-----------
.../functional/TestHoodieBackedTableMetadata.java | 1 -
.../client/functional/TestHoodieMetadataBase.java | 29 ++++----
.../apache/hudi/io/TestHoodieTimelineArchiver.java | 2 +-
.../hudi/common/config/HoodieMetadataConfig.java | 87 +++-------------------
.../hudi/metadata/HoodieBackedTableMetadata.java | 3 +-
.../hudi/functional/TestColumnStatsIndex.scala | 12 +--
9 files changed, 45 insertions(+), 165 deletions(-)
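To make the effect concrete, here is a minimal sketch (not part of the patch) of what configuration code looks like after this change: the user-facing metadata config builder no longer exposes the removed options, and the metadata table's cleaning behavior is pinned to the new built-in defaults. Class, builder, and constant names are taken from the diff below; the standalone class and main() wrapper are illustrative only.

import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.config.HoodieCleanConfig;

import static org.apache.hudi.common.config.HoodieMetadataConfig.DEFAULT_METADATA_ASYNC_CLEAN;
import static org.apache.hudi.common.config.HoodieMetadataConfig.DEFAULT_METADATA_CLEANER_COMMITS_RETAINED;

public class MetadataDefaultsSketch {
  public static void main(String[] args) {
    // Users can still enable the metadata table and tune compaction/archival,
    // but the four removed options are gone from the builder.
    HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder()
        .enable(true)
        .withMaxNumDeltaCommitsBeforeCompaction(3)
        .archiveCommitsWith(4, 5)
        // .withAsyncClean(false)          // removed by this commit
        // .retainCommits(3)               // removed by this commit
        // .enableFullScan(true)           // removed by this commit
        // .withPopulateMetaFields(false)  // removed by this commit
        .build();

    // Internally, the metadata table writer now builds its clean config from
    // fixed defaults rather than from user-supplied values.
    HoodieCleanConfig metadataCleanConfig = HoodieCleanConfig.newBuilder()
        .withAsyncClean(DEFAULT_METADATA_ASYNC_CLEAN)              // always false
        .withAutoClean(false)
        .retainCommits(DEFAULT_METADATA_CLEANER_COMMITS_RETAINED)  // always 3
        .build();

    System.out.println(metadataConfig + " " + metadataCleanConfig);
  }
}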
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
index c19a5d5f84f..5887604d3ad 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
@@ -2301,10 +2301,6 @@ public class HoodieWriteConfig extends HoodieConfig {
return getInt(HoodieMetadataConfig.COMPACT_NUM_DELTA_COMMITS);
}
- public boolean isMetadataAsyncClean() {
- return getBoolean(HoodieMetadataConfig.ASYNC_CLEAN_ENABLE);
- }
-
public boolean isMetadataAsyncIndex() {
return getBooleanOrDefault(HoodieMetadataConfig.ASYNC_INDEX_ENABLE);
}
@@ -2317,10 +2313,6 @@ public class HoodieWriteConfig extends HoodieConfig {
return getInt(HoodieMetadataConfig.MIN_COMMITS_TO_KEEP);
}
- public int getMetadataCleanerCommitsRetained() {
- return getInt(HoodieMetadataConfig.CLEANER_COMMITS_RETAINED);
- }
-
/**
* Hoodie Client Lock Configs.
* @return
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
index dd1e40cb972..34b2d866ee2 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
@@ -94,6 +94,9 @@ import java.util.Properties;
import java.util.Set;
import java.util.stream.Collectors;
+import static org.apache.hudi.common.config.HoodieMetadataConfig.DEFAULT_METADATA_CLEANER_COMMITS_RETAINED;
+import static org.apache.hudi.common.config.HoodieMetadataConfig.DEFAULT_METADATA_ASYNC_CLEAN;
+import static org.apache.hudi.common.config.HoodieMetadataConfig.DEFAULT_METADATA_POPULATE_META_FIELDS;
import static org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy.EAGER;
import static org.apache.hudi.common.table.HoodieTableConfig.ARCHIVELOG_FOLDER;
import static org.apache.hudi.common.table.timeline.HoodieInstant.State.REQUESTED;
@@ -272,12 +275,12 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
.forTable(tableName)
// we will trigger cleaning manually, to control the instant times
.withCleanConfig(HoodieCleanConfig.newBuilder()
- .withAsyncClean(HoodieMetadataConfig.ASYNC_CLEAN_ENABLE.defaultValue())
+ .withAsyncClean(DEFAULT_METADATA_ASYNC_CLEAN)
.withAutoClean(false)
.withCleanerParallelism(parallelism)
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS)
.withFailedWritesCleaningPolicy(failedWritesCleaningPolicy)
- .retainCommits(HoodieMetadataConfig.CLEANER_COMMITS_RETAINED.defaultValue())
+ .retainCommits(DEFAULT_METADATA_CLEANER_COMMITS_RETAINED)
.build())
// we will trigger archive manually, to ensure only regular writer invokes it
.withArchivalConfig(HoodieArchivalConfig.newBuilder()
@@ -299,7 +302,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
.withFinalizeWriteParallelism(parallelism)
.withAllowMultiWriteOnSameInstant(true)
.withKeyGenerator(HoodieTableMetadataKeyGenerator.class.getCanonicalName())
- .withPopulateMetaFields(HoodieMetadataConfig.POPULATE_META_FIELDS.defaultValue())
+ .withPopulateMetaFields(DEFAULT_METADATA_POPULATE_META_FIELDS)
.withReleaseResourceEnabled(writeConfig.areReleaseResourceEnabled());
// RecordKey properties are needed for the metadata table records
@@ -450,9 +453,9 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
HoodieTableMetaClient metadataMetaClient = HoodieTableMetaClient.builder().setConf(hadoopConf.get())
.setBasePath(metadataWriteConfig.getBasePath()).build();
- if (dataWriteConfig.getMetadataConfig().populateMetaFields() != metadataMetaClient.getTableConfig().populateMetaFields()) {
+ if (DEFAULT_METADATA_POPULATE_META_FIELDS != metadataMetaClient.getTableConfig().populateMetaFields()) {
LOG.info("Re-initiating metadata table properties since populate meta fields have changed");
- metadataMetaClient = initializeMetaClient(dataWriteConfig.getMetadataConfig().populateMetaFields());
+ metadataMetaClient = initializeMetaClient(DEFAULT_METADATA_POPULATE_META_FIELDS);
}
final Option<HoodieInstant> latestMetadataInstant =
@@ -566,7 +569,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
String createInstantTime = getInitialCommitInstantTime(dataMetaClient);
- initializeMetaClient(dataWriteConfig.getMetadataConfig().populateMetaFields());
+ initializeMetaClient(DEFAULT_METADATA_POPULATE_META_FIELDS);
initTableMetadata();
// if async metadata indexing is enabled,
// then only initialize files partition as other partitions will be built using HoodieIndexer
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java
index 94c3495b0ff..d9d7688effd 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java
@@ -495,11 +495,9 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
writeConfig = getWriteConfigBuilder(true, true, false)
.withMetadataConfig(HoodieMetadataConfig.newBuilder()
.enable(true)
- .enableFullScan(true)
.enableMetrics(false)
.withMaxNumDeltaCommitsBeforeCompaction(3)
.archiveCommitsWith(4, 5)
- .retainCommits(3)
.build())
.withCleanConfig(HoodieCleanConfig.newBuilder()
.retainCommits(1)
@@ -551,36 +549,6 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
validateMetadata(testTable, emptyList(), true);
}
- @Test
- public void testUpdationOfPopulateMetaFieldsForMetadataTable() throws Exception {
- tableType = COPY_ON_WRITE;
- init(tableType, false);
-
- writeConfig = getWriteConfigBuilder(true, true, false)
- .withMetadataConfig(HoodieMetadataConfig.newBuilder()
- .enable(true)
- .withPopulateMetaFields(true)
- .build())
- .build();
- initWriteConfigAndMetatableWriter(writeConfig, true);
- doWriteOperation(testTable, "0000001", INSERT);
-
- HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setBasePath(writeConfig.getBasePath() + "/.hoodie/metadata").setConf(hadoopConf).build();
- assertTrue(metaClient.getTableConfig().populateMetaFields());
-
- // update populateMeta fields to false.
- writeConfig = getWriteConfigBuilder(true, true, false)
- .withMetadataConfig(HoodieMetadataConfig.newBuilder()
- .enable(true)
- .withPopulateMetaFields(false)
- .build())
- .build();
- initWriteConfigAndMetatableWriter(writeConfig, true);
- doWriteOperation(testTable, "0000002", INSERT);
- metaClient = HoodieTableMetaClient.builder().setBasePath(writeConfig.getBasePath() + "/.hoodie/metadata").setConf(hadoopConf).build();
- assertFalse(metaClient.getTableConfig().populateMetaFields());
- }
-
@Test
public void testMetadataInsertUpsertCleanNonPartitioned() throws Exception {
init(COPY_ON_WRITE);
@@ -615,7 +583,6 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
writeConfig = getWriteConfigBuilder(true, true, false)
.withMetadataConfig(HoodieMetadataConfig.newBuilder()
.enable(true)
- .enableFullScan(true)
.enableMetrics(false)
.withMaxNumDeltaCommitsBeforeCompaction(3) // after 3 delta commits for regular writer operations, compaction should kick in.
.build()).build();
@@ -740,9 +707,7 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
writeConfig = getWriteConfigBuilder(true, true, false)
.withMetadataConfig(HoodieMetadataConfig.newBuilder()
.enable(true)
- .enableFullScan(true)
.enableMetrics(false)
- .withPopulateMetaFields(populateMetaFields)
.withMaxNumDeltaCommitsBeforeCompaction(2)
.build()).build();
initWriteConfigAndMetatableWriter(writeConfig, true);
@@ -782,7 +747,6 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
writeConfig = getWriteConfigBuilder(true, true, false)
.withMetadataConfig(HoodieMetadataConfig.newBuilder()
.enable(true)
- .enableFullScan(true)
.enableMetrics(false)
.withMaxNumDeltaCommitsBeforeCompaction(4)
.build()).build();
@@ -834,7 +798,6 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
writeConfig = getWriteConfigBuilder(true, true, false)
.withMetadataConfig(HoodieMetadataConfig.newBuilder()
.enable(true)
- .enableFullScan(true)
.enableMetrics(false)
.withMaxNumDeltaCommitsBeforeCompaction(3)
.build()).build();
@@ -914,7 +877,6 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
writeConfig = getWriteConfigBuilder(false, true, false)
.withMetadataConfig(HoodieMetadataConfig.newBuilder()
.enable(true)
- .withPopulateMetaFields(true)
.build())
.build();
@@ -990,7 +952,6 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
writeConfig = getWriteConfigBuilder(true, true, false)
.withMetadataConfig(HoodieMetadataConfig.newBuilder()
.enable(true)
- .withPopulateMetaFields(enableMetaFields)
.withMaxNumDeltaCommitsBeforeCompaction(3)
.build())
.build();
@@ -1294,9 +1255,8 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
final int minArchiveCommitsDataset = 4;
writeConfig = getWriteConfigBuilder(true, true, false)
.withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true)
- .archiveCommitsWith(minArchiveCommitsMetadata, minArchiveCommitsMetadata + 1).retainCommits(1)
+ .archiveCommitsWith(minArchiveCommitsMetadata, minArchiveCommitsMetadata + 1)
.withMaxNumDeltaCommitsBeforeCompaction(maxDeltaCommitsBeforeCompaction)
- .withPopulateMetaFields(populateMateFields)
.build())
.withCleanConfig(HoodieCleanConfig.newBuilder()
.retainCommits(1)
@@ -1528,7 +1488,6 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
.withMetadataConfig(HoodieMetadataConfig.newBuilder()
.enable(true)
.withMetadataIndexColumnStats(true)
- .enableFullScan(false)
.build())
.build();
@@ -2108,7 +2067,7 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
final int maxDeltaCommitsBeforeCompaction = 3;
HoodieWriteConfig config = getWriteConfigBuilder(true, true, false)
.withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true)
- .archiveCommitsWith(40, 60).retainCommits(1)
+ .archiveCommitsWith(40, 60)
.withMaxNumDeltaCommitsBeforeCompaction(maxDeltaCommitsBeforeCompaction).build())
.withCleanConfig(HoodieCleanConfig.newBuilder()
.withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.NEVER)
@@ -2330,7 +2289,7 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc);
try (SparkRDDWriteClient client = new SparkRDDWriteClient(engineContext,
- getWriteConfigBuilder(HoodieFailedWritesCleaningPolicy.EAGER, true, true, false, true, false, false).build(),
+ getWriteConfigBuilder(HoodieFailedWritesCleaningPolicy.EAGER, true, true, false, false, false).build(),
true)) {
String newCommitTime = HoodieActiveTimeline.createNewInstantTime();
client.startCommitWithTime(newCommitTime);
@@ -2361,7 +2320,7 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
}
try (SparkRDDWriteClient client = new SparkRDDWriteClient(engineContext,
- getWriteConfigBuilder(HoodieFailedWritesCleaningPolicy.EAGER, true, true, false, true, false, false).build(),
+ getWriteConfigBuilder(HoodieFailedWritesCleaningPolicy.EAGER, true, true, false, false, false).build(),
true)) {
String newCommitTime = client.startCommit();
// Next insert
@@ -2441,7 +2400,7 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
// TESTCASE: If commit on the metadata table succeeds but fails on the dataset, then on next init the metadata table
// should be rolled back to last valid commit.
try (SparkRDDWriteClient client = new SparkRDDWriteClient(engineContext,
- getWriteConfigBuilder(HoodieFailedWritesCleaningPolicy.EAGER, true, true, false, true, false, false).build(),
+ getWriteConfigBuilder(HoodieFailedWritesCleaningPolicy.EAGER, true, true, false, false, false).build(),
true)) {
String newCommitTime = HoodieActiveTimeline.createNewInstantTime();
client.startCommitWithTime(newCommitTime);
@@ -2465,7 +2424,7 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
}
try (SparkRDDWriteClient client = new SparkRDDWriteClient(engineContext,
- getWriteConfigBuilder(HoodieFailedWritesCleaningPolicy.EAGER, true, true, false, true, false, false).build(),
+ getWriteConfigBuilder(HoodieFailedWritesCleaningPolicy.EAGER, true, true, false, false, false).build(),
true)) {
String newCommitTime = client.startCommit();
// Next insert
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedTableMetadata.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedTableMetadata.java
index b67fe8a80d0..4aa1a25f2da 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedTableMetadata.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedTableMetadata.java
@@ -232,7 +232,6 @@ public class TestHoodieBackedTableMetadata extends TestHoodieMetadataBase {
writeConfig = getWriteConfigBuilder(true, true, false)
.withMetadataConfig(HoodieMetadataConfig.newBuilder()
.enable(true)
- .withPopulateMetaFields(false)
.withMaxNumDeltaCommitsBeforeCompaction(3)
.build())
.build();
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBase.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBase.java
index 6bb95028523..36c174b1115 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBase.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBase.java
@@ -37,10 +37,10 @@ import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
import org.apache.hudi.common.testutils.HoodieMetadataTestTable;
import org.apache.hudi.common.testutils.HoodieTestTable;
import org.apache.hudi.common.util.Option;
-import org.apache.hudi.config.HoodieCleanConfig;
import org.apache.hudi.config.HoodieArchivalConfig;
-import org.apache.hudi.config.HoodieIndexConfig;
+import org.apache.hudi.config.HoodieCleanConfig;
import org.apache.hudi.config.HoodieCompactionConfig;
+import org.apache.hudi.config.HoodieIndexConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.config.metrics.HoodieMetricsConfig;
import org.apache.hudi.config.metrics.HoodieMetricsGraphiteConfig;
@@ -70,6 +70,9 @@ import java.util.Properties;
import static java.util.Arrays.asList;
import static java.util.Collections.emptyList;
+import static org.apache.hudi.common.config.HoodieMetadataConfig.DEFAULT_METADATA_ASYNC_CLEAN;
+import static org.apache.hudi.common.config.HoodieMetadataConfig.DEFAULT_METADATA_CLEANER_COMMITS_RETAINED;
+import static org.apache.hudi.common.config.HoodieMetadataConfig.DEFAULT_METADATA_POPULATE_META_FIELDS;
import static org.apache.hudi.common.model.WriteOperationType.INSERT;
import static org.apache.hudi.common.model.WriteOperationType.UPSERT;
import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA;
@@ -90,7 +93,7 @@ public class TestHoodieMetadataBase extends HoodieClientTestHarness {
}
public void init(HoodieTableType tableType, HoodieWriteConfig writeConfig) throws IOException {
- init(tableType, Option.of(writeConfig), true, false, false, false);
+ init(tableType, Option.of(writeConfig), true, false, false);
}
public void init(HoodieTableType tableType, boolean enableMetadataTable) throws IOException {
@@ -103,12 +106,12 @@ public class TestHoodieMetadataBase extends HoodieClientTestHarness {
public void init(HoodieTableType tableType, boolean enableMetadataTable, boolean enableFullScan, boolean enableMetrics, boolean
validateMetadataPayloadStateConsistency) throws IOException {
- init(tableType, Option.empty(), enableMetadataTable, enableFullScan, enableMetrics,
+ init(tableType, Option.empty(), enableMetadataTable, enableMetrics,
validateMetadataPayloadStateConsistency);
}
public void init(HoodieTableType tableType, Option<HoodieWriteConfig> writeConfig, boolean enableMetadataTable,
- boolean enableFullScan, boolean enableMetrics, boolean validateMetadataPayloadStateConsistency) throws IOException {
+ boolean enableMetrics, boolean validateMetadataPayloadStateConsistency) throws IOException {
this.tableType = tableType;
initPath();
initSparkContexts("TestHoodieMetadata");
@@ -120,7 +123,7 @@ public class TestHoodieMetadataBase extends HoodieClientTestHarness {
metadataTableBasePath = HoodieTableMetadata.getMetadataTableBasePath(basePath);
this.writeConfig = writeConfig.isPresent()
? writeConfig.get() : getWriteConfigBuilder(HoodieFailedWritesCleaningPolicy.EAGER, true,
- enableMetadataTable, enableMetrics, enableFullScan, true,
+ enableMetadataTable, enableMetrics, true,
validateMetadataPayloadStateConsistency)
.build();
initWriteConfigAndMetatableWriter(this.writeConfig, enableMetadataTable);
@@ -328,11 +331,11 @@ public class TestHoodieMetadataBase extends HoodieClientTestHarness {
protected HoodieWriteConfig.Builder getWriteConfigBuilder(HoodieFailedWritesCleaningPolicy policy, boolean autoCommit, boolean useFileListingMetadata,
boolean enableMetrics) {
- return getWriteConfigBuilder(policy, autoCommit, useFileListingMetadata, enableMetrics, true, true, false);
+ return getWriteConfigBuilder(policy, autoCommit, useFileListingMetadata, enableMetrics, true, false);
}
protected HoodieWriteConfig.Builder getWriteConfigBuilder(HoodieFailedWritesCleaningPolicy policy, boolean autoCommit, boolean useFileListingMetadata,
- boolean enableMetrics, boolean enableFullScan, boolean useRollbackUsingMarkers,
+ boolean enableMetrics, boolean useRollbackUsingMarkers,
boolean validateMetadataPayloadConsistency) {
Properties properties = new Properties();
properties.put(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME.key(), SimpleKeyGenerator.class.getName());
@@ -352,9 +355,7 @@ public class TestHoodieMetadataBase extends HoodieClientTestHarness {
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
.withMetadataConfig(HoodieMetadataConfig.newBuilder()
.enable(useFileListingMetadata)
- .enableFullScan(enableFullScan)
.enableMetrics(enableMetrics)
- .withPopulateMetaFields(HoodieMetadataConfig.POPULATE_META_FIELDS.defaultValue())
.ignoreSpuriousDeletes(validateMetadataPayloadConsistency)
.build())
.withMetricsConfig(HoodieMetricsConfig.newBuilder().on(enableMetrics)
@@ -396,16 +397,16 @@ public class TestHoodieMetadataBase extends HoodieClientTestHarness {
.forTable(writeConfig.getTableName() + METADATA_TABLE_NAME_SUFFIX)
// we will trigger cleaning manually, to control the instant times
.withCleanConfig(HoodieCleanConfig.newBuilder()
- .withAsyncClean(writeConfig.isMetadataAsyncClean())
+ .withAsyncClean(DEFAULT_METADATA_ASYNC_CLEAN)
.withAutoClean(false)
.withCleanerParallelism(parallelism)
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS)
.withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY)
- .retainCommits(writeConfig.getMetadataCleanerCommitsRetained())
+ .retainCommits(DEFAULT_METADATA_CLEANER_COMMITS_RETAINED)
.build())
// we will trigger archival manually, to control the instant times
.withArchivalConfig(HoodieArchivalConfig.newBuilder()
- .archiveCommitsWith(minCommitsToKeep, maxCommitsToKeep).build())
+ .archiveCommitsWith(minCommitsToKeep, maxCommitsToKeep).build())
// we will trigger compaction manually, to control the instant times
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
.withInlineCompaction(false)
@@ -416,7 +417,7 @@ public class TestHoodieMetadataBase extends HoodieClientTestHarness {
.withFinalizeWriteParallelism(parallelism)
.withAllowMultiWriteOnSameInstant(true)
.withKeyGenerator(HoodieTableMetadataKeyGenerator.class.getCanonicalName())
- .withPopulateMetaFields(writeConfig.getMetadataConfig().populateMetaFields());
+ .withPopulateMetaFields(DEFAULT_METADATA_POPULATE_META_FIELDS);
// RecordKey properties are needed for the metadata table records
final Properties properties = new Properties();
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java
index 4320b13662e..22540b09cc2 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java
@@ -1296,7 +1296,7 @@ public class TestHoodieTimelineArchiver extends HoodieClientTestHarness {
.withRemoteServerPort(timelineServicePort).build())
.withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true)
.withMaxNumDeltaCommitsBeforeCompaction(8)
- .retainCommits(3).archiveCommitsWith(4, 5).build())
+ .archiveCommitsWith(4, 5).build())
.forTable("test-trip-table").build();
initWriteConfigAndMetatableWriter(writeConfig, true);
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java
index 9b84466090d..1e9b28b7992 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java
@@ -41,6 +41,15 @@ import java.util.Properties;
+ " to avoid overhead of accessing cloud storage, during queries.")
public final class HoodieMetadataConfig extends HoodieConfig {
+ // Asynchronous cleaning for metadata table is disabled by default
+ public static final boolean DEFAULT_METADATA_ASYNC_CLEAN = false;
+ // Full scanning of log files while reading log records is enabled by default for metadata table
+ public static final boolean DEFAULT_METADATA_ENABLE_FULL_SCAN_LOG_FILES = true;
+ // Meta fields are not populated by default for metadata table
+ public static final boolean DEFAULT_METADATA_POPULATE_META_FIELDS = false;
+ // Default number of commits to retain, without cleaning, on metadata table
+ public static final int DEFAULT_METADATA_CLEANER_COMMITS_RETAINED = 3;
+
public static final String METADATA_PREFIX = "hoodie.metadata";
public static final String OPTIMIZED_LOG_BLOCKS_SCAN = ".optimized.log.blocks.scan.enable";
@@ -67,13 +76,6 @@ public final class HoodieMetadataConfig extends HoodieConfig {
.sinceVersion("0.7.0")
.withDocumentation("Parallelism to use when inserting to the metadata table");
- // Async clean
- public static final ConfigProperty<Boolean> ASYNC_CLEAN_ENABLE = ConfigProperty
- .key(METADATA_PREFIX + ".clean.async")
- .defaultValue(false)
- .sinceVersion("0.7.0")
- .withDocumentation("Enable asynchronous cleaning for metadata table. This is an internal config and setting this will not overwrite the value actually used.");
-
// Async index
public static final ConfigProperty<Boolean> ASYNC_INDEX_ENABLE = ConfigProperty
.key(METADATA_PREFIX + ".index.async")
@@ -105,14 +107,6 @@ public final class HoodieMetadataConfig extends HoodieConfig {
.withDocumentation("Similar to " + MIN_COMMITS_TO_KEEP.key() + ", this config controls "
+ "the maximum number of instants to retain in the active timeline.");
- // Cleaner commits retained
- public static final ConfigProperty<Integer> CLEANER_COMMITS_RETAINED = ConfigProperty
- .key(METADATA_PREFIX + ".cleaner.commits.retained")
- .defaultValue(3)
- .sinceVersion("0.7.0")
- .withDocumentation("Number of commits to retain, without cleaning, on metadata table. "
- + "This is an internal config and setting this will not overwrite the actual value used.");
-
// Regex to filter out matching directories during bootstrap
public static final ConfigProperty<String> DIR_FILTER_REGEX = ConfigProperty
.key(METADATA_PREFIX + ".dir.filter.regex")
@@ -133,13 +127,6 @@ public final class HoodieMetadataConfig extends HoodieConfig {
.sinceVersion("0.7.0")
.withDocumentation("Parallelism to use, when listing the table on lake storage.");
- public static final ConfigProperty<Boolean> ENABLE_FULL_SCAN_LOG_FILES = ConfigProperty
- .key(METADATA_PREFIX + ".enable.full.scan.log.files")
- .defaultValue(true)
- .sinceVersion("0.10.0")
- .withDocumentation("Enable full scanning of log files while reading log records. If disabled, Hudi does look up of only interested entries. "
- + "This is an internal config and setting this will not overwrite the actual value used.");
-
public static final ConfigProperty<Boolean> ENABLE_METADATA_INDEX_BLOOM_FILTER = ConfigProperty
.key(METADATA_PREFIX + ".index.bloom.filter.enable")
.defaultValue(false)
@@ -223,13 +210,6 @@ public final class HoodieMetadataConfig extends HoodieConfig {
.withDocumentation("After the async indexer has finished indexing upto the base instant, it will ensure that all inflight writers "
+ "reliably write index updates as well. If this timeout expires, then the indexer will abort itself safely.");
- public static final ConfigProperty<Boolean> POPULATE_META_FIELDS = ConfigProperty
- .key(METADATA_PREFIX + ".populate.meta.fields")
- .defaultValue(false)
- .sinceVersion("0.10.0")
- .withDocumentation("When enabled, populates all meta fields. When disabled, no meta fields are populated. "
- + "This is an internal config and setting this will not overwrite the actual value used.");
-
public static final ConfigProperty<Boolean> IGNORE_SPURIOUS_DELETES = ConfigProperty
.key("_" + METADATA_PREFIX + ".ignore.spurious.deletes")
.defaultValue(true)
@@ -317,14 +297,6 @@ public final class HoodieMetadataConfig extends HoodieConfig {
return getString(DIR_FILTER_REGEX);
}
- public boolean allowFullScan() {
- return getBooleanOrDefault(ENABLE_FULL_SCAN_LOG_FILES);
- }
-
- public boolean populateMetaFields() {
- return getBooleanOrDefault(HoodieMetadataConfig.POPULATE_META_FIELDS);
- }
-
public boolean ignoreSpuriousDeletes() {
return getBoolean(IGNORE_SPURIOUS_DELETES);
}
@@ -413,11 +385,6 @@ public final class HoodieMetadataConfig extends HoodieConfig {
return this;
}
- public Builder withAsyncClean(boolean asyncClean) {
- metadataConfig.setValue(ASYNC_CLEAN_ENABLE, String.valueOf(asyncClean));
- return this;
- }
-
public Builder withAsyncIndex(boolean asyncIndex) {
metadataConfig.setValue(ASYNC_INDEX_ENABLE, String.valueOf(asyncIndex));
return this;
@@ -428,22 +395,12 @@ public final class HoodieMetadataConfig extends HoodieConfig {
return this;
}
- public Builder withPopulateMetaFields(boolean populateMetaFields) {
- metadataConfig.setValue(POPULATE_META_FIELDS, Boolean.toString(populateMetaFields));
- return this;
- }
-
public Builder archiveCommitsWith(int minToKeep, int maxToKeep) {
metadataConfig.setValue(MIN_COMMITS_TO_KEEP, String.valueOf(minToKeep));
metadataConfig.setValue(MAX_COMMITS_TO_KEEP, String.valueOf(maxToKeep));
return this;
}
- public Builder retainCommits(int commitsRetained) {
- metadataConfig.setValue(CLEANER_COMMITS_RETAINED, String.valueOf(commitsRetained));
- return this;
- }
-
public Builder withFileListingParallelism(int parallelism) {
metadataConfig.setValue(FILE_LISTING_PARALLELISM_VALUE, String.valueOf(parallelism));
return this;
@@ -459,11 +416,6 @@ public final class HoodieMetadataConfig extends HoodieConfig {
return this;
}
- public Builder enableFullScan(boolean enableFullScan) {
- metadataConfig.setValue(ENABLE_FULL_SCAN_LOG_FILES, String.valueOf(enableFullScan));
- return this;
- }
-
public Builder ignoreSpuriousDeletes(boolean validateMetadataPayloadConsistency) {
metadataConfig.setValue(IGNORE_SPURIOUS_DELETES, String.valueOf(validateMetadataPayloadConsistency));
return this;
@@ -536,17 +488,6 @@ public final class HoodieMetadataConfig extends HoodieConfig {
@Deprecated
public static final int DEFAULT_METADATA_INSERT_PARALLELISM = INSERT_PARALLELISM_VALUE.defaultValue();
- /**
- * @deprecated Use {@link #ASYNC_CLEAN_ENABLE} and its methods.
- */
- @Deprecated
- public static final String METADATA_ASYNC_CLEAN_PROP = ASYNC_CLEAN_ENABLE.key();
- /**
- * @deprecated Use {@link #ASYNC_CLEAN_ENABLE} and its methods.
- */
- @Deprecated
- public static final boolean DEFAULT_METADATA_ASYNC_CLEAN = ASYNC_CLEAN_ENABLE.defaultValue();
-
/**
* @deprecated Use {@link #COMPACT_NUM_DELTA_COMMITS} and its methods.
*/
@@ -578,16 +519,6 @@ public final class HoodieMetadataConfig extends HoodieConfig {
*/
@Deprecated
public static final int DEFAULT_MAX_COMMITS_TO_KEEP = MAX_COMMITS_TO_KEEP.defaultValue();
- /**
- * @deprecated Use {@link #CLEANER_COMMITS_RETAINED} and its methods.
- */
- @Deprecated
- public static final String CLEANER_COMMITS_RETAINED_PROP = CLEANER_COMMITS_RETAINED.key();
- /**
- * @deprecated Use {@link #CLEANER_COMMITS_RETAINED} and its methods.
- */
- @Deprecated
- public static final int DEFAULT_CLEANER_COMMITS_RETAINED = CLEANER_COMMITS_RETAINED.defaultValue();
/**
* @deprecated No longer takes any effect.
*/
diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
index fe4efacd0ca..36de343091e 100644
--- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
+++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
@@ -76,6 +76,7 @@ import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
+import static org.apache.hudi.common.config.HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FULL_SCAN_LOG_FILES;
import static org.apache.hudi.common.util.CollectionUtils.isNullOrEmpty;
import static org.apache.hudi.common.util.CollectionUtils.toStream;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.PARTITION_NAME_BLOOM_FILTERS;
@@ -549,7 +550,7 @@ public class HoodieBackedTableMetadata extends BaseTableMetadata {
private boolean isFullScanAllowedForPartition(String partitionName) {
switch (partitionName) {
case PARTITION_NAME_FILES:
- return HoodieMetadataConfig.ENABLE_FULL_SCAN_LOG_FILES.defaultValue();
+ return DEFAULT_METADATA_ENABLE_FULL_SCAN_LOG_FILES;
case PARTITION_NAME_COLUMN_STATS:
case PARTITION_NAME_BLOOM_FILTERS:
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestColumnStatsIndex.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestColumnStatsIndex.scala
index c6dd7ac6170..640a2fa0e49 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestColumnStatsIndex.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestColumnStatsIndex.scala
@@ -98,10 +98,6 @@ class TestColumnStatsIndex extends HoodieSparkClientTestBase {
DataSourceWriteOptions.TABLE_TYPE.key -> testCase.tableType.toString,
RECORDKEY_FIELD.key -> "c1",
PRECOMBINE_FIELD.key -> "c1",
- // NOTE: Currently only this setting is used like following by different MT partitions:
- // - Files: using it
- // - Column Stats: NOT using it (defaults to doing "point-lookups")
- HoodieMetadataConfig.ENABLE_FULL_SCAN_LOG_FILES.key -> testCase.forceFullLogScan.toString,
HoodieTableConfig.POPULATE_META_FIELDS.key -> "true"
) ++ metadataOpts
@@ -650,14 +646,12 @@ class TestColumnStatsIndex extends HoodieSparkClientTestBase {
object TestColumnStatsIndex {
- case class ColumnStatsTestCase(tableType: HoodieTableType, forceFullLogScan: Boolean, shouldReadInMemory: Boolean)
+ case class ColumnStatsTestCase(tableType: HoodieTableType, shouldReadInMemory: Boolean)
def testMetadataColumnStatsIndexParams: java.util.stream.Stream[Arguments] = {
java.util.stream.Stream.of(HoodieTableType.values().toStream.flatMap(tableType =>
- Seq(Arguments.arguments(ColumnStatsTestCase(tableType, forceFullLogScan = false, shouldReadInMemory = true)),
- Arguments.arguments(ColumnStatsTestCase(tableType, forceFullLogScan = false, shouldReadInMemory = false)),
- Arguments.arguments(ColumnStatsTestCase(tableType, forceFullLogScan = true, shouldReadInMemory = false)),
- Arguments.arguments(ColumnStatsTestCase(tableType, forceFullLogScan = true, shouldReadInMemory = true)))
+ Seq(Arguments.arguments(ColumnStatsTestCase(tableType, shouldReadInMemory = true)),
+ Arguments.arguments(ColumnStatsTestCase(tableType, shouldReadInMemory = false)))
): _*)
}
}