You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by si...@apache.org on 2022/10/25 03:08:01 UTC
[hudi] branch master updated: [HUDI-4201] Cli tool to get warned about empty non-completed instants from timeline (#6867)
This is an automated email from the ASF dual-hosted git repository.
sivabalan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 49b9ba02c7 [HUDI-4201] Cli tool to get warned about empty non-completed instants from timeline (#6867)
49b9ba02c7 is described below
commit 49b9ba02c78c4a2710c211e409f75b6848ad03e4
Author: Jon Vexler <jb...@gmail.com>
AuthorDate: Mon Oct 24 20:07:56 2022 -0700
[HUDI-4201] Cli tool to get warned about empty non-completed instants from timeline (#6867)
---
.../apache/hudi/cli/commands/RepairsCommand.java | 15 ++++-
.../hudi/cli/commands/TestRepairsCommand.java | 69 ++++++++++++++++++++++
2 files changed, 81 insertions(+), 3 deletions(-)
diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java
index 2b11e20a10..644fe61004 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java
@@ -18,8 +18,6 @@
package org.apache.hudi.cli.commands;
-import org.apache.avro.AvroRuntimeException;
-import org.apache.hadoop.fs.Path;
import org.apache.hudi.cli.DeDupeType;
import org.apache.hudi.cli.HoodieCLI;
import org.apache.hudi.cli.HoodiePrintHelper;
@@ -38,6 +36,9 @@ import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.PartitionPathEncodeUtils;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.exception.HoodieIOException;
+
+import org.apache.avro.AvroRuntimeException;
+import org.apache.hadoop.fs.Path;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.spark.launcher.SparkLauncher;
@@ -45,7 +46,6 @@ import org.apache.spark.util.Utils;
import org.springframework.shell.standard.ShellComponent;
import org.springframework.shell.standard.ShellMethod;
import org.springframework.shell.standard.ShellOption;
-import scala.collection.JavaConverters;
import java.io.FileInputStream;
import java.io.IOException;
@@ -55,6 +55,8 @@ import java.util.Properties;
import java.util.TreeSet;
import java.util.stream.Collectors;
+import scala.collection.JavaConverters;
+
import static org.apache.hudi.common.table.HoodieTableMetaClient.METAFOLDER_NAME;
/**
@@ -205,6 +207,13 @@ public class RepairsCommand {
});
}
+  /**
+   * Logs a warning for each completed instant on the active timeline that the
+   * timeline reports as empty (see {@code HoodieActiveTimeline#isEmpty}).
+   *
+   * <p>Nothing is returned to the shell; results are emitted only through {@code LOG.warn}.
+   */
+  @ShellMethod(key = "repair show empty commit metadata", value = "show failed commits")
+  public void showFailedCommits() {
+    HoodieActiveTimeline timeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
+    timeline.filterCompletedInstants()
+        .getInstants()
+        .filter(timeline::isEmpty)
+        .forEach(instant -> LOG.warn("Empty Commit: " + instant.toString()));
+  }
+
@ShellMethod(key = "repair migrate-partition-meta", value = "Migrate all partition meta file currently stored in text format "
+ "to be stored in base file format. See HoodieTableConfig#PARTITION_METAFILE_USE_DATA_FORMAT.")
public String migratePartitionMeta(
diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java
index 29377c21ea..c7492bba2f 100644
--- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java
+++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java
@@ -45,6 +45,9 @@ import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.core.LogEvent;
+import org.apache.logging.log4j.core.appender.AbstractAppender;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.SQLContext;
import org.junit.jupiter.api.AfterEach;
@@ -54,6 +57,8 @@ import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.shell.Shell;
+import org.apache.logging.log4j.Level;
+import org.apache.logging.log4j.core.Logger;
import java.io.FileInputStream;
import java.io.IOException;
@@ -65,6 +70,7 @@ import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Properties;
+import java.util.UUID;
import java.util.stream.Collectors;
import static org.apache.hudi.common.table.HoodieTableConfig.ARCHIVELOG_FOLDER;
@@ -259,6 +265,50 @@ public class TestRepairsCommand extends CLIFunctionalTestHarness {
assertEquals(0, metaClient.getActiveTimeline().filterInflightsAndRequested().getInstants().count());
}
+  /**
+   * Test case for "repair show empty commit metadata".
+   *
+   * <p>Creates commit files for instants 1..19, then deletes and re-creates every instant whose
+   * timestamp is a multiple of 4 (4, 8, 12, 16). The command is expected to log exactly one
+   * WARN message per re-created instant.
+   */
+  @Test
+  public void testShowFailedCommits() {
+    HoodieCLI.conf = hadoopConf();
+    Configuration conf = HoodieCLI.conf;
+    HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient();
+
+    // Write commit files for instants 1..19.
+    for (int i = 1; i < 20; i++) {
+      String timestamp = String.valueOf(i);
+      HoodieTestCommitMetadataGenerator.createCommitFile(tablePath, timestamp, conf);
+    }
+
+    // Replace every fourth instant file with a freshly created one
+    // (presumably empty, which is what the command is expected to report).
+    metaClient.getActiveTimeline().getInstants()
+        .filter(hoodieInstant -> Integer.parseInt(hoodieInstant.getTimestamp()) % 4 == 0)
+        .forEach(hoodieInstant -> {
+          metaClient.getActiveTimeline().deleteInstantFileIfExists(hoodieInstant);
+          metaClient.getActiveTimeline().createNewInstant(hoodieInstant);
+        });
+
+    // Capture everything RepairsCommand logs while the shell command runs.
+    final TestLogAppender appender = new TestLogAppender();
+    final Logger logger = (Logger) LogManager.getLogger(RepairsCommand.class);
+    try {
+      appender.start();
+      logger.addAppender(appender);
+      Object result = shell.evaluate(() -> "repair show empty commit metadata");
+      assertTrue(ShellEvaluationResultUtil.isSuccess(result));
+
+      final List<LogEvent> log = appender.getLog();
+      // Instants 4, 8, 12 and 16 were re-created above.
+      assertEquals(4, log.size());
+      log.forEach(loggingEvent -> {
+        assertEquals(Level.WARN, loggingEvent.getLevel());
+        String message = loggingEvent.getMessage().getFormattedMessage();
+        assertTrue(message.contains("Empty Commit: "));
+        assertTrue(message.contains("COMPLETED]"));
+      });
+    } finally {
+      // Detach and stop the appender so it does not leak into other tests.
+      logger.removeAppender(appender);
+      appender.stop();
+    }
+  }
+
@Test
public void testRepairDeprecatedPartition() throws IOException {
tablePath = tablePath + "/repair_test/";
@@ -374,4 +424,23 @@ public class TestRepairsCommand extends CLIFunctionalTestHarness {
assertEquals(totalRecs, totalRecsInOldPartition);
}
}
+
+  /**
+   * Minimal log4j2 appender that records every event it receives so that a test can
+   * assert on what was logged.
+   *
+   * <p>Declared {@code static} because it does not use any state of the enclosing test.
+   */
+  static class TestLogAppender extends AbstractAppender {
+    private final List<LogEvent> log = new ArrayList<>();
+
+    protected TestLogAppender() {
+      // Random unique name; no filter, no layout, exceptions are not ignored, no properties.
+      super(UUID.randomUUID().toString(), null, null, false, null);
+    }
+
+    /**
+     * Records the event.
+     *
+     * @param event the event handed over by the logger
+     */
+    @Override
+    public void append(LogEvent event) {
+      // Log4j2 may reuse mutable event instances once append() returns, so keep a snapshot.
+      log.add(event.toImmutable());
+    }
+
+    /**
+     * @return a copy of the events recorded so far, in arrival order
+     */
+    public List<LogEvent> getLog() {
+      return new ArrayList<>(log);
+    }
+  }
}
+
+