You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by si...@apache.org on 2022/10/25 03:08:01 UTC

[hudi] branch master updated: [HUDI-4201] Cli tool to get warned about empty non-completed instants from timeline (#6867)

This is an automated email from the ASF dual-hosted git repository.

sivabalan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 49b9ba02c7 [HUDI-4201] Cli tool to get warned about empty non-completed instants from timeline (#6867)
49b9ba02c7 is described below

commit 49b9ba02c78c4a2710c211e409f75b6848ad03e4
Author: Jon Vexler <jb...@gmail.com>
AuthorDate: Mon Oct 24 20:07:56 2022 -0700

    [HUDI-4201] Cli tool to get warned about empty non-completed instants from timeline (#6867)
---
 .../apache/hudi/cli/commands/RepairsCommand.java   | 15 ++++-
 .../hudi/cli/commands/TestRepairsCommand.java      | 69 ++++++++++++++++++++++
 2 files changed, 81 insertions(+), 3 deletions(-)

diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java
index 2b11e20a10..644fe61004 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java
@@ -18,8 +18,6 @@
 
 package org.apache.hudi.cli.commands;
 
-import org.apache.avro.AvroRuntimeException;
-import org.apache.hadoop.fs.Path;
 import org.apache.hudi.cli.DeDupeType;
 import org.apache.hudi.cli.HoodieCLI;
 import org.apache.hudi.cli.HoodiePrintHelper;
@@ -38,6 +36,9 @@ import org.apache.hudi.common.util.Option;
 import org.apache.hudi.common.util.PartitionPathEncodeUtils;
 import org.apache.hudi.common.util.StringUtils;
 import org.apache.hudi.exception.HoodieIOException;
+
+import org.apache.avro.AvroRuntimeException;
+import org.apache.hadoop.fs.Path;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import org.apache.spark.launcher.SparkLauncher;
@@ -45,7 +46,6 @@ import org.apache.spark.util.Utils;
 import org.springframework.shell.standard.ShellComponent;
 import org.springframework.shell.standard.ShellMethod;
 import org.springframework.shell.standard.ShellOption;
-import scala.collection.JavaConverters;
 
 import java.io.FileInputStream;
 import java.io.IOException;
@@ -55,6 +55,8 @@ import java.util.Properties;
 import java.util.TreeSet;
 import java.util.stream.Collectors;
 
+import scala.collection.JavaConverters;
+
 import static org.apache.hudi.common.table.HoodieTableMetaClient.METAFOLDER_NAME;
 
 /**
@@ -205,6 +207,13 @@ public class RepairsCommand {
     });
   }
 
+  @ShellMethod(key = "repair show empty commit metadata", value = "show failed commits")
+  public void showFailedCommits() {
+    // Log a warning for each completed instant that the active timeline reports as empty.
+    HoodieActiveTimeline timeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
+    timeline.filterCompletedInstants().getInstants().filter(timeline::isEmpty).forEach(instant -> LOG.warn("Empty Commit: " + instant.toString()));
+  }
+
   @ShellMethod(key = "repair migrate-partition-meta", value = "Migrate all partition meta file currently stored in text format "
       + "to be stored in base file format. See HoodieTableConfig#PARTITION_METAFILE_USE_DATA_FORMAT.")
   public String migratePartitionMeta(
diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java
index 29377c21ea..c7492bba2f 100644
--- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java
+++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java
@@ -45,6 +45,9 @@ import org.apache.avro.generic.GenericRecord;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.core.LogEvent;
+import org.apache.logging.log4j.core.appender.AbstractAppender;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.sql.SQLContext;
 import org.junit.jupiter.api.AfterEach;
@@ -54,6 +57,8 @@ import org.junit.jupiter.api.Test;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.boot.test.context.SpringBootTest;
 import org.springframework.shell.Shell;
+import org.apache.logging.log4j.Level;
+import org.apache.logging.log4j.core.Logger;
 
 import java.io.FileInputStream;
 import java.io.IOException;
@@ -65,6 +70,7 @@ import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 import java.util.Properties;
+import java.util.UUID;
 import java.util.stream.Collectors;
 
 import static org.apache.hudi.common.table.HoodieTableConfig.ARCHIVELOG_FOLDER;
@@ -259,6 +265,50 @@ public class TestRepairsCommand extends CLIFunctionalTestHarness {
     assertEquals(0, metaClient.getActiveTimeline().filterInflightsAndRequested().getInstants().count());
   }
 
+  /**
+   * Test case for "repair show empty commit metadata": the command is expected to emit
+   * exactly one WARN log event for each instant that was re-created below.
+   */
+  @Test
+  public void testShowFailedCommits() {
+    HoodieCLI.conf = hadoopConf();
+
+    Configuration conf = HoodieCLI.conf;
+
+    HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient();
+
+    for (int i = 1; i < 20; i++) {
+      String timestamp = String.valueOf(i);
+      // Create a commit file for each timestamp 1..19
+      HoodieTestCommitMetadataGenerator.createCommitFile(tablePath, timestamp, conf);
+    }
+
+    // Delete and re-create the instants 4, 8, 12 and 16; these are the ones expected to be reported.
+    metaClient.getActiveTimeline().getInstants().filter(hoodieInstant -> Integer.parseInt(hoodieInstant.getTimestamp()) % 4 == 0).forEach(hoodieInstant -> {
+      metaClient.getActiveTimeline().deleteInstantFileIfExists(hoodieInstant);
+      metaClient.getActiveTimeline().createNewInstant(hoodieInstant);
+    });
+
+    final TestLogAppender appender = new TestLogAppender();
+    final Logger logger = (Logger) LogManager.getLogger(RepairsCommand.class);
+    try {
+      appender.start();
+      logger.addAppender(appender);
+      Object result = shell.evaluate(() -> "repair show empty commit metadata");
+      assertTrue(ShellEvaluationResultUtil.isSuccess(result));
+      final List<LogEvent> log = appender.getLog();
+      assertEquals(4, log.size());
+      log.forEach(loggingEvent -> {
+        assertEquals(Level.WARN, loggingEvent.getLevel());
+        assertTrue(loggingEvent.getMessage().getFormattedMessage().contains("Empty Commit: "));
+        assertTrue(loggingEvent.getMessage().getFormattedMessage().contains("COMPLETED]"));
+      });
+    } finally {
+      logger.removeAppender(appender);
+      appender.stop();
+    }
+  }
+
   @Test
   public void testRepairDeprecatedPartition() throws IOException {
     tablePath = tablePath + "/repair_test/";
@@ -374,4 +424,23 @@ public class TestRepairsCommand extends CLIFunctionalTestHarness {
       assertEquals(totalRecs, totalRecsInOldPartition);
     }
   }
+
+  /** Log4j2 appender that keeps every event it receives in memory so a test can inspect them. */
+  static class TestLogAppender extends AbstractAppender {
+    private final List<LogEvent> log = new ArrayList<>();
+    protected TestLogAppender() {
+      super(UUID.randomUUID().toString(), null, null, false, null);
+    }
+
+    @Override
+    public void append(LogEvent event) {
+      log.add(event.toImmutable());
+    }
+
+    public List<LogEvent> getLog() {
+      return new ArrayList<>(log);
+    }
+  }
 }
+
+