You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by GitBox <gi...@apache.org> on 2020/07/06 00:47:40 UTC

[GitHub] [hudi] vinothchandar commented on a change in pull request #1756: [HUDI-839] Adding unit test for MarkerFiles,RollbackUtils, RollbackActionExecutor for markers and filelisting

vinothchandar commented on a change in pull request #1756:
URL: https://github.com/apache/hudi/pull/1756#discussion_r449786823



##########
File path: hudi-client/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java
##########
@@ -113,8 +109,9 @@ private void init(String fileId, String partitionPath, HoodieBaseFile dataFileTo
       partitionMetadata.trySave(getPartitionId());
 
       oldFilePath = new Path(config.getBasePath() + "/" + partitionPath + "/" + latestValidFilePath);
+      String newFileName = FSUtils.makeDataFileName(instantTime, writeToken, fileId);

Review comment:
       probably need to ensure we are getting the base file format extension from the hoodieTable instance? 

##########
File path: hudi-client/src/test/java/org/apache/hudi/table/action/rollback/TestMergeOnReadRollbackActionExecutor.java
##########
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.table.action.rollback;
+
+import org.apache.hudi.client.HoodieWriteClient;
+import org.apache.hudi.client.WriteStatus;
+import org.apache.hudi.common.HoodieRollbackStat;
+
+import org.apache.hudi.common.model.FileSlice;
+import org.apache.hudi.common.model.HoodieFileGroup;
+import org.apache.hudi.common.model.HoodieLogFile;
+import org.apache.hudi.common.model.HoodieTableType;
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
+import org.apache.hudi.common.table.timeline.HoodieTimeline;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.table.HoodieTable;
+import org.apache.hudi.testutils.HoodieClientTestBase;
+import org.apache.hudi.testutils.HoodieTestDataGenerator;
+import org.apache.spark.api.java.JavaRDD;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import static org.apache.hudi.testutils.HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH;
+import static org.apache.hudi.testutils.HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+
+public class TestMergeOnReadRollbackActionExecutor extends HoodieClientTestBase {
+  @Override
+  protected HoodieTableType getTableType() {
+    return HoodieTableType.MERGE_ON_READ;
+  }
+
+  @BeforeEach
+  public void setUp() throws Exception {
+    initPath();
+    initSparkContexts();
+    //just generate tow partitions
+    dataGen = new HoodieTestDataGenerator(new String[]{DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH});
+    initFileSystem();
+    initMetaClient();
+  }
+
+  @AfterEach
+  public void tearDown() throws Exception {
+    cleanupResources();
+  }
+
+  private void twoUpsertCommitDataRollBack(boolean isUsingMarkers) throws IOException, InterruptedException {

Review comment:
       anyway to share code with the COW test?

##########
File path: hudi-client/src/test/java/org/apache/hudi/index/TestHoodieIndex.java
##########
@@ -328,6 +330,18 @@ public void testSimpleTagLocationAndUpdateWithRollback(IndexType indexType) thro
     assert (javaRDD.filter(record -> record.getCurrentLocation() != null).collect().size() == 0);
   }
 
+  @ParameterizedTest
+  @EnumSource(value = IndexType.class, names = {"BLOOM", "GLOBAL_BLOOM", "SIMPLE", "GLOBAL_SIMPLE"})

Review comment:
       Similar here.. this test probably does not need to test these two modes? for e.g: what additional testing are we getting over the test schema evolution test by doing this?

##########
File path: hudi-client/src/main/java/org/apache/hudi/table/action/rollback/MarkerBasedRollbackStrategy.java
##########
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.table.action.rollback;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hudi.common.HoodieRollbackStat;
+import org.apache.hudi.common.fs.FSUtils;
+import org.apache.hudi.common.model.HoodieLogFile;
+import org.apache.hudi.common.table.log.HoodieLogFormat;
+import org.apache.hudi.common.table.log.block.HoodieCommandBlock;
+import org.apache.hudi.common.table.log.block.HoodieLogBlock;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.exception.HoodieIOException;
+import org.apache.hudi.exception.HoodieRollbackException;
+import org.apache.hudi.table.HoodieTable;
+import org.apache.hudi.table.MarkerFiles;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+import org.apache.spark.api.java.JavaSparkContext;
+import scala.Tuple2;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Performs rollback using marker files generated during the write..
+ */
+public class MarkerBasedRollbackStrategy implements BaseRollbackActionExecutor.RollbackStrategy {
+
+  private static final Logger LOG = LogManager.getLogger(MarkerBasedRollbackStrategy.class);
+
+  private final HoodieTable<?> table;
+
+  private final transient JavaSparkContext jsc;
+
+  private final HoodieWriteConfig config;
+
+  private final String basePath;
+
+  private final String instantTime;
+
+  public MarkerBasedRollbackStrategy(HoodieTable<?> table, JavaSparkContext jsc, HoodieWriteConfig config, String instantTime) {
+    this.table = table;
+    this.jsc = jsc;
+    this.basePath = table.getMetaClient().getBasePath();
+    this.config = config;
+    this.instantTime = instantTime;
+  }
+
+  private HoodieRollbackStat undoMerge(String mergedBaseFilePath) throws IOException {
+    LOG.info("Rolling back by deleting the merged base file:" + mergedBaseFilePath);
+    return deleteBaseFile(mergedBaseFilePath);
+  }
+
+  private HoodieRollbackStat undoCreate(String createdBaseFilePath) throws IOException {
+    LOG.info("Rolling back by deleting the created base file:" + createdBaseFilePath);
+

Review comment:
       nit:extra line

##########
File path: hudi-client/src/main/java/org/apache/hudi/table/action/rollback/ListingBasedRollbackHelper.java
##########
@@ -54,29 +53,28 @@
 /**
  * Performs Rollback of Hoodie Tables.
  */
-public class RollbackHelper implements Serializable {
+public class ListingBasedRollbackHelper implements Serializable {
 
-  private static final Logger LOG = LogManager.getLogger(RollbackHelper.class);
+  private static final Logger LOG = LogManager.getLogger(ListingBasedRollbackHelper.class);
 
   private final HoodieTableMetaClient metaClient;
   private final HoodieWriteConfig config;
 
-  public RollbackHelper(HoodieTableMetaClient metaClient, HoodieWriteConfig config) {
+  public ListingBasedRollbackHelper(HoodieTableMetaClient metaClient, HoodieWriteConfig config) {
     this.metaClient = metaClient;
     this.config = config;
   }
 
   /**
    * Performs all rollback actions that we have collected in parallel.
    */
-  public List<HoodieRollbackStat> performRollback(JavaSparkContext jsc, HoodieInstant instantToRollback, List<RollbackRequest> rollbackRequests) {
+  public List<HoodieRollbackStat> performRollback(JavaSparkContext jsc, HoodieInstant instantToRollback, List<ListingBasedRollbackRequest> rollbackRequests) {
 
-    String basefileExtension = metaClient.getTableConfig().getBaseFileFormat().getFileExtension();
     SerializablePathFilter filter = (path) -> {
-      if (path.toString().contains(basefileExtension)) {
+      if (path.toString().endsWith(HoodieFileFormat.PARQUET.getFileExtension())) {

Review comment:
       these could be replaced with base format from the table object?

##########
File path: hudi-client/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java
##########
@@ -278,6 +286,11 @@ public WriteStatus getWriteStatus() {
     return writeStatus;
   }
 
+  @Override
+  public MarkerFiles.MarkerType getIOType() {

Review comment:
       Reflecting on this, probably good to rename `MarkerType` to `IOType`.. 

##########
File path: hudi-client/src/main/java/org/apache/hudi/client/HoodieWriteClient.java
##########
@@ -332,9 +333,11 @@ public static SparkConf registerClasses(SparkConf conf) {
   }
 
   @Override
-  protected void postCommit(HoodieCommitMetadata metadata, String instantTime,
-      Option<Map<String, String>> extraMetadata) {
+  protected void postCommit(HoodieTable<?> table, HoodieCommitMetadata metadata, String instantTime, Option<Map<String, String>> extraMetadata) {
     try {
+      if (!config.getRollBackUsingMarkers()) {

Review comment:
       I am wondering if we should do this always.. having this logic be rollback dependent can become hard to reason with in the long run

##########
File path: hudi-client/src/test/java/org/apache/hudi/table/action/rollback/TestCopyOnWriteRollbackActionExecutor.java
##########
@@ -0,0 +1,247 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.table.action.rollback;
+
+import org.apache.hudi.common.model.FileSlice;
+import org.apache.hudi.common.model.HoodieFileGroup;
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
+import org.apache.hudi.common.table.timeline.HoodieTimeline;
+import org.apache.hudi.client.HoodieWriteClient;
+import org.apache.hudi.client.WriteStatus;
+import org.apache.hudi.common.HoodieRollbackStat;
+import org.apache.hudi.common.testutils.HoodieTestUtils;
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.config.HoodieIndexConfig;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.index.HoodieIndex;
+import org.apache.hudi.table.HoodieTable;
+import org.apache.hudi.testutils.HoodieClientTestBase;
+import org.apache.hudi.testutils.HoodieTestDataGenerator;
+import org.apache.spark.api.java.JavaRDD;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import static org.apache.hudi.testutils.HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH;
+import static org.apache.hudi.testutils.HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class TestCopyOnWriteRollbackActionExecutor extends HoodieClientTestBase {
+  @BeforeEach
+  public void setUp() throws Exception {
+    initPath();
+    initSparkContexts();
+    //just generate tow partitions

Review comment:
       typo:two

##########
File path: hudi-client/src/main/java/org/apache/hudi/io/HoodieWriteHandle.java
##########
@@ -97,28 +98,9 @@ public Path makeNewPath(String partitionPath) {
    *
    * @param partitionPath Partition path
    */
-  protected void createMarkerFile(String partitionPath) {
-    Path markerPath = makeNewMarkerPath(partitionPath);
-    try {
-      LOG.info("Creating Marker Path=" + markerPath);
-      fs.create(markerPath, false).close();
-    } catch (IOException e) {
-      throw new HoodieException("Failed to create marker file " + markerPath, e);
-    }
-  }
-
-  /**
-   * THe marker path will be <base-path>/.hoodie/.temp/<instant_ts>/2019/04/25/filename.
-   */
-  private Path makeNewMarkerPath(String partitionPath) {
-    Path markerRootPath = new Path(hoodieTable.getMetaClient().getMarkerFolderPath(instantTime));
-    Path path = FSUtils.getPartitionPath(markerRootPath, partitionPath);
-    try {
-      fs.mkdirs(path); // create a new partition as needed.
-    } catch (IOException e) {
-      throw new HoodieIOException("Failed to make dir " + path, e);
-    }
-    return new Path(path.toString(), FSUtils.makeMarkerFile(instantTime, writeToken, fileId));
+  protected void createMarkerFile(String partitionPath, String dataFileName) {
+    MarkerFiles markerFiles = new MarkerFiles(hoodieTable, instantTime);
+    markerFiles.createMarkerFile(partitionPath, dataFileName, getIOType());

Review comment:
       nit: we can just name this `create()` 

##########
File path: hudi-client/src/test/java/org/apache/hudi/client/TestTableSchemaEvolution.java
##########
@@ -408,6 +416,16 @@ public void testCopyOnWriteTable() throws Exception {
     checkReadRecords("000", 2 * numRecords);
   }
 
+  @Test
+  public void testCopyOnWriteTableUsingFileListRollBack() throws Exception {
+    testCopyOnWriteTable(false);
+  }
+
+  @Test
+  public void testCopyOnWriteTableUsingMarkersRollBack() throws Exception {

Review comment:
       these combinations added on Schema evolution test is a bit hard to understand. like why would be testing modes for rollbacks in a schema evolution test? any particular reason?

##########
File path: hudi-client/src/test/java/org/apache/hudi/table/TestHoodieMergeOnReadTable.java
##########
@@ -445,10 +442,20 @@ public void testCOWToMORConvertedTableRollback(HoodieFileFormat baseFileFormat)
 
   @ParameterizedTest
   @MethodSource("argumentsProvider")
-  public void testRollbackWithDeltaAndCompactionCommit(HoodieFileFormat baseFileFormat) throws Exception {
+  public void testCOWToMORConvertedTableRollbackUsingFileList(HoodieFileFormat baseFileFormat) throws Exception {

Review comment:
       these may be worth adding in both modes.. 

##########
File path: hudi-client/src/test/java/org/apache/hudi/table/TestCleaner.java
##########
@@ -904,6 +901,19 @@ public void testCleanMarkerDataFilesOnRollback() throws IOException {
     assertEquals(0, getTotalTempFiles(), "All temp files are deleted.");
   }
 
+  /**
+   * Test Cleaning functionality of table.rollback() API.
+   */
+  @Test
+  public void testCleanMarkerDataFilesOnRollbackUsingFileList() throws IOException {

Review comment:
       same comment here.. 




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org