Posted to commits@hudi.apache.org by GitBox <gi...@apache.org> on 2020/10/11 02:02:40 UTC

[GitHub] [hudi] xushiyan commented on a change in pull request #2167: [HUDI-995] Migrate HoodieTestUtils APIs to HoodieTestTable

xushiyan commented on a change in pull request #2167:
URL: https://github.com/apache/hudi/pull/2167#discussion_r502854170



##########
File path: hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java
##########
@@ -1098,73 +1097,82 @@ public void testCleanPreviousCorruptedCleanFiles() throws IOException {
    * @param expNumFilesDeleted Number of files deleted
    */
   private void testPendingCompactions(HoodieWriteConfig config, int expNumFilesDeleted,
-      int expNumFilesUnderCompactionDeleted, boolean retryFailure) throws IOException {
+      int expNumFilesUnderCompactionDeleted, boolean retryFailure) throws Exception {
     HoodieTableMetaClient metaClient =
         HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.MERGE_ON_READ);
-    String[] instants = new String[] {"000", "001", "003", "005", "007", "009", "011", "013"};
-    String[] compactionInstants = new String[] {"002", "004", "006", "008", "010"};
-    Map<String, String> expFileIdToPendingCompaction = new HashMap<>();
-    Map<String, String> fileIdToLatestInstantBeforeCompaction = new HashMap<>();
-    Map<String, List<FileSlice>> compactionInstantsToFileSlices = new HashMap<>();
-
-    for (String instant : instants) {
-      HoodieTestUtils.createCommitFiles(basePath, instant);
-    }
+    final String partition = "2016/03/15";
+    Map<String, String> expFileIdToPendingCompaction = new HashMap<String, String>() {
+      {
+        put("fileId2", "004");
+        put("fileId3", "006");
+        put("fileId4", "008");
+        put("fileId5", "010");
+      }
+    };
+    Map<String, String> fileIdToLatestInstantBeforeCompaction = new HashMap<String, String>() {
+      {
+        put("fileId1", "000");
+        put("fileId2", "000");
+        put("fileId3", "001");
+        put("fileId4", "003");
+        put("fileId5", "005");
+        put("fileId6", "009");
+        put("fileId7", "011");
+      }
+    };
 
     // Generate 7 file-groups. First one has only one slice and no pending compaction. File Slices (2 - 5) has
     // multiple versions with pending compaction. File Slices (6 - 7) have multiple file-slices but not under
     // compactions
     // FileIds 2-5 will be under compaction
-    int maxNumFileIds = 7;
-    String[] fileIds = new String[] {"fileId1", "fileId2", "fileId3", "fileId4", "fileId5", "fileId6", "fileId7"};
-    int maxNumFileIdsForCompaction = 4;
-    for (int i = 0; i < maxNumFileIds; i++) {
-      final String fileId = HoodieTestUtils.createDataFile(basePath,
-          HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, instants[0], fileIds[i]);
-      HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, instants[0],
-          fileId, Option.empty());
-      HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, instants[0],
-          fileId, Option.of(2));
-      fileIdToLatestInstantBeforeCompaction.put(fileId, instants[0]);
-      for (int j = 1; j <= i; j++) {
-        if (j == i && j <= maxNumFileIdsForCompaction) {
-          expFileIdToPendingCompaction.put(fileId, compactionInstants[j]);
-          metaClient = HoodieTableMetaClient.reload(metaClient);
-          HoodieTable table = HoodieSparkTable.create(config, context, metaClient);
-          FileSlice slice =
-              table.getSliceView().getLatestFileSlices(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH)
-                  .filter(fs -> fs.getFileId().equals(fileId)).findFirst().get();
-          List<FileSlice> slices = new ArrayList<>();
-          if (compactionInstantsToFileSlices.containsKey(compactionInstants[j])) {
-            slices = compactionInstantsToFileSlices.get(compactionInstants[j]);
-          }
-          slices.add(slice);
-          compactionInstantsToFileSlices.put(compactionInstants[j], slices);
-          // Add log-files to simulate delta-commits after pending compaction
-          HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH,
-              compactionInstants[j], fileId, Option.empty());
-          HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH,
-              compactionInstants[j], fileId, Option.of(2));
-        } else {
-          HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, instants[j],
-              fileId);
-          HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH,
-              instants[j], fileId, Option.empty());
-          HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH,
-              instants[j], fileId, Option.of(2));
-          fileIdToLatestInstantBeforeCompaction.put(fileId, instants[j]);
-        }
-      }
-    }
-
-    // Setup pending compaction plans
-    for (String instant : compactionInstants) {
-      List<FileSlice> fileSliceList = compactionInstantsToFileSlices.get(instant);
-      if (null != fileSliceList) {
-        HoodieTestUtils.createCompactionRequest(metaClient, instant, fileSliceList.stream()
-            .map(fs -> Pair.of(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fs)).collect(Collectors.toList()));
-      }
-    }

Review comment:
       @yanghua The original logic of the test prep is incredibly difficult to follow, so I would not recommend trying to trace it line by line. Instead, I compared the output files in the temp directory and verified they were equivalent before and after the change.
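
       For anyone who wants to repeat that check, here is a rough sketch of the kind of comparison I mean. It uses plain `java.nio.file` rather than Hudi's test utilities, and the class and method names (`DirectoryListing`, `listFiles`) are made up for illustration only:

       ```java
       import java.io.IOException;
       import java.nio.file.Files;
       import java.nio.file.Path;
       import java.nio.file.Paths;
       import java.util.Set;
       import java.util.TreeSet;
       import java.util.stream.Collectors;
       import java.util.stream.Stream;

       public class DirectoryListing {

         // Collect the paths of all regular files under root, relative to root,
         // e.g. "2016/03/15/<file name>", in sorted order so listings are comparable.
         static Set<String> listFiles(Path root) throws IOException {
           try (Stream<Path> paths = Files.walk(root)) {
             return paths.filter(Files::isRegularFile)
                 .map(p -> root.relativize(p).toString())
                 .collect(Collectors.toCollection(TreeSet::new));
           }
         }

         public static void main(String[] args) throws IOException {
           // args[0]: temp base path produced by the test before the change
           // args[1]: temp base path produced by the test after the change
           Set<String> before = listFiles(Paths.get(args[0]));
           Set<String> after = listFiles(Paths.get(args[1]));
           System.out.println("Listings equal: " + before.equals(after));
         }
       }
       ```

       Running it against the test's temp base path captured once on the old code and once on the new code should print `Listings equal: true` if the two versions of the prep lay out the same files.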




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org