You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by GitBox <gi...@apache.org> on 2022/04/07 20:03:08 UTC

[GitHub] [hudi] yihua commented on a diff in pull request #5234: [HUDI-3637] Exclude uncommitted log files from metadata table validation

yihua commented on code in PR #5234:
URL: https://github.com/apache/hudi/pull/5234#discussion_r845518740


##########
hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java:
##########
@@ -723,6 +722,121 @@ private void validateBloomFilters(
     }
   }
 
+  private void validateFileSlices(
+      List<FileSlice> fileSliceListFromMetadataTable, List<FileSlice> fileSliceListFromFS,
+      String partitionPath, HoodieTableMetaClient metaClient, String label) {
+    boolean mismatch = false;
+    if (fileSliceListFromMetadataTable.size() != fileSliceListFromFS.size()) {
+      mismatch = true;
+    } else if (!fileSliceListFromMetadataTable.equals(fileSliceListFromFS)) {
+      for (int i = 0; i < fileSliceListFromMetadataTable.size(); i++) {
+        FileSlice fileSlice1 = fileSliceListFromMetadataTable.get(i);
+        FileSlice fileSlice2 = fileSliceListFromFS.get(i);
+        if (!Objects.equals(fileSlice1.getFileGroupId(), fileSlice2.getFileGroupId())
+            || !Objects.equals(fileSlice1.getBaseInstantTime(), fileSlice2.getBaseInstantTime())
+            || !Objects.equals(fileSlice1.getBaseFile(), fileSlice2.getBaseFile())) {
+          mismatch = true;
+          break;
+        }
+        if (!areFileSliceCommittedLogFilesMatching(fileSlice1, fileSlice2, metaClient)) {
+          mismatch = true;
+          break;
+        } else {
+          LOG.warn(String.format("There are uncommitted log files in the latest file slices "
+              + "but the committed log files match: %s %s", fileSlice1, fileSlice2));
+        }
+      }
+    }
+
+    if (mismatch) {
+      String message = String.format("Validation of %s for partition %s failed."
+              + "\n%s from metadata: %s\n%s from file system and base files: %s",
+          label, partitionPath, label, fileSliceListFromMetadataTable, label, fileSliceListFromFS);
+      LOG.error(message);
+      throw new HoodieValidationException(message);
+    } else {
+      LOG.info(String.format("Validation of %s succeeded for partition %s", label, partitionPath));
+    }
+  }
+
+  /**
+   * Compares committed log files from two file slices.
+   *
+   * @param fs1        File slice 1
+   * @param fs2        File slice 2
+   * @param metaClient {@link HoodieTableMetaClient} instance
+   * @return {@code true} if matching; {@code false} otherwise.
+   */
+  private boolean areFileSliceCommittedLogFilesMatching(
+      FileSlice fs1, FileSlice fs2, HoodieTableMetaClient metaClient) {
+    Set<String> fs1LogPathSet =

Review Comment:
   That makes sense. We can follow up with a change later; this is tracked in HUDI-3822.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@hudi.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org