You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iotdb.apache.org by qi...@apache.org on 2022/07/07 00:49:47 UTC

[iotdb] branch master updated: [IOTDB-3387]Print all bad files in Validation Tool (#6489)

This is an automated email from the ASF dual-hosted git repository.

qiaojialin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iotdb.git


The following commit(s) were added to refs/heads/master by this push:
     new d7f26ab019 [IOTDB-3387]Print all bad files in Validation Tool (#6489)
d7f26ab019 is described below

commit d7f26ab019be657f9f424ea1e081973426f01c6b
Author: 周沛辰 <45...@users.noreply.github.com>
AuthorDate: Thu Jul 7 08:49:42 2022 +0800

    [IOTDB-3387]Print all bad files in Validation Tool (#6489)
---
 .../db/tools/validate/TsFileValidationTool.java    | 181 +++++++++++++++++----
 1 file changed, 145 insertions(+), 36 deletions(-)

diff --git a/server/src/main/java/org/apache/iotdb/db/tools/validate/TsFileValidationTool.java b/server/src/main/java/org/apache/iotdb/db/tools/validate/TsFileValidationTool.java
index 5be1417c99..ca75905233 100644
--- a/server/src/main/java/org/apache/iotdb/db/tools/validate/TsFileValidationTool.java
+++ b/server/src/main/java/org/apache/iotdb/db/tools/validate/TsFileValidationTool.java
@@ -33,6 +33,7 @@ import org.apache.iotdb.tsfile.read.TsFileSequenceReader;
 import org.apache.iotdb.tsfile.read.common.BatchData;
 import org.apache.iotdb.tsfile.read.reader.page.PageReader;
 import org.apache.iotdb.tsfile.read.reader.page.TimePageReader;
+import org.apache.iotdb.tsfile.utils.Pair;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -96,7 +97,9 @@ public class TsFileValidationTool {
     if (printToFile) {
       pw = new PrintWriter(new FileWriter(outFilePath));
     }
-    printBoth("Start checking seq files ...");
+    if (printDetails) {
+      printBoth("Start checking seq files ...");
+    }
 
     // check tsfile, which will only check for correctness inside a single tsfile
     for (File f : fileList) {
@@ -115,7 +118,9 @@ public class TsFileValidationTool {
         if (!checkIsDirectory(sgDir)) {
           continue;
         }
-        printBoth("- Check files in storage group: " + sgDir.getAbsolutePath());
+        if (printDetails) {
+          printBoth("- Check files in storage group: " + sgDir.getAbsolutePath());
+        }
         // get data region dirs
         File[] dataRegionDirs = sgDir.listFiles();
         for (File dataRegionDir : Objects.requireNonNull(dataRegionDirs)) {
@@ -149,19 +154,24 @@ public class TsFileValidationTool {
         }
       }
     }
-    printBoth("Finish checking successfully, totally find " + badFileNum + " bad files.");
+    if (printDetails) {
+      printBoth("Finish checking successfully, totally find " + badFileNum + " bad files.");
+    }
     if (printToFile) {
       pw.close();
     }
   }
 
   private static void findUncorrectFiles(List<File> tsFiles) {
-    // measurementID -> [lastTime, endTimeInLastFile]
-    Map<String, long[]> measurementLastTime = new HashMap<>();
-    // deviceID -> endTime, the endTime of device in the last seq file
-    Map<String, Long> deviceEndTime = new HashMap<>();
+    // measurementID -> <fileName, [lastTime, endTimeInLastFile]>
+    Map<String, Pair<String, long[]>> measurementLastTime = new HashMap<>();
+    // deviceID -> <fileName, endTime>, the endTime of device in the last seq file
+    Map<String, Pair<String, Long>> deviceEndTime = new HashMap<>();
+    // fileName -> isBadFile
+    Map<String, Boolean> isBadFileMap = new HashMap<>();
 
     for (File tsFile : tsFiles) {
+      List<String> previousBadFileMsgs = new ArrayList<>();
       try {
         TsFileResource resource = new TsFileResource(tsFile);
         if (!new File(tsFile.getAbsolutePath() + TsFileResource.RESOURCE_SUFFIX).exists()) {
@@ -173,7 +183,8 @@ public class TsFileValidationTool {
         } else {
           resource.deserialize();
         }
-        boolean isBadFile = false;
+        isBadFileMap.put(tsFile.getName(), false);
+
         try (TsFileSequenceReader reader = new TsFileSequenceReader(tsFile.getAbsolutePath())) {
           // deviceID -> has checked overlap or not
           Map<String, Boolean> hasCheckedDeviceOverlap = new HashMap<>();
@@ -206,7 +217,7 @@ public class TsFileValidationTool {
                     k -> {
                       long[] arr = new long[2];
                       Arrays.fill(arr, Long.MIN_VALUE);
-                      return arr;
+                      return new Pair<>("", arr);
                     });
                 Decoder defaultTimeDecoder =
                     Decoder.getDecoderByType(
@@ -233,20 +244,47 @@ public class TsFileValidationTool {
                     long[] timeBatch = timePageReader.getNextTimeBatch();
                     for (int i = 0; i < timeBatch.length; i++) {
                       long timestamp = timeBatch[i];
-                      if (timestamp <= measurementLastTime.get(measurementID)[0]) {
+                      if (timestamp <= measurementLastTime.get(measurementID).right[0]) {
                         // find bad file
-                        if (!isBadFile) {
-                          printBoth("-- Find the bad file " + tsFile.getAbsolutePath());
-                          isBadFile = true;
+                        if (timestamp <= measurementLastTime.get(measurementID).right[1]) {
+                          // overlap between files, so add the previous bad file's path to the list
+                          String lastBadFile = measurementLastTime.get(measurementID).left;
+                          if (!isBadFileMap.get(lastBadFile)) {
+                            if (printDetails) {
+                              previousBadFileMsgs.add(
+                                  "-- Find the bad file "
+                                      + tsFile.getParentFile().getAbsolutePath()
+                                      + File.separator
+                                      + lastBadFile
+                                      + ", overlap with later files.");
+                            } else {
+                              previousBadFileMsgs.add(
+                                  tsFile.getParentFile().getAbsolutePath()
+                                      + File.separator
+                                      + lastBadFile);
+                            }
+                            isBadFileMap.put(lastBadFile, true);
+                            badFileNum++;
+                          }
+                        }
+
+                        if (!isBadFileMap.get(tsFile.getName())) {
+                          if (printDetails) {
+                            printBoth("-- Find the bad file " + tsFile.getAbsolutePath());
+                          } else {
+                            printBoth(tsFile.getAbsolutePath());
+                          }
+                          isBadFileMap.put(tsFile.getName(), true);
                           badFileNum++;
                         }
                         if (printDetails) {
-                          if (timestamp <= measurementLastTime.get(measurementID)[1]) {
+                          if (timestamp <= measurementLastTime.get(measurementID).right[1]) {
                             if (!hasMeasurementPrintedDetails.get(measurementID)[0]) {
                               printBoth(
                                   "-------- Timeseries "
                                       + measurementID
-                                      + " overlap between files");
+                                      + " overlap between files, with previous file "
+                                      + measurementLastTime.get(measurementID).left);
                               hasMeasurementPrintedDetails.get(measurementID)[0] = true;
                             }
                           } else if (timestamp
@@ -277,7 +315,7 @@ public class TsFileValidationTool {
                           }
                         }
                       } else {
-                        measurementLastTime.get(measurementID)[0] = timestamp;
+                        measurementLastTime.get(measurementID).right[0] = timestamp;
                         currentPageEndTime = timestamp;
                         currentChunkEndTime = timestamp;
                       }
@@ -293,20 +331,45 @@ public class TsFileValidationTool {
                     BatchData batchData = pageReader.getAllSatisfiedPageData();
                     while (batchData.hasCurrent()) {
                       long timestamp = batchData.currentTime();
-                      if (timestamp <= measurementLastTime.get(measurementID)[0]) {
+                      if (timestamp <= measurementLastTime.get(measurementID).right[0]) {
                         // find bad file
-                        if (!isBadFile) {
-                          printBoth("-- Find the bad file " + tsFile.getAbsolutePath());
-                          isBadFile = true;
+                        if (timestamp <= measurementLastTime.get(measurementID).right[1]) {
+                          // overlap between files, so add the previous bad file's path to the list
+                          if (!isBadFileMap.get(measurementLastTime.get(measurementID).left)) {
+                            if (printDetails) {
+                              previousBadFileMsgs.add(
+                                  "-- Find the bad file "
+                                      + tsFile.getParentFile().getAbsolutePath()
+                                      + File.separator
+                                      + measurementLastTime.get(measurementID).left
+                                      + ", overlap with later files.");
+                            } else {
+                              previousBadFileMsgs.add(
+                                  tsFile.getParentFile().getAbsolutePath()
+                                      + File.separator
+                                      + measurementLastTime.get(measurementID).left);
+                            }
+                            badFileNum++;
+                            isBadFileMap.put(measurementLastTime.get(measurementID).left, true);
+                          }
+                        }
+                        if (!isBadFileMap.get(tsFile.getName())) {
+                          if (printDetails) {
+                            printBoth("-- Find the bad file " + tsFile.getAbsolutePath());
+                          } else {
+                            printBoth(tsFile.getAbsolutePath());
+                          }
+                          isBadFileMap.put(tsFile.getName(), true);
                           badFileNum++;
                         }
                         if (printDetails) {
-                          if (timestamp <= measurementLastTime.get(measurementID)[1]) {
+                          if (timestamp <= measurementLastTime.get(measurementID).right[1]) {
                             if (!hasMeasurementPrintedDetails.get(measurementID)[0]) {
                               printBoth(
                                   "-------- Timeseries "
                                       + measurementID
-                                      + " overlap between files");
+                                      + " overlap between files, with previous file "
+                                      + measurementLastTime.get(measurementID).left);
                               hasMeasurementPrintedDetails.get(measurementID)[0] = true;
                             }
                           } else if (timestamp
@@ -337,7 +400,7 @@ public class TsFileValidationTool {
                           }
                         }
                       } else {
-                        measurementLastTime.get(measurementID)[0] = timestamp;
+                        measurementLastTime.get(measurementID).right[0] = timestamp;
                         currentPageEndTime = timestamp;
                         currentChunkEndTime = timestamp;
                       }
@@ -357,23 +420,61 @@ public class TsFileValidationTool {
                 if (!deviceID.equals("")) {
                   // record the end time of last device in current file
                   if (resource.getEndTime(deviceID)
-                      > deviceEndTime.getOrDefault(deviceID, Long.MIN_VALUE)) {
-                    deviceEndTime.put(deviceID, resource.getEndTime(deviceID));
+                      > deviceEndTime.computeIfAbsent(
+                              deviceID,
+                              k -> {
+                                return new Pair<>("", Long.MIN_VALUE);
+                              })
+                          .right) {
+                    deviceEndTime.get(deviceID).left = tsFile.getName();
+                    deviceEndTime.get(deviceID).right = resource.getEndTime(deviceID);
                   }
                 }
                 ChunkGroupHeader chunkGroupHeader = reader.readChunkGroupHeader();
                 deviceID = chunkGroupHeader.getDeviceID();
                 if (!hasCheckedDeviceOverlap.getOrDefault(deviceID, false)
                     && resource.getStartTime(deviceID)
-                        <= deviceEndTime.getOrDefault(deviceID, Long.MIN_VALUE)) {
+                        <= deviceEndTime.computeIfAbsent(
+                                deviceID,
+                                k -> {
+                                  return new Pair<>("", Long.MIN_VALUE);
+                                })
+                            .right) {
                   // find bad file
-                  if (!isBadFile) {
-                    printBoth("-- Find the bad file " + tsFile.getAbsolutePath());
-                    isBadFile = true;
+                  // add the previous bad file's message to the list
+                  if (!isBadFileMap.get(deviceEndTime.get(deviceID).left)) {
+                    if (printDetails) {
+                      previousBadFileMsgs.add(
+                          "-- Find the bad file "
+                              + tsFile.getParentFile().getAbsolutePath()
+                              + File.separator
+                              + deviceEndTime.get(deviceID).left
+                              + ", overlap with later files.");
+                    } else {
+                      previousBadFileMsgs.add(
+                          tsFile.getParentFile().getAbsolutePath()
+                              + File.separator
+                              + deviceEndTime.get(deviceID).left);
+                    }
+                    isBadFileMap.put(deviceEndTime.get(deviceID).left, true);
+                    badFileNum++;
+                  }
+                  // print current file
+                  if (!isBadFileMap.get(tsFile.getName())) {
+                    if (printDetails) {
+                      printBoth("-- Find the bad file " + tsFile.getAbsolutePath());
+                    } else {
+                      printBoth(tsFile.getAbsolutePath());
+                    }
+                    isBadFileMap.put(tsFile.getName(), true);
                     badFileNum++;
                   }
                   if (printDetails) {
-                    printBoth("---- Device " + deviceID + " overlap between files");
+                    printBoth(
+                        "---- Device "
+                            + deviceID
+                            + " overlap between files, with previous file "
+                            + deviceEndTime.get(deviceID).left);
                   }
                 }
                 hasCheckedDeviceOverlap.put(deviceID, true);
@@ -388,16 +489,24 @@ public class TsFileValidationTool {
 
           // record the end time of each timeseries in current file
           for (Map.Entry<String, Long> entry : lashChunkEndTime.entrySet()) {
-            Long endTime = Math.max(measurementLastTime.get(entry.getKey())[1], entry.getValue());
-            measurementLastTime.get(entry.getKey())[1] = endTime;
+            if (measurementLastTime.get(entry.getKey()).right[1] <= entry.getValue()) {
+              measurementLastTime.get(entry.getKey()).right[1] = entry.getValue();
+              measurementLastTime.get(entry.getKey()).left = tsFile.getName();
+            }
           }
         }
       } catch (Throwable e) {
         logger.error("Meet errors in reading file {} , skip it.", tsFile.getAbsolutePath(), e);
-        printBoth(
-            "-- Meet errors in reading file "
-                + tsFile.getAbsolutePath()
-                + ", tsfile may be corrupted.");
+        if (printDetails) {
+          printBoth(
+              "-- Meet errors in reading file "
+                  + tsFile.getAbsolutePath()
+                  + ", tsfile may be corrupted.");
+        }
+      } finally {
+        for (String msg : previousBadFileMsgs) {
+          printBoth(msg);
+        }
       }
     }
   }