You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iotdb.apache.org by ja...@apache.org on 2022/07/07 09:27:23 UTC

[iotdb] branch rel/0.12 updated: print all bad tsfiles, including previous files overlap with current file and later files (#6614)

This is an automated email from the ASF dual-hosted git repository.

jackietien pushed a commit to branch rel/0.12
in repository https://gitbox.apache.org/repos/asf/iotdb.git


The following commit(s) were added to refs/heads/rel/0.12 by this push:
     new 1852eb9918 print all bad tsfiles, including previous files overlap with current file and later files (#6614)
1852eb9918 is described below

commit 1852eb9918a14a5167ef27204a770d49e3f34c05
Author: 周沛辰 <45...@users.noreply.github.com>
AuthorDate: Thu Jul 7 17:27:18 2022 +0800

    print all bad tsfiles, including previous files overlap with current file and later files (#6614)
---
 .../org/apache/iotdb/TsFileValidationTool.java     | 131 +++++++++++++++------
 1 file changed, 98 insertions(+), 33 deletions(-)

diff --git a/rewriteFileTool/src/main/java/org/apache/iotdb/TsFileValidationTool.java b/rewriteFileTool/src/main/java/org/apache/iotdb/TsFileValidationTool.java
index beea1a1075..92a0a1e677 100644
--- a/rewriteFileTool/src/main/java/org/apache/iotdb/TsFileValidationTool.java
+++ b/rewriteFileTool/src/main/java/org/apache/iotdb/TsFileValidationTool.java
@@ -32,6 +32,7 @@ import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
 import org.apache.iotdb.tsfile.read.TsFileSequenceReader;
 import org.apache.iotdb.tsfile.read.common.BatchData;
 import org.apache.iotdb.tsfile.read.reader.page.PageReader;
+import org.apache.iotdb.tsfile.utils.Pair;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -49,6 +50,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Objects;
 
+import static org.apache.iotdb.tsfile.common.constant.TsFileConstant.PATH_SEPARATOR;
 import static org.apache.iotdb.tsfile.common.constant.TsFileConstant.TSFILE_SUFFIX;
 
 /**
@@ -125,16 +127,12 @@ public class TsFileValidationTool {
             continue;
           }
           // get time partition dirs and sort them
-          List<File> timePartitionDirs = new ArrayList<>();
-          for (File file : Objects.requireNonNull(dataRegionDir.listFiles())) {
-            if (file != null && !file.getName().endsWith(".DS_Store")) {
-              timePartitionDirs.add(file);
-            }
-          }
+          List<File> timePartitionDirs =
+              Arrays.asList(Objects.requireNonNull(dataRegionDir.listFiles()));
           timePartitionDirs.sort(
               (f1, f2) ->
                   Long.compareUnsigned(Long.parseLong(f1.getName()), Long.parseLong(f2.getName())));
-          for (File timePartitionDir : timePartitionDirs) {
+          for (File timePartitionDir : Objects.requireNonNull(timePartitionDirs)) {
             if (!checkIsDirectory(timePartitionDir)) {
               continue;
             }
@@ -164,12 +162,15 @@ public class TsFileValidationTool {
   }
 
   private static void findUncorrectFiles(List<File> tsFiles) {
-    // measurementID -> [lastTime, endTimeInLastFile]
-    Map<String, long[]> measurementLastTime = new HashMap<>();
-    // deviceID -> endTime, the endTime of device in the last seq file
-    Map<String, Long> deviceEndTime = new HashMap<>();
+    // measurementID -> <fileName, [lastTime, endTimeInLastFile]>
+    Map<String, Pair<String, long[]>> measurementLastTime = new HashMap<>();
+    // deviceID -> <fileName, endTime>, the endTime of device in the last seq file
+    Map<String, Pair<String, Long>> deviceEndTime = new HashMap<>();
+    // fileName -> isBadFile
+    Map<String, Boolean> isBadFileMap = new HashMap<>();
 
     for (File tsFile : tsFiles) {
+      List<String> previousBadFileMsgs = new ArrayList<>();
       try {
         TsFileResource resource = new TsFileResource(tsFile);
         if (!new File(tsFile.getAbsolutePath() + TsFileResource.RESOURCE_SUFFIX).exists()) {
@@ -181,7 +182,8 @@ public class TsFileValidationTool {
         } else {
           resource.deserialize();
         }
-        boolean isBadFile = false;
+        isBadFileMap.put(tsFile.getName(), false);
+
         try (TsFileSequenceReader reader = new TsFileSequenceReader(tsFile.getAbsolutePath())) {
           // deviceID -> has checked overlap or not
           Map<String, Boolean> hasCheckedDeviceOverlap = new HashMap<>();
@@ -198,19 +200,15 @@ public class TsFileValidationTool {
               case MetaMarker.CHUNK_HEADER:
               case MetaMarker.ONLY_ONE_PAGE_CHUNK_HEADER:
                 ChunkHeader header = reader.readChunkHeader(marker);
-                if (header.getDataSize() == 0) {
-                  // empty value chunk
-                  break;
-                }
                 long currentChunkEndTime = Long.MIN_VALUE;
-                String measurementID = deviceID + "." + header.getMeasurementID();
+                String measurementID = deviceID + PATH_SEPARATOR + header.getMeasurementID();
                 hasMeasurementPrintedDetails.computeIfAbsent(measurementID, k -> new boolean[4]);
                 measurementLastTime.computeIfAbsent(
                     measurementID,
                     k -> {
                       long[] arr = new long[2];
                       Arrays.fill(arr, Long.MIN_VALUE);
-                      return arr;
+                      return new Pair<>("", arr);
                     });
                 Decoder defaultTimeDecoder =
                     Decoder.getDecoderByType(
@@ -236,22 +234,45 @@ public class TsFileValidationTool {
                   BatchData batchData = pageReader.getAllSatisfiedPageData();
                   while (batchData.hasCurrent()) {
                     long timestamp = batchData.currentTime();
-                    if (timestamp <= measurementLastTime.get(measurementID)[0]) {
+                    if (timestamp <= measurementLastTime.get(measurementID).right[0]) {
                       // find bad file
-                      if (!isBadFile) {
+                      if (timestamp <= measurementLastTime.get(measurementID).right[1]) {
+                        // overlap between files, then add previous bad file path to list
+                        if (!isBadFileMap.get(measurementLastTime.get(measurementID).left)) {
+                          if (printDetails) {
+                            previousBadFileMsgs.add(
+                                "-- Find the bad file "
+                                    + tsFile.getParentFile().getAbsolutePath()
+                                    + File.separator
+                                    + measurementLastTime.get(measurementID).left
+                                    + ", overlap with later files.");
+                          } else {
+                            previousBadFileMsgs.add(
+                                tsFile.getParentFile().getAbsolutePath()
+                                    + File.separator
+                                    + measurementLastTime.get(measurementID).left);
+                          }
+                          badFileNum++;
+                          isBadFileMap.put(measurementLastTime.get(measurementID).left, true);
+                        }
+                      }
+                      if (!isBadFileMap.get(tsFile.getName())) {
                         if (printDetails) {
                           printBoth("-- Find the bad file " + tsFile.getAbsolutePath());
                         } else {
                           printBoth(tsFile.getAbsolutePath());
                         }
-                        isBadFile = true;
+                        isBadFileMap.put(tsFile.getName(), true);
                         badFileNum++;
                       }
                       if (printDetails) {
-                        if (timestamp <= measurementLastTime.get(measurementID)[1]) {
+                        if (timestamp <= measurementLastTime.get(measurementID).right[1]) {
                           if (!hasMeasurementPrintedDetails.get(measurementID)[0]) {
                             printBoth(
-                                "-------- Timeseries " + measurementID + " overlap between files");
+                                "-------- Timeseries "
+                                    + measurementID
+                                    + " overlap between files, with previous file "
+                                    + measurementLastTime.get(measurementID).left);
                             hasMeasurementPrintedDetails.get(measurementID)[0] = true;
                           }
                         } else if (timestamp
@@ -278,12 +299,13 @@ public class TsFileValidationTool {
                         }
                       }
                     } else {
-                      measurementLastTime.get(measurementID)[0] = timestamp;
+                      measurementLastTime.get(measurementID).right[0] = timestamp;
                       currentPageEndTime = timestamp;
                       currentChunkEndTime = timestamp;
                     }
                     batchData.next();
                   }
+
                   dataSize -= pageHeader.getSerializedPageSize();
                   lastPageEndTime = Math.max(lastPageEndTime, currentPageEndTime);
                 }
@@ -297,27 +319,61 @@ public class TsFileValidationTool {
                 if (!deviceID.equals("")) {
                   // record the end time of last device in current file
                   if (resource.getEndTime(deviceID)
-                      > deviceEndTime.getOrDefault(deviceID, Long.MIN_VALUE)) {
-                    deviceEndTime.put(deviceID, resource.getEndTime(deviceID));
+                      > deviceEndTime.computeIfAbsent(
+                              deviceID,
+                              k -> {
+                                return new Pair<>("", Long.MIN_VALUE);
+                              })
+                          .right) {
+                    deviceEndTime.get(deviceID).left = tsFile.getName();
+                    deviceEndTime.get(deviceID).right = resource.getEndTime(deviceID);
                   }
                 }
                 ChunkGroupHeader chunkGroupHeader = reader.readChunkGroupHeader();
                 deviceID = chunkGroupHeader.getDeviceID();
                 if (!hasCheckedDeviceOverlap.getOrDefault(deviceID, false)
                     && resource.getStartTime(deviceID)
-                        <= deviceEndTime.getOrDefault(deviceID, Long.MIN_VALUE)) {
+                        <= deviceEndTime.computeIfAbsent(
+                                deviceID,
+                                k -> {
+                                  return new Pair<>("", Long.MIN_VALUE);
+                                })
+                            .right) {
                   // find bad file
-                  if (!isBadFile) {
+                  // add previous bad file msg to list
+                  if (!isBadFileMap.get(deviceEndTime.get(deviceID).left)) {
+                    if (printDetails) {
+                      previousBadFileMsgs.add(
+                          "-- Find the bad file "
+                              + tsFile.getParentFile().getAbsolutePath()
+                              + File.separator
+                              + deviceEndTime.get(deviceID).left
+                              + ", overlap with later files.");
+                    } else {
+                      previousBadFileMsgs.add(
+                          tsFile.getParentFile().getAbsolutePath()
+                              + File.separator
+                              + deviceEndTime.get(deviceID).left);
+                    }
+                    isBadFileMap.put(deviceEndTime.get(deviceID).left, true);
+                    badFileNum++;
+                  }
+                  // print current file
+                  if (!isBadFileMap.get(tsFile.getName())) {
                     if (printDetails) {
                       printBoth("-- Find the bad file " + tsFile.getAbsolutePath());
                     } else {
                       printBoth(tsFile.getAbsolutePath());
                     }
-                    isBadFile = true;
+                    isBadFileMap.put(tsFile.getName(), true);
                     badFileNum++;
                   }
                   if (printDetails) {
-                    printBoth("---- Device " + deviceID + " overlap between files");
+                    printBoth(
+                        "---- Device "
+                            + deviceID
+                            + " overlap between files, with previous file "
+                            + deviceEndTime.get(deviceID).left);
                   }
                 }
                 hasCheckedDeviceOverlap.put(deviceID, true);
@@ -332,14 +388,23 @@ public class TsFileValidationTool {
 
           // record the end time of each timeseries in current file
           for (Map.Entry<String, Long> entry : lashChunkEndTime.entrySet()) {
-            Long endTime = Math.max(measurementLastTime.get(entry.getKey())[1], entry.getValue());
-            measurementLastTime.get(entry.getKey())[1] = endTime;
+            if (measurementLastTime.get(entry.getKey()).right[1] <= entry.getValue()) {
+              measurementLastTime.get(entry.getKey()).right[1] = entry.getValue();
+              measurementLastTime.get(entry.getKey()).left = tsFile.getName();
+            }
           }
         }
       } catch (Throwable e) {
         logger.error("Meet errors in reading file {} , skip it.", tsFile.getAbsolutePath(), e);
         if (printDetails) {
-          printBoth("-- Meet errors in reading file " + tsFile.getAbsolutePath());
+          printBoth(
+              "-- Meet errors in reading file "
+                  + tsFile.getAbsolutePath()
+                  + ", tsfile may be corrupted.");
+        }
+      } finally {
+        for (String msg : previousBadFileMsgs) {
+          printBoth(msg);
         }
       }
     }