You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iotdb.apache.org by ja...@apache.org on 2022/07/07 09:27:23 UTC
[iotdb] branch rel/0.12 updated: print all bad tsfiles, including previous files overlap with current file and later files (#6614)
This is an automated email from the ASF dual-hosted git repository.
jackietien pushed a commit to branch rel/0.12
in repository https://gitbox.apache.org/repos/asf/iotdb.git
The following commit(s) were added to refs/heads/rel/0.12 by this push:
new 1852eb9918 print all bad tsfiles, including previous files overlap with current file and later files (#6614)
1852eb9918 is described below
commit 1852eb9918a14a5167ef27204a770d49e3f34c05
Author: 周沛辰 <45...@users.noreply.github.com>
AuthorDate: Thu Jul 7 17:27:18 2022 +0800
print all bad tsfiles, including previous files overlap with current file and later files (#6614)
---
.../org/apache/iotdb/TsFileValidationTool.java | 131 +++++++++++++++------
1 file changed, 98 insertions(+), 33 deletions(-)
diff --git a/rewriteFileTool/src/main/java/org/apache/iotdb/TsFileValidationTool.java b/rewriteFileTool/src/main/java/org/apache/iotdb/TsFileValidationTool.java
index beea1a1075..92a0a1e677 100644
--- a/rewriteFileTool/src/main/java/org/apache/iotdb/TsFileValidationTool.java
+++ b/rewriteFileTool/src/main/java/org/apache/iotdb/TsFileValidationTool.java
@@ -32,6 +32,7 @@ import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
import org.apache.iotdb.tsfile.read.TsFileSequenceReader;
import org.apache.iotdb.tsfile.read.common.BatchData;
import org.apache.iotdb.tsfile.read.reader.page.PageReader;
+import org.apache.iotdb.tsfile.utils.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -49,6 +50,7 @@ import java.util.List;
import java.util.Map;
import java.util.Objects;
+import static org.apache.iotdb.tsfile.common.constant.TsFileConstant.PATH_SEPARATOR;
import static org.apache.iotdb.tsfile.common.constant.TsFileConstant.TSFILE_SUFFIX;
/**
@@ -125,16 +127,12 @@ public class TsFileValidationTool {
continue;
}
// get time partition dirs and sort them
- List<File> timePartitionDirs = new ArrayList<>();
- for (File file : Objects.requireNonNull(dataRegionDir.listFiles())) {
- if (file != null && !file.getName().endsWith(".DS_Store")) {
- timePartitionDirs.add(file);
- }
- }
+ List<File> timePartitionDirs =
+ Arrays.asList(Objects.requireNonNull(dataRegionDir.listFiles()));
timePartitionDirs.sort(
(f1, f2) ->
Long.compareUnsigned(Long.parseLong(f1.getName()), Long.parseLong(f2.getName())));
- for (File timePartitionDir : timePartitionDirs) {
+ for (File timePartitionDir : Objects.requireNonNull(timePartitionDirs)) {
if (!checkIsDirectory(timePartitionDir)) {
continue;
}
@@ -164,12 +162,15 @@ public class TsFileValidationTool {
}
private static void findUncorrectFiles(List<File> tsFiles) {
- // measurementID -> [lastTime, endTimeInLastFile]
- Map<String, long[]> measurementLastTime = new HashMap<>();
- // deviceID -> endTime, the endTime of device in the last seq file
- Map<String, Long> deviceEndTime = new HashMap<>();
+ // measurementID -> <fileName, [lastTime, endTimeInLastFile]>
+ Map<String, Pair<String, long[]>> measurementLastTime = new HashMap<>();
+ // deviceID -> <fileName, endTime>, the endTime of device in the last seq file
+ Map<String, Pair<String, Long>> deviceEndTime = new HashMap<>();
+ // fileName -> isBadFile
+ Map<String, Boolean> isBadFileMap = new HashMap<>();
for (File tsFile : tsFiles) {
+ List<String> previousBadFileMsgs = new ArrayList<>();
try {
TsFileResource resource = new TsFileResource(tsFile);
if (!new File(tsFile.getAbsolutePath() + TsFileResource.RESOURCE_SUFFIX).exists()) {
@@ -181,7 +182,8 @@ public class TsFileValidationTool {
} else {
resource.deserialize();
}
- boolean isBadFile = false;
+ isBadFileMap.put(tsFile.getName(), false);
+
try (TsFileSequenceReader reader = new TsFileSequenceReader(tsFile.getAbsolutePath())) {
// deviceID -> has checked overlap or not
Map<String, Boolean> hasCheckedDeviceOverlap = new HashMap<>();
@@ -198,19 +200,15 @@ public class TsFileValidationTool {
case MetaMarker.CHUNK_HEADER:
case MetaMarker.ONLY_ONE_PAGE_CHUNK_HEADER:
ChunkHeader header = reader.readChunkHeader(marker);
- if (header.getDataSize() == 0) {
- // empty value chunk
- break;
- }
long currentChunkEndTime = Long.MIN_VALUE;
- String measurementID = deviceID + "." + header.getMeasurementID();
+ String measurementID = deviceID + PATH_SEPARATOR + header.getMeasurementID();
hasMeasurementPrintedDetails.computeIfAbsent(measurementID, k -> new boolean[4]);
measurementLastTime.computeIfAbsent(
measurementID,
k -> {
long[] arr = new long[2];
Arrays.fill(arr, Long.MIN_VALUE);
- return arr;
+ return new Pair<>("", arr);
});
Decoder defaultTimeDecoder =
Decoder.getDecoderByType(
@@ -236,22 +234,45 @@ public class TsFileValidationTool {
BatchData batchData = pageReader.getAllSatisfiedPageData();
while (batchData.hasCurrent()) {
long timestamp = batchData.currentTime();
- if (timestamp <= measurementLastTime.get(measurementID)[0]) {
+ if (timestamp <= measurementLastTime.get(measurementID).right[0]) {
// find bad file
- if (!isBadFile) {
+ if (timestamp <= measurementLastTime.get(measurementID).right[1]) {
+ // overlap between files, then add the previous bad file's path to the list
+ if (!isBadFileMap.get(measurementLastTime.get(measurementID).left)) {
+ if (printDetails) {
+ previousBadFileMsgs.add(
+ "-- Find the bad file "
+ + tsFile.getParentFile().getAbsolutePath()
+ + File.separator
+ + measurementLastTime.get(measurementID).left
+ + ", overlap with later files.");
+ } else {
+ previousBadFileMsgs.add(
+ tsFile.getParentFile().getAbsolutePath()
+ + File.separator
+ + measurementLastTime.get(measurementID).left);
+ }
+ badFileNum++;
+ isBadFileMap.put(measurementLastTime.get(measurementID).left, true);
+ }
+ }
+ if (!isBadFileMap.get(tsFile.getName())) {
if (printDetails) {
printBoth("-- Find the bad file " + tsFile.getAbsolutePath());
} else {
printBoth(tsFile.getAbsolutePath());
}
- isBadFile = true;
+ isBadFileMap.put(tsFile.getName(), true);
badFileNum++;
}
if (printDetails) {
- if (timestamp <= measurementLastTime.get(measurementID)[1]) {
+ if (timestamp <= measurementLastTime.get(measurementID).right[1]) {
if (!hasMeasurementPrintedDetails.get(measurementID)[0]) {
printBoth(
- "-------- Timeseries " + measurementID + " overlap between files");
+ "-------- Timeseries "
+ + measurementID
+ + " overlap between files, with previous file "
+ + measurementLastTime.get(measurementID).left);
hasMeasurementPrintedDetails.get(measurementID)[0] = true;
}
} else if (timestamp
@@ -278,12 +299,13 @@ public class TsFileValidationTool {
}
}
} else {
- measurementLastTime.get(measurementID)[0] = timestamp;
+ measurementLastTime.get(measurementID).right[0] = timestamp;
currentPageEndTime = timestamp;
currentChunkEndTime = timestamp;
}
batchData.next();
}
+
dataSize -= pageHeader.getSerializedPageSize();
lastPageEndTime = Math.max(lastPageEndTime, currentPageEndTime);
}
@@ -297,27 +319,61 @@ public class TsFileValidationTool {
if (!deviceID.equals("")) {
// record the end time of last device in current file
if (resource.getEndTime(deviceID)
- > deviceEndTime.getOrDefault(deviceID, Long.MIN_VALUE)) {
- deviceEndTime.put(deviceID, resource.getEndTime(deviceID));
+ > deviceEndTime.computeIfAbsent(
+ deviceID,
+ k -> {
+ return new Pair<>("", Long.MIN_VALUE);
+ })
+ .right) {
+ deviceEndTime.get(deviceID).left = tsFile.getName();
+ deviceEndTime.get(deviceID).right = resource.getEndTime(deviceID);
}
}
ChunkGroupHeader chunkGroupHeader = reader.readChunkGroupHeader();
deviceID = chunkGroupHeader.getDeviceID();
if (!hasCheckedDeviceOverlap.getOrDefault(deviceID, false)
&& resource.getStartTime(deviceID)
- <= deviceEndTime.getOrDefault(deviceID, Long.MIN_VALUE)) {
+ <= deviceEndTime.computeIfAbsent(
+ deviceID,
+ k -> {
+ return new Pair<>("", Long.MIN_VALUE);
+ })
+ .right) {
// find bad file
- if (!isBadFile) {
+ // add previous bad file msg to list
+ if (!isBadFileMap.get(deviceEndTime.get(deviceID).left)) {
+ if (printDetails) {
+ previousBadFileMsgs.add(
+ "-- Find the bad file "
+ + tsFile.getParentFile().getAbsolutePath()
+ + File.separator
+ + deviceEndTime.get(deviceID).left
+ + ", overlap with later files.");
+ } else {
+ previousBadFileMsgs.add(
+ tsFile.getParentFile().getAbsolutePath()
+ + File.separator
+ + deviceEndTime.get(deviceID).left);
+ }
+ isBadFileMap.put(deviceEndTime.get(deviceID).left, true);
+ badFileNum++;
+ }
+ // print current file
+ if (!isBadFileMap.get(tsFile.getName())) {
if (printDetails) {
printBoth("-- Find the bad file " + tsFile.getAbsolutePath());
} else {
printBoth(tsFile.getAbsolutePath());
}
- isBadFile = true;
+ isBadFileMap.put(tsFile.getName(), true);
badFileNum++;
}
if (printDetails) {
- printBoth("---- Device " + deviceID + " overlap between files");
+ printBoth(
+ "---- Device "
+ + deviceID
+ + " overlap between files, with previous file "
+ + deviceEndTime.get(deviceID).left);
}
}
hasCheckedDeviceOverlap.put(deviceID, true);
@@ -332,14 +388,23 @@ public class TsFileValidationTool {
// record the end time of each timeseries in current file
for (Map.Entry<String, Long> entry : lashChunkEndTime.entrySet()) {
- Long endTime = Math.max(measurementLastTime.get(entry.getKey())[1], entry.getValue());
- measurementLastTime.get(entry.getKey())[1] = endTime;
+ if (measurementLastTime.get(entry.getKey()).right[1] <= entry.getValue()) {
+ measurementLastTime.get(entry.getKey()).right[1] = entry.getValue();
+ measurementLastTime.get(entry.getKey()).left = tsFile.getName();
+ }
}
}
} catch (Throwable e) {
logger.error("Meet errors in reading file {} , skip it.", tsFile.getAbsolutePath(), e);
if (printDetails) {
- printBoth("-- Meet errors in reading file " + tsFile.getAbsolutePath());
+ printBoth(
+ "-- Meet errors in reading file "
+ + tsFile.getAbsolutePath()
+ + ", tsfile may be corrupted.");
+ }
+ } finally {
+ for (String msg : previousBadFileMsgs) {
+ printBoth(msg);
}
}
}