You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iotdb.apache.org by hx...@apache.org on 2020/02/14 08:37:07 UTC
[incubator-iotdb] 01/02: add a data folder check tool
This is an automated email from the ASF dual-hosted git repository.
hxd pushed a commit to branch tsfileTools
in repository https://gitbox.apache.org/repos/asf/incubator-iotdb.git
commit ce42d8b57db319c0468ad2bc61b25547a4d18e24
Author: xiangdong huang <sa...@gmail.com>
AuthorDate: Fri Feb 14 16:13:54 2020 +0800
add a data folder check tool
---
.../tools/tsfileToolSet/data-health-check.sh | 48 +++++
.../apache/iotdb/db/tools/DataHealthChecker.java | 207 +++++++++++++++++++++
.../iotdb/db/tools/TsFileResourcePrinter.java | 25 ++-
.../tsfile/fileSystem/fsFactory/FSFactory.java | 8 +
4 files changed, 283 insertions(+), 5 deletions(-)
diff --git a/server/src/assembly/resources/tools/tsfileToolSet/data-health-check.sh b/server/src/assembly/resources/tools/tsfileToolSet/data-health-check.sh
new file mode 100755
index 0000000..8944d35
--- /dev/null
+++ b/server/src/assembly/resources/tools/tsfileToolSet/data-health-check.sh
@@ -0,0 +1,48 @@
+#!/bin/sh
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# Launcher for org.apache.iotdb.db.tools.DataHealthChecker.
+# Usage: data-health-check.sh [dataFolder]
+# All arguments are forwarded unchanged to the Java tool; the exit status of the
+# JVM becomes the exit status of this script.
+
+echo ---------------------
+echo Starting Checking the Data Folder
+echo ---------------------
+
+# Default IOTDB_HOME to the directory two levels above this script.
+if [ -z "${IOTDB_HOME}" ]; then
+  export IOTDB_HOME="$(cd "`dirname "$0"`"/../..; pwd)"
+fi
+
+# Prefer an executable JVM below JAVA_HOME; without JAVA_HOME rely on the PATH.
+if [ -n "$JAVA_HOME" ]; then
+  for java in "$JAVA_HOME"/bin/amd64/java "$JAVA_HOME"/bin/java; do
+    if [ -x "$java" ]; then
+      JAVA="$java"
+      break
+    fi
+  done
+else
+  JAVA=java
+fi
+
+# JAVA_HOME may be set but contain no executable java: fail with a clear message
+# instead of trying to execute an empty command.
+if [ -z "$JAVA" ]; then
+  echo "Unable to find java executable. Check JAVA_HOME and PATH environment variables." >&2
+  exit 1
+fi
+
+# Join every jar below lib with ':' and no leading separator (an empty classpath
+# entry would silently add the current directory).
+CLASSPATH=""
+for f in "${IOTDB_HOME}"/lib/*.jar; do
+  CLASSPATH="${CLASSPATH:+${CLASSPATH}:}$f"
+done
+
+MAIN_CLASS=org.apache.iotdb.db.tools.DataHealthChecker
+
+"$JAVA" -cp "$CLASSPATH" "$MAIN_CLASS" "$@"
+exit $?
\ No newline at end of file
diff --git a/server/src/main/java/org/apache/iotdb/db/tools/DataHealthChecker.java b/server/src/main/java/org/apache/iotdb/db/tools/DataHealthChecker.java
new file mode 100644
index 0000000..7c918bf
--- /dev/null
+++ b/server/src/main/java/org/apache/iotdb/db/tools/DataHealthChecker.java
@@ -0,0 +1,207 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iotdb.db.tools;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import org.apache.iotdb.db.engine.fileSystem.SystemFileFactory;
+import org.apache.iotdb.db.engine.storagegroup.TsFileResource;
+import org.apache.iotdb.tsfile.file.footer.ChunkGroupFooter;
+import org.apache.iotdb.tsfile.file.metadata.ChunkGroupMetaData;
+import org.apache.iotdb.tsfile.file.metadata.ChunkMetaData;
+import org.apache.iotdb.tsfile.file.metadata.TsDeviceMetadata;
+import org.apache.iotdb.tsfile.file.metadata.TsDeviceMetadataIndex;
+import org.apache.iotdb.tsfile.file.metadata.TsFileMetaData;
+import org.apache.iotdb.tsfile.fileSystem.FSFactoryProducer;
+import org.apache.iotdb.tsfile.read.TsFileCheckStatus;
+import org.apache.iotdb.tsfile.read.TsFileSequenceReader;
+import org.apache.iotdb.tsfile.read.common.Chunk;
+import org.apache.iotdb.tsfile.write.schema.MeasurementSchema;
+
+/**
+ * for checking whether the data folder has broken files
+ */
+public class DataHealthChecker {
+ public static void main(String[] args) {
+ String dataFolder = "data/data/";//the dataFoler has two subfolders: sequence and unsequence
+ if (args.length == 1) {
+ dataFolder = args[0];
+ }
+ System.out.println("data folder path:" + dataFolder);
+ File folder = FSFactoryProducer.getFSFactory().getFile(dataFolder);
+
+ if (!folder.isDirectory()) {
+ System.out.println("the input is not a data folder");
+ return;
+ }
+ File[] subFiles = folder.listFiles(File::isDirectory);
+ Arrays.sort(subFiles, (x, y) -> x.getName().compareTo(y.getName()));
+ if (subFiles.length > 2) {
+ System.out.println("[ERROR] there are some unknown folder under the data folder");
+ }
+ for (File seqOrUnSeq : subFiles) {
+ for(File sgFolder : seqOrUnSeq.listFiles(File::isDirectory)) {
+ for (File partitionFolder : sgFolder.listFiles(File::isDirectory)) {
+ //check the resource file first
+ for (File tsfile : partitionFolder.listFiles(x -> x.getName().endsWith(".tsfile"))) {
+ //check resource
+ TsFileResource tsResource = new TsFileResource(SystemFileFactory.INSTANCE.getFile(tsfile.getAbsolutePath()));
+ try {
+ tsResource.deSerialize();
+ if (tsResource.getStartTimeMap().isEmpty()) {
+ System.out.print(String.format("[ERROR] Resource file %s has no devices", tsfile.getAbsolutePath()+".resource"));
+ }
+ } catch (Exception e) {
+ System.out.print(String.format("[ERROR] Resource file %s is broken: %s", tsfile.getAbsolutePath()+".resource", e.getMessage()));
+ }
+ //check data
+ checkTsFile(tsfile);
+ }
+ for (File otherFile : partitionFolder.listFiles(x -> ! (x.getName().endsWith(".tsfile") || x.getName().endsWith(".tsfile.resource")))) {
+ System.out.print(String.format("[ERROR] Bad file %s ", otherFile.getAbsolutePath()));
+ }
+ }
+ }
+ }
+ }
+
+ private static void checkTsFile(File tsfile) {
+ if (tsfile.length() == 0) {
+ System.out.println(
+ String.format("[ERROR] TsFile %s is 0 byte, will remove it.", tsfile.getAbsolutePath()));
+ File temp = FSFactoryProducer.getFSFactory().getFile(tsfile.getAbsoluteFile() + ".resource");
+ tsfile.delete();
+ temp.delete();
+ return;
+ }
+ // get metadata information
+ //
+ TsFileSequenceReader reader = null;
+ try {
+ reader = new TsFileSequenceReader(tsfile.getAbsolutePath());
+ } catch (Exception e) {
+ System.out.println(String
+ .format("[ERROR] TsFile %s is broken: %s", tsfile.getAbsolutePath(), e.getMessage()));
+ return;
+ }
+
+ // this tsfile is complete
+ try {
+ if (!reader.isComplete()) {
+ // uncompleted file
+ List<ChunkGroupMetaData> chunkGroupMetaDataList = new ArrayList<>();
+ Map<String, MeasurementSchema> knownSchemas = new HashMap<>();
+ long truncatedPosition = reader.selfCheck(knownSchemas, chunkGroupMetaDataList, true);
+ if (truncatedPosition == TsFileCheckStatus.INCOMPATIBLE_FILE) {
+ System.out.println(String
+ .format("[ERROR] TsFile %s is not a TsFile format", tsfile.getAbsolutePath()));
+ } else {
+ System.out.println(String
+ .format("[INFO] TsFile %s is an incomplete file, last useful position: %d", tsfile.getAbsolutePath(), truncatedPosition));
+ }
+
+ return;
+ } else {
+ TsFileMetaData tsFileMetaData = null;
+ try {
+ tsFileMetaData = reader.readFileMetadata();
+ } catch (Exception e) {
+ System.out.println(String
+ .format("[ERROR] TsFile (tsFileMetaData) %s is broken : %s", tsfile.getAbsolutePath(),
+ e.getMessage()));
+ return;
+ }
+ List<TsDeviceMetadataIndex> tsDeviceMetadataIndexSortedList = tsFileMetaData.getDeviceMap()
+ .values()
+ .stream()
+ .sorted((x, y) -> (int) (x.getOffset() - y.getOffset())).collect(Collectors.toList());
+ if (tsDeviceMetadataIndexSortedList.isEmpty()) {
+ System.out.println(String
+ .format("[ERROR] TsFile %s has no device, please check.", tsfile.getAbsolutePath()));
+ return;
+ }
+ List<ChunkGroupMetaData> chunkGroupMetaDataTmpList = new ArrayList<>();
+ List<TsDeviceMetadata> tsDeviceMetadataSortedList = new ArrayList<>();
+ for (TsDeviceMetadataIndex index : tsDeviceMetadataIndexSortedList) {
+ TsDeviceMetadata deviceMetadata = null;
+ try {
+ deviceMetadata = reader.readTsDeviceMetaData(index);
+ } catch (Exception e) {
+ System.out.println(String
+ .format("[ERROR] TsFile (tsDeviceMetadata) %s is broken : %s", tsfile.getAbsolutePath(),
+ e.getMessage()));
+ return;
+ }
+ tsDeviceMetadataSortedList.add(deviceMetadata);
+ chunkGroupMetaDataTmpList.addAll(deviceMetadata.getChunkGroupMetaDataList());
+ }
+ List<ChunkGroupMetaData> chunkGroupMetaDataSortedList = chunkGroupMetaDataTmpList.stream()
+ .sorted(Comparator.comparingLong(ChunkGroupMetaData::getStartOffsetOfChunkGroup))
+ .collect(Collectors.toList());
+ if (chunkGroupMetaDataSortedList.isEmpty()) {
+ System.out.println(String
+ .format("[ERROR] TsFile %s has no chunkGroups, please check.", tsfile.getAbsolutePath()));
+ return;
+ }
+
+ // chunkGroup begins
+ for (ChunkGroupMetaData chunkGroupMetaData : chunkGroupMetaDataSortedList) {
+ // chunk begins
+ long chunkEndPos = 0;
+ for (ChunkMetaData chunkMetaData : chunkGroupMetaData.getChunkMetaDataList()) {
+ try {
+ Chunk chunk = reader.readMemChunk(chunkMetaData);
+ chunkEndPos =
+ chunkMetaData.getOffsetOfChunkHeader() + chunk.getHeader().getSerializedSize() + chunk
+ .getHeader().getDataSize();
+ } catch (Exception e) {
+ System.out.println(String
+ .format("[ERROR] TsFile (chunk) %s is broken : %s", tsfile.getAbsolutePath(),
+ e.getMessage()));
+ return;
+ }
+ }
+ // chunkGroupFooter begins
+ try {
+ ChunkGroupFooter chunkGroupFooter = reader.readChunkGroupFooter(chunkEndPos, false);
+ } catch (Exception e) {
+ System.out.println(String
+ .format("[ERROR] TsFile (chunkGroupFooter) %s is broken : %s", tsfile.getAbsolutePath(),
+ e.getMessage()));
+ return;
+ }
+ }
+ try {
+ reader.close();
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ return;
+ }
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+}
diff --git a/server/src/main/java/org/apache/iotdb/db/tools/TsFileResourcePrinter.java b/server/src/main/java/org/apache/iotdb/db/tools/TsFileResourcePrinter.java
index d52a220..db193ac 100644
--- a/server/src/main/java/org/apache/iotdb/db/tools/TsFileResourcePrinter.java
+++ b/server/src/main/java/org/apache/iotdb/db/tools/TsFileResourcePrinter.java
@@ -41,13 +41,25 @@ public class TsFileResourcePrinter {
folder = args[0];
}
File folderFile = SystemFileFactory.INSTANCE.getFile(folder);
- File[] files = FSFactoryProducer.getFSFactory().listFilesBySuffix(folderFile.getAbsolutePath(), ".tsfile.resource");
- Arrays.sort(files, Comparator.comparingLong(x -> Long.valueOf(x.getName().split("-")[0])));
+ recursiveFolders(folderFile);
+ System.out.println("analyzing the resource file finished.");
+ }
- for (File file : files) {
- printResource(file.getAbsolutePath());
+  /**
+   * Prints every ".tsfile.resource" file found in {@code folderFile}; when the folder holds no
+   * such file, its sub folders are visited recursively instead.
+   *
+   * @param folderFile the folder to scan
+   * @throws IOException if printing a resource file fails
+   */
+  private static void recursiveFolders(File folderFile) throws IOException {
+    File[] files = FSFactoryProducer.getFSFactory().listFilesBySuffix(folderFile.getAbsolutePath(), ".tsfile.resource");
+    //in IoTDB, all tsfiles have the same depth in the file directories. So, either a folder has tsfiles, or has folders only.
+    if (files.length == 0) {
+      //no such files, then try to find all directories.
+      File[] folders = FSFactoryProducer.getFSFactory().listSubFolders(folderFile);
+      if (folders == null) {
+        //File.listFiles yields null when folderFile is not a directory or cannot be read.
+        return;
+      }
+      for (File folder : folders) {
+        recursiveFolders(folder);
+      }
+    } else {
+      //resource files are ordered by the numeric prefix before the first '-' of their name.
+      Arrays.sort(files, Comparator.comparingLong(x -> Long.parseLong(x.getName().split("-")[0])));
+      for (File file : files) {
+        printResource(file.getAbsolutePath());
+      }
     }
-    System.out.println("analyzing the resource file finished.");
   }
public static void printResource(String filename) throws IOException {
@@ -69,5 +81,8 @@ public class TsFileResourcePrinter {
resource.getEndTimeMap().get(device),
DatetimeUtils.convertMillsecondToZonedDateTime(resource.getEndTimeMap().get(device))));
}
+ if (resource.getStartTimeMap().isEmpty()) {
+ System.err.println(String.format("%s file has no devices, please check the file", filename));
+ }
}
}
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/fileSystem/fsFactory/FSFactory.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/fileSystem/fsFactory/FSFactory.java
index 0597cc1..7f18c82 100644
--- a/tsfile/src/main/java/org/apache/iotdb/tsfile/fileSystem/fsFactory/FSFactory.java
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/fileSystem/fsFactory/FSFactory.java
@@ -116,4 +116,12 @@ public interface FSFactory {
* @return list of files
*/
File[] listFilesByPrefix(String fileFolder, String prefix);
+
+  /**
+   * List all sub folders of the given folder (non-recursive).
+   *
+   * @param file the folder whose direct sub folders are listed
+   * @return the direct sub folders; an empty array when {@code file} is not a directory or
+   *     cannot be read (never {@code null})
+   */
+  default File[] listSubFolders(File file) {
+    File[] subFolders = file.listFiles(File::isDirectory);
+    return subFolders == null ? new File[0] : subFolders;
+  }
}
\ No newline at end of file