You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iotdb.apache.org by hx...@apache.org on 2020/02/14 08:37:07 UTC

[incubator-iotdb] 01/02: add a data folder check tool

This is an automated email from the ASF dual-hosted git repository.

hxd pushed a commit to branch tsfileTools
in repository https://gitbox.apache.org/repos/asf/incubator-iotdb.git

commit ce42d8b57db319c0468ad2bc61b25547a4d18e24
Author: xiangdong huang <sa...@gmail.com>
AuthorDate: Fri Feb 14 16:13:54 2020 +0800

    add a data folder check tool
---
 .../tools/tsfileToolSet/data-health-check.sh       |  48 +++++
 .../apache/iotdb/db/tools/DataHealthChecker.java   | 207 +++++++++++++++++++++
 .../iotdb/db/tools/TsFileResourcePrinter.java      |  25 ++-
 .../tsfile/fileSystem/fsFactory/FSFactory.java     |   8 +
 4 files changed, 283 insertions(+), 5 deletions(-)

diff --git a/server/src/assembly/resources/tools/tsfileToolSet/data-health-check.sh b/server/src/assembly/resources/tools/tsfileToolSet/data-health-check.sh
new file mode 100755
index 0000000..8944d35
--- /dev/null
+++ b/server/src/assembly/resources/tools/tsfileToolSet/data-health-check.sh
@@ -0,0 +1,48 @@
+#!/bin/sh
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# Launches org.apache.iotdb.db.tools.DataHealthChecker.
+# All command-line arguments are forwarded to the Java program unchanged.
+
+echo ---------------------
+echo Starting the Data Health Check
+echo ---------------------
+
+# Default IOTDB_HOME to the directory two levels above this script when it is not set.
+if [ -z "${IOTDB_HOME}" ]; then
+  export IOTDB_HOME="$(cd "`dirname "$0"`"/../..; pwd)"
+fi
+
+# Locate the java executable: prefer the one under JAVA_HOME, otherwise rely on PATH.
+if [ -n "$JAVA_HOME" ]; then
+    for java in "$JAVA_HOME"/bin/amd64/java "$JAVA_HOME"/bin/java; do
+        if [ -x "$java" ]; then
+            JAVA="$java"
+            break
+        fi
+    done
+else
+    JAVA=java
+fi
+
+# JAVA stays empty when JAVA_HOME is set but contains no executable java; fail early
+# with a clear message instead of trying to run an empty command.
+if [ -z "$JAVA" ]; then
+    echo "Unable to find java executable. Check JAVA_HOME and PATH environment variables." >&2
+    exit 1
+fi
+
+# Put every jar under ${IOTDB_HOME}/lib on the classpath.
+# The path is quoted so that an IOTDB_HOME containing spaces still works.
+CLASSPATH=""
+for f in "${IOTDB_HOME}"/lib/*.jar; do
+  CLASSPATH=${CLASSPATH}":"$f
+done
+
+MAIN_CLASS=org.apache.iotdb.db.tools.DataHealthChecker
+
+# Run the checker and propagate its exit status to the caller.
+"$JAVA" -cp "$CLASSPATH" "$MAIN_CLASS" "$@"
+exit $?
\ No newline at end of file
diff --git a/server/src/main/java/org/apache/iotdb/db/tools/DataHealthChecker.java b/server/src/main/java/org/apache/iotdb/db/tools/DataHealthChecker.java
new file mode 100644
index 0000000..7c918bf
--- /dev/null
+++ b/server/src/main/java/org/apache/iotdb/db/tools/DataHealthChecker.java
@@ -0,0 +1,207 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iotdb.db.tools;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import org.apache.iotdb.db.engine.fileSystem.SystemFileFactory;
+import org.apache.iotdb.db.engine.storagegroup.TsFileResource;
+import org.apache.iotdb.tsfile.file.footer.ChunkGroupFooter;
+import org.apache.iotdb.tsfile.file.metadata.ChunkGroupMetaData;
+import org.apache.iotdb.tsfile.file.metadata.ChunkMetaData;
+import org.apache.iotdb.tsfile.file.metadata.TsDeviceMetadata;
+import org.apache.iotdb.tsfile.file.metadata.TsDeviceMetadataIndex;
+import org.apache.iotdb.tsfile.file.metadata.TsFileMetaData;
+import org.apache.iotdb.tsfile.fileSystem.FSFactoryProducer;
+import org.apache.iotdb.tsfile.read.TsFileCheckStatus;
+import org.apache.iotdb.tsfile.read.TsFileSequenceReader;
+import org.apache.iotdb.tsfile.read.common.Chunk;
+import org.apache.iotdb.tsfile.write.schema.MeasurementSchema;
+
+/**
+ * for checking whether the data folder has broken files
+ */
+public class DataHealthChecker {
+  public static void main(String[] args) {
+    String dataFolder = "data/data/";//the dataFoler has two subfolders: sequence and unsequence
+    if (args.length == 1) {
+      dataFolder = args[0];
+    }
+    System.out.println("data folder path:" + dataFolder);
+    File folder = FSFactoryProducer.getFSFactory().getFile(dataFolder);
+
+    if (!folder.isDirectory()) {
+      System.out.println("the input is not a data folder");
+      return;
+    }
+    File[] subFiles = folder.listFiles(File::isDirectory);
+    Arrays.sort(subFiles, (x, y) -> x.getName().compareTo(y.getName()));
+    if (subFiles.length > 2) {
+      System.out.println("[ERROR] there are some unknown folder under the data folder");
+    }
+    for (File seqOrUnSeq : subFiles) {
+      for(File sgFolder : seqOrUnSeq.listFiles(File::isDirectory)) {
+        for (File partitionFolder : sgFolder.listFiles(File::isDirectory)) {
+          //check the resource file first
+          for (File tsfile : partitionFolder.listFiles(x -> x.getName().endsWith(".tsfile"))) {
+            //check resource
+            TsFileResource tsResource = new TsFileResource(SystemFileFactory.INSTANCE.getFile(tsfile.getAbsolutePath()));
+            try {
+              tsResource.deSerialize();
+              if (tsResource.getStartTimeMap().isEmpty()) {
+                System.out.print(String.format("[ERROR] Resource file %s has no devices", tsfile.getAbsolutePath()+".resource"));
+              }
+            } catch (Exception e) {
+              System.out.print(String.format("[ERROR] Resource file %s is broken: %s", tsfile.getAbsolutePath()+".resource", e.getMessage()));
+            }
+            //check data
+            checkTsFile(tsfile);
+          }
+          for (File otherFile : partitionFolder.listFiles(x -> ! (x.getName().endsWith(".tsfile") || x.getName().endsWith(".tsfile.resource")))) {
+            System.out.print(String.format("[ERROR] Bad file %s ", otherFile.getAbsolutePath()));
+          }
+        }
+      }
+    }
+  }
+
+  private static void checkTsFile(File tsfile) {
+    if (tsfile.length() == 0) {
+      System.out.println(
+          String.format("[ERROR] TsFile %s is 0 byte, will remove it.", tsfile.getAbsolutePath()));
+      File temp = FSFactoryProducer.getFSFactory().getFile(tsfile.getAbsoluteFile() + ".resource");
+      tsfile.delete();
+      temp.delete();
+      return;
+    }
+    // get metadata information
+    //
+    TsFileSequenceReader reader = null;
+    try {
+      reader = new TsFileSequenceReader(tsfile.getAbsolutePath());
+    } catch (Exception e) {
+      System.out.println(String
+          .format("[ERROR] TsFile %s is broken: %s", tsfile.getAbsolutePath(), e.getMessage()));
+      return;
+    }
+
+    // this tsfile is complete
+    try {
+      if (!reader.isComplete()) {
+        // uncompleted file
+        List<ChunkGroupMetaData> chunkGroupMetaDataList = new ArrayList<>();
+        Map<String, MeasurementSchema> knownSchemas = new HashMap<>();
+        long truncatedPosition = reader.selfCheck(knownSchemas, chunkGroupMetaDataList, true);
+        if (truncatedPosition == TsFileCheckStatus.INCOMPATIBLE_FILE) {
+          System.out.println(String
+              .format("[ERROR] TsFile %s is not a TsFile format", tsfile.getAbsolutePath()));
+        } else {
+          System.out.println(String
+              .format("[INFO] TsFile %s is an incomplete file, last useful position: %d", tsfile.getAbsolutePath(), truncatedPosition));
+        }
+
+        return;
+      } else {
+        TsFileMetaData tsFileMetaData = null;
+        try {
+          tsFileMetaData = reader.readFileMetadata();
+        } catch (Exception e) {
+          System.out.println(String
+              .format("[ERROR] TsFile (tsFileMetaData) %s is broken : %s", tsfile.getAbsolutePath(),
+                  e.getMessage()));
+          return;
+        }
+        List<TsDeviceMetadataIndex> tsDeviceMetadataIndexSortedList = tsFileMetaData.getDeviceMap()
+            .values()
+            .stream()
+            .sorted((x, y) -> (int) (x.getOffset() - y.getOffset())).collect(Collectors.toList());
+        if (tsDeviceMetadataIndexSortedList.isEmpty()) {
+          System.out.println(String
+              .format("[ERROR] TsFile %s has no device, please check.", tsfile.getAbsolutePath()));
+          return;
+        }
+        List<ChunkGroupMetaData> chunkGroupMetaDataTmpList = new ArrayList<>();
+        List<TsDeviceMetadata> tsDeviceMetadataSortedList = new ArrayList<>();
+        for (TsDeviceMetadataIndex index : tsDeviceMetadataIndexSortedList) {
+          TsDeviceMetadata deviceMetadata = null;
+          try {
+            deviceMetadata = reader.readTsDeviceMetaData(index);
+          } catch (Exception e) {
+            System.out.println(String
+                .format("[ERROR] TsFile (tsDeviceMetadata) %s is broken : %s", tsfile.getAbsolutePath(),
+                    e.getMessage()));
+            return;
+          }
+          tsDeviceMetadataSortedList.add(deviceMetadata);
+          chunkGroupMetaDataTmpList.addAll(deviceMetadata.getChunkGroupMetaDataList());
+        }
+        List<ChunkGroupMetaData> chunkGroupMetaDataSortedList = chunkGroupMetaDataTmpList.stream()
+            .sorted(Comparator.comparingLong(ChunkGroupMetaData::getStartOffsetOfChunkGroup))
+            .collect(Collectors.toList());
+        if (chunkGroupMetaDataSortedList.isEmpty()) {
+          System.out.println(String
+              .format("[ERROR] TsFile %s has no chunkGroups, please check.", tsfile.getAbsolutePath()));
+          return;
+        }
+
+        // chunkGroup begins
+        for (ChunkGroupMetaData chunkGroupMetaData : chunkGroupMetaDataSortedList) {
+          // chunk begins
+          long chunkEndPos = 0;
+          for (ChunkMetaData chunkMetaData : chunkGroupMetaData.getChunkMetaDataList()) {
+            try {
+              Chunk chunk = reader.readMemChunk(chunkMetaData);
+              chunkEndPos =
+                  chunkMetaData.getOffsetOfChunkHeader() + chunk.getHeader().getSerializedSize() + chunk
+                      .getHeader().getDataSize();
+            } catch (Exception e) {
+              System.out.println(String
+                  .format("[ERROR] TsFile (chunk) %s is broken : %s", tsfile.getAbsolutePath(),
+                      e.getMessage()));
+              return;
+            }
+          }
+          // chunkGroupFooter begins
+          try {
+            ChunkGroupFooter chunkGroupFooter = reader.readChunkGroupFooter(chunkEndPos, false);
+          } catch (Exception e) {
+            System.out.println(String
+                .format("[ERROR] TsFile (chunkGroupFooter) %s is broken : %s", tsfile.getAbsolutePath(),
+                    e.getMessage()));
+            return;
+          }
+        }
+        try {
+          reader.close();
+        } catch (Exception e) {
+          e.printStackTrace();
+        }
+        return;
+      }
+    } catch (Exception e) {
+      e.printStackTrace();
+    }
+  }
+}
diff --git a/server/src/main/java/org/apache/iotdb/db/tools/TsFileResourcePrinter.java b/server/src/main/java/org/apache/iotdb/db/tools/TsFileResourcePrinter.java
index d52a220..db193ac 100644
--- a/server/src/main/java/org/apache/iotdb/db/tools/TsFileResourcePrinter.java
+++ b/server/src/main/java/org/apache/iotdb/db/tools/TsFileResourcePrinter.java
@@ -41,13 +41,25 @@ public class TsFileResourcePrinter {
       folder = args[0];
     }
     File folderFile = SystemFileFactory.INSTANCE.getFile(folder);
-    File[] files = FSFactoryProducer.getFSFactory().listFilesBySuffix(folderFile.getAbsolutePath(), ".tsfile.resource");
-    Arrays.sort(files, Comparator.comparingLong(x -> Long.valueOf(x.getName().split("-")[0])));
+    recursiveFolders(folderFile);
+    System.out.println("analyzing the resource file finished.");
+  }
 
-    for (File file : files) {
-      printResource(file.getAbsolutePath());
+  private static void recursiveFolders(File folderFile) throws IOException { // passes every ".tsfile.resource" file found under folderFile to printResource, descending into sub folders when the folder itself has none
+    File[] files = FSFactoryProducer.getFSFactory().listFilesBySuffix(folderFile.getAbsolutePath(), ".tsfile.resource");
+    // In IoTDB, all tsfiles sit at the same depth of the directory tree, so a folder either holds tsfiles or holds sub folders only.
+    if (files.length == 0) {
+      // No resource file in this folder: recurse into each of its direct sub folders instead.
+      File[] folders = FSFactoryProducer.getFSFactory().listSubFolders(folderFile);
+      for (File folder : folders) {
+        recursiveFolders(folder);
+      }
+    } else {
+      Arrays.sort(files, Comparator.comparingLong(x -> Long.valueOf(x.getName().split("-")[0]))); // ascending by the number that precedes the first '-' in the file name
+      for (File file : files) {
+        printResource(file.getAbsolutePath());
+      }
     }
-    System.out.println("analyzing the resource file finished.");
   }
 
   public static void printResource(String filename) throws IOException {
@@ -69,5 +81,8 @@ public class TsFileResourcePrinter {
           resource.getEndTimeMap().get(device),
           DatetimeUtils.convertMillsecondToZonedDateTime(resource.getEndTimeMap().get(device))));
     }
+    if (resource.getStartTimeMap().isEmpty()) {
+      System.err.println(String.format("%s file has no devices, please check the file", filename));
+    }
   }
 }
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/fileSystem/fsFactory/FSFactory.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/fileSystem/fsFactory/FSFactory.java
index 0597cc1..7f18c82 100644
--- a/tsfile/src/main/java/org/apache/iotdb/tsfile/fileSystem/fsFactory/FSFactory.java
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/fileSystem/fsFactory/FSFactory.java
@@ -116,4 +116,12 @@ public interface FSFactory {
    * @return list of files
    */
   File[] listFilesByPrefix(String fileFolder, String prefix);
+
+  /**
+   * List the direct sub folders of the given folder (non-recursive).
+   *
+   * @param file the folder whose sub folders are listed
+   * @return the sub folders of {@code file}; an empty array if {@code file} is not a directory
+   *     or cannot be read
+   */
+  default File[] listSubFolders(File file) {
+    // File#listFiles returns null rather than an empty array when the path is not a directory
+    // or an I/O error occurs; normalize it so that callers can iterate without a null check.
+    File[] subFolders = file.listFiles(File::isDirectory);
+    return subFolders == null ? new File[0] : subFolders;
+  }
 }
\ No newline at end of file