You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iotdb.apache.org by hx...@apache.org on 2020/02/14 08:37:06 UTC

[incubator-iotdb] branch tsfileTools created (now 60eeb83)

This is an automated email from the ASF dual-hosted git repository.

hxd pushed a change to branch tsfileTools
in repository https://gitbox.apache.org/repos/asf/incubator-iotdb.git.


      at 60eeb83  fix bugs of datahealthChecker

This branch includes the following new commits:

     new ce42d8b  add a data folder check tool
     new 60eeb83  fix bugs of datahealthChecker

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[incubator-iotdb] 02/02: fix bugs of datahealthChecker

Posted by hx...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

hxd pushed a commit to branch tsfileTools
in repository https://gitbox.apache.org/repos/asf/incubator-iotdb.git

commit 60eeb83a61045850f5779c1383bb034f0b08e49c
Author: xiangdong huang <sa...@gmail.com>
AuthorDate: Fri Feb 14 16:36:41 2020 +0800

    fix bugs of datahealthChecker
---
 .../apache/iotdb/db/tools/DataHealthChecker.java   | 68 +++++++++++++++++++---
 1 file changed, 60 insertions(+), 8 deletions(-)

diff --git a/server/src/main/java/org/apache/iotdb/db/tools/DataHealthChecker.java b/server/src/main/java/org/apache/iotdb/db/tools/DataHealthChecker.java
index 7c918bf..8231f94 100644
--- a/server/src/main/java/org/apache/iotdb/db/tools/DataHealthChecker.java
+++ b/server/src/main/java/org/apache/iotdb/db/tools/DataHealthChecker.java
@@ -19,6 +19,8 @@
 package org.apache.iotdb.db.tools;
 
 import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
@@ -46,8 +48,12 @@ import org.apache.iotdb.tsfile.write.schema.MeasurementSchema;
 public class DataHealthChecker {
   public static void main(String[] args) {
     String dataFolder = "data/data/";//the dataFoler has two subfolders: sequence and unsequence
+    String progressFile = "progress.log";
     if (args.length == 1) {
       dataFolder = args[0];
+    } if (args.length == 2) {
+      dataFolder = args[0];
+      progressFile = args[1];
     }
     System.out.println("data folder path:" + dataFolder);
     File folder = FSFactoryProducer.getFSFactory().getFile(dataFolder);
@@ -56,35 +62,81 @@ public class DataHealthChecker {
       System.out.println("the input is not a data folder");
       return;
     }
+
     File[] subFiles = folder.listFiles(File::isDirectory);
     Arrays.sort(subFiles, (x, y) -> x.getName().compareTo(y.getName()));
     if (subFiles.length > 2) {
       System.out.println("[ERROR] there are some unknown folder under the data folder");
     }
+
+    int total = 0;
+    for (File seqOrUnSeq : subFiles) {
+      for (File sgFolder : seqOrUnSeq.listFiles(File::isDirectory)) {
+        for (File partitionFolder : sgFolder.listFiles(File::isDirectory)) {
+          //check the resource file first
+          total += partitionFolder.listFiles(x -> x.getName().endsWith(".tsfile")).length;
+        }
+      }
+    }
+
+    FileWriter writer = null;
+    try {
+      writer = new FileWriter(progressFile);
+      System.out.println("The progress is recorded in " + new File(progressFile).getAbsolutePath());
+    } catch (IOException e) {
+      e.printStackTrace();
+      return;
+    }
+
+    int current =0;
     for (File seqOrUnSeq : subFiles) {
       for(File sgFolder : seqOrUnSeq.listFiles(File::isDirectory)) {
         for (File partitionFolder : sgFolder.listFiles(File::isDirectory)) {
           //check the resource file first
           for (File tsfile : partitionFolder.listFiles(x -> x.getName().endsWith(".tsfile"))) {
-            //check resource
-            TsFileResource tsResource = new TsFileResource(SystemFileFactory.INSTANCE.getFile(tsfile.getAbsolutePath()));
             try {
-              tsResource.deSerialize();
-              if (tsResource.getStartTimeMap().isEmpty()) {
-                System.out.print(String.format("[ERROR] Resource file %s has no devices", tsfile.getAbsolutePath()+".resource"));
+              writer.write(String.format("%d/%d\n", ++current, total));
+              writer.flush();
+            } catch (IOException e) {
+              e.printStackTrace();
+              try {
+                writer.close();
+              } catch (IOException ex) {
               }
-            } catch (Exception e) {
-              System.out.print(String.format("[ERROR] Resource file %s is broken: %s", tsfile.getAbsolutePath()+".resource", e.getMessage()));
+              return;
+            }
+            //check resource
+            if (SystemFileFactory.INSTANCE.getFile(tsfile.getAbsolutePath()+".resource").exists()) {
+              TsFileResource tsResource = new TsFileResource(
+                  SystemFileFactory.INSTANCE.getFile(tsfile.getAbsolutePath()));
+              try {
+                tsResource.deSerialize();
+                if (tsResource.getStartTimeMap().isEmpty()) {
+                  System.out.print(String.format("[ERROR] Resource file %s has no devices",
+                      tsfile.getAbsolutePath() + ".resource"));
+                }
+              } catch (Exception e) {
+                System.out.print(String.format("[ERROR] Resource file %s is broken: %s",
+                    tsfile.getAbsolutePath() + ".resource", e.getMessage()));
+              }
+            } else {
+              System.out.print(String.format("[ERROR] TsFile %s has no resource file",
+                  tsfile.getAbsolutePath() ));
             }
             //check data
             checkTsFile(tsfile);
           }
           for (File otherFile : partitionFolder.listFiles(x -> ! (x.getName().endsWith(".tsfile") || x.getName().endsWith(".tsfile.resource")))) {
-            System.out.print(String.format("[ERROR] Bad file %s ", otherFile.getAbsolutePath()));
+            System.out.println(String.format("[ERROR] Bad file %s ", otherFile.getAbsolutePath()));
           }
         }
       }
     }
+    try {
+      writer.close();
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
   }
 
   private static void checkTsFile(File tsfile) {


[incubator-iotdb] 01/02: add a data folder check tool

Posted by hx...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

hxd pushed a commit to branch tsfileTools
in repository https://gitbox.apache.org/repos/asf/incubator-iotdb.git

commit ce42d8b57db319c0468ad2bc61b25547a4d18e24
Author: xiangdong huang <sa...@gmail.com>
AuthorDate: Fri Feb 14 16:13:54 2020 +0800

    add a data folder check tool
---
 .../tools/tsfileToolSet/data-health-check.sh       |  48 +++++
 .../apache/iotdb/db/tools/DataHealthChecker.java   | 207 +++++++++++++++++++++
 .../iotdb/db/tools/TsFileResourcePrinter.java      |  25 ++-
 .../tsfile/fileSystem/fsFactory/FSFactory.java     |   8 +
 4 files changed, 283 insertions(+), 5 deletions(-)

diff --git a/server/src/assembly/resources/tools/tsfileToolSet/data-health-check.sh b/server/src/assembly/resources/tools/tsfileToolSet/data-health-check.sh
new file mode 100755
index 0000000..8944d35
--- /dev/null
+++ b/server/src/assembly/resources/tools/tsfileToolSet/data-health-check.sh
@@ -0,0 +1,48 @@
+#!/bin/sh
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+echo ---------------------
+echo Starting Printing the TsFile Sketch # NOTE(review): banner mentions the TsFile sketch, but MAIN_CLASS below is DataHealthChecker
+echo ---------------------
+
+if [ -z "${IOTDB_HOME}" ]; then # when IOTDB_HOME is unset, default it to two directories above this script
+  export IOTDB_HOME="$(cd "`dirname "$0"`"/../..; pwd)"
+fi
+
+if [ -n "$JAVA_HOME" ]; then # pick the first executable java under JAVA_HOME, else fall back to java on PATH
+    for java in "$JAVA_HOME"/bin/amd64/java "$JAVA_HOME"/bin/java; do
+        if [ -x "$java" ]; then
+            JAVA="$java"
+            break
+        fi
+    done # NOTE(review): if neither candidate is executable, JAVA is never assigned in this script
+else
+    JAVA=java
+fi
+
+CLASSPATH="" # NOTE(review): the loop below produces a value that starts with a colon, i.e. an empty first entry
+for f in ${IOTDB_HOME}/lib/*.jar; do # append every jar found under IOTDB_HOME/lib
+  CLASSPATH=${CLASSPATH}":"$f
+done
+
+MAIN_CLASS=org.apache.iotdb.db.tools.DataHealthChecker
+
+"$JAVA" -cp "$CLASSPATH" "$MAIN_CLASS" "$@" # all script arguments are forwarded unchanged to the Java tool
+exit $?
\ No newline at end of file
diff --git a/server/src/main/java/org/apache/iotdb/db/tools/DataHealthChecker.java b/server/src/main/java/org/apache/iotdb/db/tools/DataHealthChecker.java
new file mode 100644
index 0000000..7c918bf
--- /dev/null
+++ b/server/src/main/java/org/apache/iotdb/db/tools/DataHealthChecker.java
@@ -0,0 +1,207 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iotdb.db.tools;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import org.apache.iotdb.db.engine.fileSystem.SystemFileFactory;
+import org.apache.iotdb.db.engine.storagegroup.TsFileResource;
+import org.apache.iotdb.tsfile.file.footer.ChunkGroupFooter;
+import org.apache.iotdb.tsfile.file.metadata.ChunkGroupMetaData;
+import org.apache.iotdb.tsfile.file.metadata.ChunkMetaData;
+import org.apache.iotdb.tsfile.file.metadata.TsDeviceMetadata;
+import org.apache.iotdb.tsfile.file.metadata.TsDeviceMetadataIndex;
+import org.apache.iotdb.tsfile.file.metadata.TsFileMetaData;
+import org.apache.iotdb.tsfile.fileSystem.FSFactoryProducer;
+import org.apache.iotdb.tsfile.read.TsFileCheckStatus;
+import org.apache.iotdb.tsfile.read.TsFileSequenceReader;
+import org.apache.iotdb.tsfile.read.common.Chunk;
+import org.apache.iotdb.tsfile.write.schema.MeasurementSchema;
+
+/**
+ * for checking whether the data folder has broken files
+ */
+public class DataHealthChecker {
+  public static void main(String[] args) {
+    String dataFolder = "data/data/";//the dataFoler has two subfolders: sequence and unsequence
+    if (args.length == 1) {
+      dataFolder = args[0];
+    }
+    System.out.println("data folder path:" + dataFolder);
+    File folder = FSFactoryProducer.getFSFactory().getFile(dataFolder);
+    // below: walk <seq|unseq>/<storage group>/<partition> and check each .tsfile; NOTE(review): listFiles() results are used without null checks
+    if (!folder.isDirectory()) {
+      System.out.println("the input is not a data folder");
+      return;
+    }
+    File[] subFiles = folder.listFiles(File::isDirectory);
+    Arrays.sort(subFiles, (x, y) -> x.getName().compareTo(y.getName()));
+    if (subFiles.length > 2) {
+      System.out.println("[ERROR] there are some unknown folder under the data folder");
+    }
+    for (File seqOrUnSeq : subFiles) {
+      for(File sgFolder : seqOrUnSeq.listFiles(File::isDirectory)) {
+        for (File partitionFolder : sgFolder.listFiles(File::isDirectory)) {
+          //check the resource file first
+          for (File tsfile : partitionFolder.listFiles(x -> x.getName().endsWith(".tsfile"))) {
+            //check resource
+            TsFileResource tsResource = new TsFileResource(SystemFileFactory.INSTANCE.getFile(tsfile.getAbsolutePath()));
+            try {
+              tsResource.deSerialize();
+              if (tsResource.getStartTimeMap().isEmpty()) {
+                System.out.print(String.format("[ERROR] Resource file %s has no devices", tsfile.getAbsolutePath()+".resource"));
+              }
+            } catch (Exception e) {
+              System.out.print(String.format("[ERROR] Resource file %s is broken: %s", tsfile.getAbsolutePath()+".resource", e.getMessage()));
+            }
+            //check data
+            checkTsFile(tsfile);
+          }
+          for (File otherFile : partitionFolder.listFiles(x -> ! (x.getName().endsWith(".tsfile") || x.getName().endsWith(".tsfile.resource")))) {
+            System.out.print(String.format("[ERROR] Bad file %s ", otherFile.getAbsolutePath()));
+          }
+        }
+      }
+    }
+  }
+
+  private static void checkTsFile(File tsfile) {
+    if (tsfile.length() == 0) {
+      System.out.println(
+          String.format("[ERROR] TsFile %s is 0 byte, will remove it.", tsfile.getAbsolutePath()));
+      File temp = FSFactoryProducer.getFSFactory().getFile(tsfile.getAbsoluteFile() + ".resource");
+      tsfile.delete();
+      temp.delete();
+      return;
+    }
+    // open a sequence reader on the file; a constructor failure is reported as a broken TsFile
+    // NOTE(review): reader.close() is reached only on the fully-successful path; the early returns below leave it open
+    TsFileSequenceReader reader = null;
+    try {
+      reader = new TsFileSequenceReader(tsfile.getAbsolutePath());
+    } catch (Exception e) {
+      System.out.println(String
+          .format("[ERROR] TsFile %s is broken: %s", tsfile.getAbsolutePath(), e.getMessage()));
+      return;
+    }
+
+    // branch on reader.isComplete(): incomplete files get a self-check, complete ones a full metadata and chunk walk
+    try {
+      if (!reader.isComplete()) {
+        // incomplete file: selfCheck yields INCOMPATIBLE_FILE or the position reported below as the last useful one
+        List<ChunkGroupMetaData> chunkGroupMetaDataList = new ArrayList<>();
+        Map<String, MeasurementSchema> knownSchemas = new HashMap<>();
+        long truncatedPosition = reader.selfCheck(knownSchemas, chunkGroupMetaDataList, true);
+        if (truncatedPosition == TsFileCheckStatus.INCOMPATIBLE_FILE) {
+          System.out.println(String
+              .format("[ERROR] TsFile %s is not a TsFile format", tsfile.getAbsolutePath()));
+        } else {
+          System.out.println(String
+              .format("[INFO] TsFile %s is an incomplete file, last useful position: %d", tsfile.getAbsolutePath(), truncatedPosition));
+        }
+
+        return;
+      } else {
+        TsFileMetaData tsFileMetaData = null;
+        try {
+          tsFileMetaData = reader.readFileMetadata();
+        } catch (Exception e) {
+          System.out.println(String
+              .format("[ERROR] TsFile (tsFileMetaData) %s is broken : %s", tsfile.getAbsolutePath(),
+                  e.getMessage()));
+          return;
+        }
+        List<TsDeviceMetadataIndex> tsDeviceMetadataIndexSortedList = tsFileMetaData.getDeviceMap()
+            .values()
+            .stream()
+            .sorted((x, y) -> (int) (x.getOffset() - y.getOffset())).collect(Collectors.toList()); // NOTE(review): if getOffset() is a long, the difference narrowed to int can overflow; consider Long.compare
+        if (tsDeviceMetadataIndexSortedList.isEmpty()) {
+          System.out.println(String
+              .format("[ERROR] TsFile %s has no device, please check.", tsfile.getAbsolutePath()));
+          return;
+        }
+        List<ChunkGroupMetaData> chunkGroupMetaDataTmpList = new ArrayList<>();
+        List<TsDeviceMetadata> tsDeviceMetadataSortedList = new ArrayList<>();
+        for (TsDeviceMetadataIndex index : tsDeviceMetadataIndexSortedList) {
+          TsDeviceMetadata deviceMetadata = null;
+          try {
+            deviceMetadata = reader.readTsDeviceMetaData(index);
+          } catch (Exception e) {
+            System.out.println(String
+                .format("[ERROR] TsFile (tsDeviceMetadata) %s is broken : %s", tsfile.getAbsolutePath(),
+                    e.getMessage()));
+            return;
+          }
+          tsDeviceMetadataSortedList.add(deviceMetadata);
+          chunkGroupMetaDataTmpList.addAll(deviceMetadata.getChunkGroupMetaDataList());
+        }
+        List<ChunkGroupMetaData> chunkGroupMetaDataSortedList = chunkGroupMetaDataTmpList.stream()
+            .sorted(Comparator.comparingLong(ChunkGroupMetaData::getStartOffsetOfChunkGroup))
+            .collect(Collectors.toList());
+        if (chunkGroupMetaDataSortedList.isEmpty()) {
+          System.out.println(String
+              .format("[ERROR] TsFile %s has no chunkGroups, please check.", tsfile.getAbsolutePath()));
+          return;
+        }
+
+        // walk the chunk groups in ascending start-offset order
+        for (ChunkGroupMetaData chunkGroupMetaData : chunkGroupMetaDataSortedList) {
+          // read each chunk; chunkEndPos = header offset + header size + data size of the last chunk read
+          long chunkEndPos = 0; // stays 0 if the group has no chunks
+          for (ChunkMetaData chunkMetaData : chunkGroupMetaData.getChunkMetaDataList()) {
+            try {
+              Chunk chunk = reader.readMemChunk(chunkMetaData);
+              chunkEndPos =
+                  chunkMetaData.getOffsetOfChunkHeader() + chunk.getHeader().getSerializedSize() + chunk
+                      .getHeader().getDataSize();
+            } catch (Exception e) {
+              System.out.println(String
+                  .format("[ERROR] TsFile (chunk) %s is broken : %s", tsfile.getAbsolutePath(),
+                      e.getMessage()));
+              return;
+            }
+          }
+          // read the footer at chunkEndPos; the value is unused, only a read failure is reported
+          try {
+            ChunkGroupFooter chunkGroupFooter = reader.readChunkGroupFooter(chunkEndPos, false);
+          } catch (Exception e) {
+            System.out.println(String
+                .format("[ERROR] TsFile (chunkGroupFooter) %s is broken : %s", tsfile.getAbsolutePath(),
+                    e.getMessage()));
+            return;
+          }
+        }
+        try {
+          reader.close();
+        } catch (Exception e) {
+          e.printStackTrace();
+        }
+        return;
+      }
+    } catch (Exception e) {
+      e.printStackTrace();
+    }
+  }
+}
diff --git a/server/src/main/java/org/apache/iotdb/db/tools/TsFileResourcePrinter.java b/server/src/main/java/org/apache/iotdb/db/tools/TsFileResourcePrinter.java
index d52a220..db193ac 100644
--- a/server/src/main/java/org/apache/iotdb/db/tools/TsFileResourcePrinter.java
+++ b/server/src/main/java/org/apache/iotdb/db/tools/TsFileResourcePrinter.java
@@ -41,13 +41,25 @@ public class TsFileResourcePrinter {
       folder = args[0];
     }
     File folderFile = SystemFileFactory.INSTANCE.getFile(folder);
-    File[] files = FSFactoryProducer.getFSFactory().listFilesBySuffix(folderFile.getAbsolutePath(), ".tsfile.resource");
-    Arrays.sort(files, Comparator.comparingLong(x -> Long.valueOf(x.getName().split("-")[0])));
+    recursiveFolders(folderFile);
+    System.out.println("analyzing the resource file finished.");
+  }
 
-    for (File file : files) {
-      printResource(file.getAbsolutePath());
+  private static void recursiveFolders(File folderFile) throws IOException {
+    File[] files = FSFactoryProducer.getFSFactory().listFilesBySuffix(folderFile.getAbsolutePath(), ".tsfile.resource");
+    //in IoTDB, all tsfiles have the same depth in the file directories. So, either a folder has tsfiles, or has folders only.
+    if (files.length == 0) {
+      //no such files, then try to find all directories.
+      File[] folders = FSFactoryProducer.getFSFactory().listSubFolders(folderFile);
+      for (File folder : folders) {
+        recursiveFolders(folder);
+      }
+    } else {
+      Arrays.sort(files, Comparator.comparingLong(x -> Long.valueOf(x.getName().split("-")[0])));
+      for (File file : files) {
+        printResource(file.getAbsolutePath());
+      }
     }
-    System.out.println("analyzing the resource file finished.");
   }
 
   public static void printResource(String filename) throws IOException {
@@ -69,5 +81,8 @@ public class TsFileResourcePrinter {
           resource.getEndTimeMap().get(device),
           DatetimeUtils.convertMillsecondToZonedDateTime(resource.getEndTimeMap().get(device))));
     }
+    if (resource.getStartTimeMap().isEmpty()) {
+      System.err.println(String.format("%s file has no devices, please check the file", filename));
+    }
   }
 }
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/fileSystem/fsFactory/FSFactory.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/fileSystem/fsFactory/FSFactory.java
index 0597cc1..7f18c82 100644
--- a/tsfile/src/main/java/org/apache/iotdb/tsfile/fileSystem/fsFactory/FSFactory.java
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/fileSystem/fsFactory/FSFactory.java
@@ -116,4 +116,12 @@ public interface FSFactory {
    * @return list of files
    */
   File[] listFilesByPrefix(String fileFolder, String prefix);
+
+  /** List the direct sub-folders of the given folder (non-recursive).
+   * @param file the folder whose immediate sub-directories are listed
+   * @return the result of {@code file.listFiles(File::isDirectory)}; per the JDK this may be null if {@code file} is not a directory
+   */
+  default File[] listSubFolders(File file) {
+    return file.listFiles(File::isDirectory);
+  }
 }
\ No newline at end of file