You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iotdb.apache.org by ro...@apache.org on 2023/04/19 17:15:51 UTC

[iotdb] branch rel/0.13 updated: [To rel/0.13][IOTDB-5797] Load remote TsFile (#9646)

This is an automated email from the ASF dual-hosted git repository.

rong pushed a commit to branch rel/0.13
in repository https://gitbox.apache.org/repos/asf/iotdb.git


The following commit(s) were added to refs/heads/rel/0.13 by this push:
     new 002793af2b [To rel/0.13][IOTDB-5797] Load remote TsFile (#9646)
002793af2b is described below

commit 002793af2b55e04e9599134d5efca6e1656f8f6d
Author: yschengzi <87...@users.noreply.github.com>
AuthorDate: Thu Apr 20 01:15:43 2023 +0800

    [To rel/0.13][IOTDB-5797] Load remote TsFile (#9646)
    
    * support remote HTTP load
    
    * handle IllegalArgumentException
    
    * add user guide
---
 .../Write-And-Delete-Data/Load-External-Tsfile.md  | 13 ++++
 .../Write-And-Delete-Data/Load-External-Tsfile.md  | 17 ++++-
 .../java/org/apache/iotdb/db/conf/IoTDBConfig.java |  6 ++
 .../db/engine/storagegroup/TsFileResource.java     | 26 +++++++
 .../apache/iotdb/db/qp/executor/PlanExecutor.java  | 85 ++++++++++++++--------
 .../iotdb/db/qp/logical/sys/LoadFilesOperator.java | 17 +++--
 .../iotdb/db/qp/physical/sys/OperateFilePlan.java  |  9 ++-
 .../apache/iotdb/db/qp/sql/IoTDBSqlVisitor.java    |  2 +-
 8 files changed, 135 insertions(+), 40 deletions(-)

diff --git a/docs/UserGuide/Write-And-Delete-Data/Load-External-Tsfile.md b/docs/UserGuide/Write-And-Delete-Data/Load-External-Tsfile.md
index 53390d431d..9d24ef130f 100644
--- a/docs/UserGuide/Write-And-Delete-Data/Load-External-Tsfile.md
+++ b/docs/UserGuide/Write-And-Delete-Data/Load-External-Tsfile.md
@@ -71,6 +71,19 @@ Examples:
 * `load '/Users/Desktop/data' autoregister=true,sglevel=1`
 * `load '/Users/Desktop/data' autoregister=false,sglevel=1,verify=true`
 
+#### Remote Load TsFile
+
+Normally, the file path must be a local path on the machine where the IoTDB instance is running. In IoTDB 0.13.5 and later, the file path also supports HTTP-style URIs, which allow a single file to be loaded remotely via the HTTP protocol. The format is `load 'http://host:port/filePath'`.
+
+For example, if your IoTDB instance is running on machine A with IP address 168.121.0.1 and you want to load the file `/root/data/1-1-0-0.tsfile` from machine B with IP address 168.121.0.2 into your IoTDB instance, follow these steps:
+
+1. Start an HTTP service on machine B. For example, you can use the Python command `python -m http.server` to start a simple HTTP server.
+2. Use Cli.sh to connect to the IoTDB instance on machine A.
+3. Enter the SQL command `load 'http://168.121.0.2:8000/root/data/1-1-0-0.tsfile'`.
+4. Wait for the load to complete.
+
+**Please note**: Remote loading supports only a single file, i.e. the path parameter must be the path of a single TsFile. Remote loading is also not recommended if your TsFile has undergone a delete operation (i.e., the TsFile has an attached .mods file): data that should have been deleted will still be present after the load.
+
 ### remove a tsfile
 
 The command to delete a tsfile is: `remove '<path>'`.
diff --git a/docs/zh/UserGuide/Write-And-Delete-Data/Load-External-Tsfile.md b/docs/zh/UserGuide/Write-And-Delete-Data/Load-External-Tsfile.md
index 4d59860985..65cbb2f766 100644
--- a/docs/zh/UserGuide/Write-And-Delete-Data/Load-External-Tsfile.md
+++ b/docs/zh/UserGuide/Write-And-Delete-Data/Load-External-Tsfile.md
@@ -70,7 +70,20 @@ VERIFY 选项表示是否对载入的 tsfile 中的所有时间序列进行元
 * `load '/Users/Desktop/data' autoregister=true,sglevel=1`
 * `load '/Users/Desktop/data' autoregister=false,sglevel=1,verify=true`
 
-#### 删除 tsfile 文件
+#### 远程加载
+
+通常情况下,文件路径必须是 IoTDB 实例所在机器的本地文件路径,在 IoTDB 0.13.5 及之后的版本中,文件路径新增支持 HTTP 风格的 URI,可以通过 HTTP 协议远程加载单个文件。格式为 `load 'http://host:port/filePath'`。
+
+例如,如果您的 IoTDB 实例在 IP 地址为 168.121.0.1 的机器 A 上运行,您希望将 IP 地址为 168.121.0.2 机器 B 上的文件`/root/data/1-1-0-0.tsfile`加载进 IoTDB 实例,您需要按照以下步骤操作:
+
+1. 在机器 B 上启动 HTTP 服务,例如您可以使用 python 命令 `python -m http.server` 来启动一个简单的 HTTP 服务。
+2. 使用 Cli 工具连接到机器 A 上的 IoTDB 实例
+3. 输入 SQL 指令 `load 'http://168.121.0.2:8000/root/data/1-1-0-0.tsfile'`
+4. 等待加载完成
+
+**请注意**:采用远程加载的情况下,仅支持加载单个文件,即路径参数必须为单个 TsFile 文件路径。同时如果您的 TsFile 经历过删除操作(即,TsFile 文件有附带的 .mods 文件),也不推荐您使用远程加载,这将导致本该被删除的数据在加载之后仍然没有被删除。
+
+### 删除 tsfile 文件
 
 删除 tsfile 文件的指令为:`remove '<path>'`
 
@@ -80,7 +93,7 @@ VERIFY 选项表示是否对载入的 tsfile 中的所有时间序列进行元
 
 * `remove '/Users/Desktop/data/data/root.vehicle/0/0/1575028885956-101-0.tsfile'`
 
-#### 卸载 tsfile 文件至指定目录
+### 卸载 tsfile 文件至指定目录
 
 卸载 tsfile 文件的指令为:`unload '<path>' '<dir>'`
 
diff --git a/server/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java b/server/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java
index 449ee57d40..6078eef64a 100644
--- a/server/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java
+++ b/server/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java
@@ -225,6 +225,8 @@ public class IoTDBConfig {
   /** System directory, including version file for each storage group and metadata */
   private String systemDir = DEFAULT_BASE_DIR + File.separator + IoTDBConstant.SYSTEM_FOLDER_NAME;
 
+  private final String loadTempDirName = "load";
+
   /** Schema directory, including storage set of values. */
   private String schemaDir =
       DEFAULT_BASE_DIR
@@ -1209,6 +1211,10 @@ public class IoTDBConfig {
     this.systemDir = systemDir;
   }
 
+  public String getLoadTempDir() {
+    return String.join(File.separator, getSystemDir(), loadTempDirName);
+  }
+
   public String getSchemaDir() {
     return schemaDir;
   }
diff --git a/server/src/main/java/org/apache/iotdb/db/engine/storagegroup/TsFileResource.java b/server/src/main/java/org/apache/iotdb/db/engine/storagegroup/TsFileResource.java
index cdade7ef17..71f8b4ba4a 100644
--- a/server/src/main/java/org/apache/iotdb/db/engine/storagegroup/TsFileResource.java
+++ b/server/src/main/java/org/apache/iotdb/db/engine/storagegroup/TsFileResource.java
@@ -1093,4 +1093,30 @@ public class TsFileResource {
   public boolean isFileInList() {
     return prev != null || next != null;
   }
+
+  /**
+   * Compare the names of two TsFiles. This method first checks that both names meet the standard
+   * naming specification, then compares them using the generating time as the first key and the
+   * version number as the second key. The merge count is not compared. The keys are compared
+   * with Long.compare instead of subtraction, so extreme values cannot overflow.
+   *
+   * @param fileName1 the name of a TsFile
+   * @param fileName2 the name of a TsFile
+   * @return -1 if fileName1 is smaller than fileName2, 1 if it is bigger, 0 if the two names have
+   *     the same time and version
+   * @throws IOException if fileName1 or fileName2 does not meet the standard naming specification
+   */
+  public static int checkAndCompareFileName(String fileName1, String fileName2) throws IOException {
+    TsFileNameGenerator.TsFileName tsFileName1 = TsFileNameGenerator.getTsFileName(fileName1);
+    TsFileNameGenerator.TsFileName tsFileName2 = TsFileNameGenerator.getTsFileName(fileName2);
+    int timeOrder = Long.compare(tsFileName1.getTime(), tsFileName2.getTime());
+    if (timeOrder != 0) {
+      return timeOrder < 0 ? -1 : 1;
+    }
+    int versionOrder = Long.compare(tsFileName1.getVersion(), tsFileName2.getVersion());
+    if (versionOrder != 0) {
+      return versionOrder < 0 ? -1 : 1;
+    }
+    return 0;
+  }
 }
diff --git a/server/src/main/java/org/apache/iotdb/db/qp/executor/PlanExecutor.java b/server/src/main/java/org/apache/iotdb/db/qp/executor/PlanExecutor.java
index 32fbe387ed..69a1f3b27f 100644
--- a/server/src/main/java/org/apache/iotdb/db/qp/executor/PlanExecutor.java
+++ b/server/src/main/java/org/apache/iotdb/db/qp/executor/PlanExecutor.java
@@ -181,12 +181,17 @@ import org.apache.iotdb.tsfile.utils.Pair;
 import org.apache.iotdb.tsfile.write.schema.IMeasurementSchema;
 import org.apache.iotdb.tsfile.write.writer.RestorableTsFileIOWriter;
 
+import org.apache.commons.io.FileUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.File;
 import java.io.IOException;
 import java.lang.reflect.InvocationTargetException;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
 import java.time.ZonedDateTime;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -201,6 +206,7 @@ import java.util.Map.Entry;
 import java.util.Set;
 import java.util.concurrent.Future;
 import java.util.concurrent.ThreadPoolExecutor;
+import java.util.stream.Collectors;
 
 import static org.apache.iotdb.db.audit.AuditLogOperation.DDL;
 import static org.apache.iotdb.db.audit.AuditLogOperation.DML;
@@ -236,7 +242,6 @@ import static org.apache.iotdb.db.conf.IoTDBConstant.COLUMN_TIMESERIES_ENCODING;
 import static org.apache.iotdb.db.conf.IoTDBConstant.COLUMN_TTL;
 import static org.apache.iotdb.db.conf.IoTDBConstant.COLUMN_USER;
 import static org.apache.iotdb.db.conf.IoTDBConstant.COLUMN_VALUE;
-import static org.apache.iotdb.db.conf.IoTDBConstant.FILE_NAME_SEPARATOR;
 import static org.apache.iotdb.db.conf.IoTDBConstant.FUNCTION_TYPE_BUILTIN_UDAF;
 import static org.apache.iotdb.db.conf.IoTDBConstant.FUNCTION_TYPE_BUILTIN_UDTF;
 import static org.apache.iotdb.db.conf.IoTDBConstant.FUNCTION_TYPE_EXTERNAL_UDAF;
@@ -1464,7 +1469,7 @@ public class PlanExecutor implements IPlanExecutor {
   }
 
   private void operateLoadFiles(OperateFilePlan plan) throws QueryProcessException {
-    File file = plan.getFile();
+    File file = getOrDownloadFile(plan.getFilePath());
     if (!file.exists()) {
       throw new QueryProcessException(
           String.format("File path '%s' doesn't exists.", file.getPath()));
@@ -1479,38 +1484,58 @@ public class PlanExecutor implements IPlanExecutor {
     }
   }
 
-  private void loadDir(File curFile, OperateFilePlan plan) throws QueryProcessException {
-    File[] files = curFile.listFiles();
-    long[] establishTime = new long[files.length];
-    List<Integer> tsfiles = new ArrayList<>();
-    List<String> failedFiles = new ArrayList<>();
-
-    for (int i = 0; i < files.length; i++) {
-      File file = files[i];
-      if (!file.isDirectory()) {
-        String fileName = file.getName();
-        if (fileName.endsWith(TSFILE_SUFFIX)) {
-          establishTime[i] = Long.parseLong(fileName.split(FILE_NAME_SEPARATOR)[0]);
-          tsfiles.add(i);
-        }
-      }
+  private File getOrDownloadFile(String filePath) throws QueryProcessException {
+    try {
+      // try to get remote files
+      URL url = new URI(filePath).toURL();
+      String fileName = filePath.substring(filePath.lastIndexOf("/") + 1);
+      File destFile =
+          new File(
+              IoTDBDescriptor.getInstance().getConfig().getLoadTempDir()
+                  + File.separator
+                  + fileName);
+      FileUtils.copyURLToFile(url, destFile);
+      return destFile;
+    } catch (MalformedURLException | URISyntaxException | IllegalArgumentException e) {
+      // try to get local files
+      File file = new File(filePath);
+      if (!file.exists()) {
+        throw new QueryProcessException(String.format("File path '%s' doesn't exists.", filePath));
+      }
+      return file;
+    } catch (Exception e) {
+      logger.warn(String.format("Can not find loading file %s.", filePath), e);
+      throw new QueryProcessException(
+          String.format("Find loading file %s error, because of %s", filePath, e.getMessage()));
     }
-    Collections.sort(
-        tsfiles,
+  }
+
+  private void loadDir(File curFile, OperateFilePlan plan) throws QueryProcessException {
+    final File[] files = curFile.listFiles();
+    final List<String> failedFiles = new ArrayList<>();
+    final List<File> tsFiles =
+        Arrays.stream(files)
+            .filter(o -> o.getName().endsWith(TSFILE_SUFFIX))
+            .collect(Collectors.toList());
+    tsFiles.sort(
         (o1, o2) -> {
-          if (establishTime[o1] == establishTime[o2]) {
-            return 0;
+          String file1Name = o1.getName();
+          String file2Name = o2.getName();
+          try {
+            return TsFileResource.checkAndCompareFileName(file1Name, file2Name);
+          } catch (IOException e) {
+            return file1Name.compareTo(file2Name);
+          }
+        });
+    tsFiles.forEach(
+        f -> {
+          try {
+            loadFile(f, plan);
+          } catch (QueryProcessException e) {
+            logger.error("{}, skip load {}.", e.getMessage(), f.getAbsolutePath());
+            failedFiles.add(f.getAbsolutePath());
           }
-          return establishTime[o1] < establishTime[o2] ? -1 : 1;
         });
-    for (Integer i : tsfiles) {
-      try {
-        loadFile(files[i], plan);
-      } catch (QueryProcessException e) {
-        logger.error("{}, skip load {}.", e.getMessage(), files[i].getAbsolutePath());
-        failedFiles.add(files[i].getAbsolutePath());
-      }
-    }
 
     for (File file : files) {
       if (file.isDirectory()) {
diff --git a/server/src/main/java/org/apache/iotdb/db/qp/logical/sys/LoadFilesOperator.java b/server/src/main/java/org/apache/iotdb/db/qp/logical/sys/LoadFilesOperator.java
index 5a860b6388..bbb295bb56 100644
--- a/server/src/main/java/org/apache/iotdb/db/qp/logical/sys/LoadFilesOperator.java
+++ b/server/src/main/java/org/apache/iotdb/db/qp/logical/sys/LoadFilesOperator.java
@@ -34,23 +34,23 @@ import java.io.File;
  */
 public class LoadFilesOperator extends Operator {
 
-  private File file;
+  private String filePath;
   private boolean autoCreateSchema;
   private int sgLevel;
   private boolean verifyMetadata;
 
   public LoadFilesOperator(
-      File file, boolean autoCreateSchema, int sgLevel, boolean verifyMetadata) {
+      String filePath, boolean autoCreateSchema, int sgLevel, boolean verifyMetadata) {
     super(SQLConstant.TOK_LOAD_FILES);
-    this.file = file;
+    this.filePath = filePath;
     this.autoCreateSchema = autoCreateSchema;
     this.sgLevel = sgLevel;
     this.verifyMetadata = verifyMetadata;
     this.operatorType = OperatorType.LOAD_FILES;
   }
 
-  public File getFile() {
-    return file;
+  public String getFilePath() {
+    return this.filePath;
+  }
 
   public boolean isAutoCreateSchema() {
@@ -77,6 +77,11 @@ public class LoadFilesOperator extends Operator {
   public PhysicalPlan generatePhysicalPlan(PhysicalGenerator generator)
       throws QueryProcessException {
     return new OperateFilePlan(
-        file, OperatorType.LOAD_FILES, autoCreateSchema, sgLevel, verifyMetadata);
+        new File(filePath),
+        OperatorType.LOAD_FILES,
+        autoCreateSchema,
+        sgLevel,
+        verifyMetadata,
+        filePath);
   }
 }
diff --git a/server/src/main/java/org/apache/iotdb/db/qp/physical/sys/OperateFilePlan.java b/server/src/main/java/org/apache/iotdb/db/qp/physical/sys/OperateFilePlan.java
index aff6be9b43..d3fa480672 100644
--- a/server/src/main/java/org/apache/iotdb/db/qp/physical/sys/OperateFilePlan.java
+++ b/server/src/main/java/org/apache/iotdb/db/qp/physical/sys/OperateFilePlan.java
@@ -33,6 +33,7 @@ public class OperateFilePlan extends PhysicalPlan {
   private boolean autoCreateSchema;
   private int sgLevel;
   private boolean verifyMetadata;
+  private String filePath;
 
   public OperateFilePlan(File file, OperatorType operatorType) {
     super(operatorType);
@@ -53,12 +54,14 @@ public class OperateFilePlan extends PhysicalPlan {
       OperatorType operatorType,
       boolean autoCreateSchema,
       int sgLevel,
-      boolean verifyMetadata) {
+      boolean verifyMetadata,
+      String filePath) {
     super(operatorType);
     this.file = file;
     this.autoCreateSchema = autoCreateSchema;
     this.sgLevel = sgLevel;
     this.verifyMetadata = verifyMetadata;
+    this.filePath = filePath;
   }
 
   public OperateFilePlan(File file, File targetDir, OperatorType operatorType) {
@@ -92,6 +95,10 @@ public class OperateFilePlan extends PhysicalPlan {
     return verifyMetadata;
   }
 
+  public String getFilePath() {
+    return this.filePath;
+  }
+
   @Override
   public String toString() {
     return "OperateFilePlan{"
diff --git a/server/src/main/java/org/apache/iotdb/db/qp/sql/IoTDBSqlVisitor.java b/server/src/main/java/org/apache/iotdb/db/qp/sql/IoTDBSqlVisitor.java
index 78870d3e26..5367051fc0 100644
--- a/server/src/main/java/org/apache/iotdb/db/qp/sql/IoTDBSqlVisitor.java
+++ b/server/src/main/java/org/apache/iotdb/db/qp/sql/IoTDBSqlVisitor.java
@@ -2250,7 +2250,7 @@ public class IoTDBSqlVisitor extends IoTDBSqlParserBaseVisitor<Operator> {
   public Operator visitLoadFile(IoTDBSqlParser.LoadFileContext ctx) {
     LoadFilesOperator loadFilesOperator =
         new LoadFilesOperator(
-            new File(parseFilePath(ctx.fileName.getText())),
+            parseFilePath(ctx.fileName.getText()),
             true,
             IoTDBDescriptor.getInstance().getConfig().getDefaultStorageGroupLevel(),
             true);