You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iotdb.apache.org by ro...@apache.org on 2023/04/19 17:15:51 UTC
[iotdb] branch rel/0.13 updated: [To rel/0.13][IOTDB-5797] Load remote TsFile (#9646)
This is an automated email from the ASF dual-hosted git repository.
rong pushed a commit to branch rel/0.13
in repository https://gitbox.apache.org/repos/asf/iotdb.git
The following commit(s) were added to refs/heads/rel/0.13 by this push:
new 002793af2b [To rel/0.13][IOTDB-5797] Load remote TsFile (#9646)
002793af2b is described below
commit 002793af2b55e04e9599134d5efca6e1656f8f6d
Author: yschengzi <87...@users.noreply.github.com>
AuthorDate: Thu Apr 20 01:15:43 2023 +0800
[To rel/0.13][IOTDB-5797] Load remote TsFile (#9646)
* support remote HTTP load
* handle IllegalArgumentException
* add user guide
---
.../Write-And-Delete-Data/Load-External-Tsfile.md | 13 ++++
.../Write-And-Delete-Data/Load-External-Tsfile.md | 17 ++++-
.../java/org/apache/iotdb/db/conf/IoTDBConfig.java | 6 ++
.../db/engine/storagegroup/TsFileResource.java | 26 +++++++
.../apache/iotdb/db/qp/executor/PlanExecutor.java | 85 ++++++++++++++--------
.../iotdb/db/qp/logical/sys/LoadFilesOperator.java | 17 +++--
.../iotdb/db/qp/physical/sys/OperateFilePlan.java | 9 ++-
.../apache/iotdb/db/qp/sql/IoTDBSqlVisitor.java | 2 +-
8 files changed, 135 insertions(+), 40 deletions(-)
diff --git a/docs/UserGuide/Write-And-Delete-Data/Load-External-Tsfile.md b/docs/UserGuide/Write-And-Delete-Data/Load-External-Tsfile.md
index 53390d431d..9d24ef130f 100644
--- a/docs/UserGuide/Write-And-Delete-Data/Load-External-Tsfile.md
+++ b/docs/UserGuide/Write-And-Delete-Data/Load-External-Tsfile.md
@@ -71,6 +71,19 @@ Examples:
* `load '/Users/Desktop/data' autoregister=true,sglevel=1`
* `load '/Users/Desktop/data' autoregister=false,sglevel=1,verify=true`
+#### Remote Load TsFile
+
+Normally, the file path must be a local file path on the machine where the IoTDB instance is located. In IoTDB 0.13.5 and later, the file path also supports HTTP-style URIs, which allow an individual file to be loaded remotely via the HTTP protocol. The format is `load 'http://host:port/filePath'`.
+
+For example, if your IoTDB instance is running on machine A with IP address 168.121.0.1 and you want to load the file `/root/data/1-1-0-0.tsfile` from machine B with IP address 168.121.0.2 into your IoTDB instance, you need to follow these steps:
+
+1. Start the HTTP service on Machine B. For example, you can use the Python command `python -m http.server` to start a simple HTTP service (it listens on port 8000 by default).
+2. Use the Cli.sh to connect to the IoTDB instance on Machine A.
+3. Enter the SQL command `load 'http://168.121.0.2:8000/root/data/1-1-0-0.tsfile'`
+4. Wait for the load to complete
+
+**Please note**: In the case of remote loading, only a single file is supported, i.e. the path parameter must be the path of a single TsFile. It is also not recommended to use remote loading if your TsFile has undergone a delete operation (i.e., the TsFile has an accompanying .mods file): data that should have been deleted will still be present after the load.
+
### remove a tsfile
The command to delete a tsfile is: `remove '<path>'`.
diff --git a/docs/zh/UserGuide/Write-And-Delete-Data/Load-External-Tsfile.md b/docs/zh/UserGuide/Write-And-Delete-Data/Load-External-Tsfile.md
index 4d59860985..65cbb2f766 100644
--- a/docs/zh/UserGuide/Write-And-Delete-Data/Load-External-Tsfile.md
+++ b/docs/zh/UserGuide/Write-And-Delete-Data/Load-External-Tsfile.md
@@ -70,7 +70,20 @@ VERIFY 选项表示是否对载入的 tsfile 中的所有时间序列进行元
* `load '/Users/Desktop/data' autoregister=true,sglevel=1`
* `load '/Users/Desktop/data' autoregister=false,sglevel=1,verify=true`
-#### 删除 tsfile 文件
+#### 远程加载
+
+通常情况下,文件路径必须是 IoTDB 实例所在机器的本地文件路径,在 IoTDB 0.13.5 及之后的版本中,文件路径新增支持 HTTP 风格的 URI,可以通过 HTTP 协议远程加载单个文件。格式为 `load 'http://host:port/filePath'`。
+
+例如,如果您的 IoTDB 实例在 IP 地址为 168.121.0.1 的机器 A 上运行,您希望将 IP 地址为 168.121.0.2 机器 B 上的文件`/root/data/1-1-0-0.tsfile`加载进 IoTDB 实例,您需要按照以下步骤操作
+
+1. 在机器 B 上启动 HTTP 服务,例如您可以使用 python 命令 `python -m http.server` 来启动一个简单的 HTTP 服务。
+2. 使用 Cli 工具连接到机器 A 上的 IoTDB 实例
+3. 输入 SQL 指令 `load 'http://168.121.0.2:8000/root/data/1-1-0-0.tsfile'`
+4. 等待加载完成
+
+**请注意**:采用远程加载的情况下,仅支持加载单个文件,即路径参数必须为单个 TsFile 文件路径。同时如果您的 TsFile 经历过删除操作(即,TsFile 文件有附带的 .mods 文件),也不推荐您使用远程加载,这将导致本该被删除的数据在加载之后仍然没有被删除。
+
+### 删除 tsfile 文件
删除 tsfile 文件的指令为:`remove '<path>'`
@@ -80,7 +93,7 @@ VERIFY 选项表示是否对载入的 tsfile 中的所有时间序列进行元
* `remove '/Users/Desktop/data/data/root.vehicle/0/0/1575028885956-101-0.tsfile'`
-#### 卸载 tsfile 文件至指定目录
+### 卸载 tsfile 文件至指定目录
卸载 tsfile 文件的指令为:`unload '<path>' '<dir>'`
diff --git a/server/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java b/server/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java
index 449ee57d40..6078eef64a 100644
--- a/server/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java
+++ b/server/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java
@@ -225,6 +225,8 @@ public class IoTDBConfig {
/** System directory, including version file for each storage group and metadata */
private String systemDir = DEFAULT_BASE_DIR + File.separator + IoTDBConstant.SYSTEM_FOLDER_NAME;
+ private final String loadTempDirName = "load";
+
/** Schema directory, including storage set of values. */
private String schemaDir =
DEFAULT_BASE_DIR
@@ -1209,6 +1211,10 @@ public class IoTDBConfig {
this.systemDir = systemDir;
}
+ /**
+  * Returns the path of the load temp directory, built by appending the load temp directory name
+  * to the current system directory.
+  *
+  * @return the system directory path, a file separator, and the load temp directory name
+  */
+ public String getLoadTempDir() {
+   final String systemDirectory = getSystemDir();
+   return systemDirectory + File.separator + loadTempDirName;
+ }
+
public String getSchemaDir() {
return schemaDir;
}
diff --git a/server/src/main/java/org/apache/iotdb/db/engine/storagegroup/TsFileResource.java b/server/src/main/java/org/apache/iotdb/db/engine/storagegroup/TsFileResource.java
index cdade7ef17..71f8b4ba4a 100644
--- a/server/src/main/java/org/apache/iotdb/db/engine/storagegroup/TsFileResource.java
+++ b/server/src/main/java/org/apache/iotdb/db/engine/storagegroup/TsFileResource.java
@@ -1093,4 +1093,30 @@ public class TsFileResource {
public boolean isFileInList() {
return prev != null || next != null;
}
+
+ /**
+  * Compares the names of two TsFiles. Both names are first parsed by {@code
+  * TsFileNameGenerator.getTsFileName}; the generating time is then used as the first sort key and
+  * the version number as the second. Notice that this method does not compare the merge count.
+  *
+  * @param fileName1 a name of TsFile
+  * @param fileName2 a name of TsFile
+  * @return -1 if fileName1 is smaller than fileName2, 1 if it is bigger, 0 if both names carry
+  *     the same time and version
+  * @throws IOException if fileName1 or fileName2 do not meet the standard naming specifications.
+  */
+ public static int checkAndCompareFileName(String fileName1, String fileName2) throws IOException {
+   TsFileNameGenerator.TsFileName tsFileName1 = TsFileNameGenerator.getTsFileName(fileName1);
+   TsFileNameGenerator.TsFileName tsFileName2 = TsFileNameGenerator.getTsFileName(fileName2);
+   // Compare directly instead of subtracting: the difference of two longs can overflow and
+   // flip its sign, which would silently invert the ordering.
+   int timeOrder = Long.compare(tsFileName1.getTime(), tsFileName2.getTime());
+   if (timeOrder != 0) {
+     // signum keeps the documented -1/0/1 contract regardless of the magnitude returned.
+     return Integer.signum(timeOrder);
+   }
+   return Integer.signum(Long.compare(tsFileName1.getVersion(), tsFileName2.getVersion()));
+ }
}
diff --git a/server/src/main/java/org/apache/iotdb/db/qp/executor/PlanExecutor.java b/server/src/main/java/org/apache/iotdb/db/qp/executor/PlanExecutor.java
index 32fbe387ed..69a1f3b27f 100644
--- a/server/src/main/java/org/apache/iotdb/db/qp/executor/PlanExecutor.java
+++ b/server/src/main/java/org/apache/iotdb/db/qp/executor/PlanExecutor.java
@@ -181,12 +181,17 @@ import org.apache.iotdb.tsfile.utils.Pair;
import org.apache.iotdb.tsfile.write.schema.IMeasurementSchema;
import org.apache.iotdb.tsfile.write.writer.RestorableTsFileIOWriter;
+import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
import java.time.ZonedDateTime;
import java.util.ArrayList;
import java.util.Arrays;
@@ -201,6 +206,7 @@ import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadPoolExecutor;
+import java.util.stream.Collectors;
import static org.apache.iotdb.db.audit.AuditLogOperation.DDL;
import static org.apache.iotdb.db.audit.AuditLogOperation.DML;
@@ -236,7 +242,6 @@ import static org.apache.iotdb.db.conf.IoTDBConstant.COLUMN_TIMESERIES_ENCODING;
import static org.apache.iotdb.db.conf.IoTDBConstant.COLUMN_TTL;
import static org.apache.iotdb.db.conf.IoTDBConstant.COLUMN_USER;
import static org.apache.iotdb.db.conf.IoTDBConstant.COLUMN_VALUE;
-import static org.apache.iotdb.db.conf.IoTDBConstant.FILE_NAME_SEPARATOR;
import static org.apache.iotdb.db.conf.IoTDBConstant.FUNCTION_TYPE_BUILTIN_UDAF;
import static org.apache.iotdb.db.conf.IoTDBConstant.FUNCTION_TYPE_BUILTIN_UDTF;
import static org.apache.iotdb.db.conf.IoTDBConstant.FUNCTION_TYPE_EXTERNAL_UDAF;
@@ -1464,7 +1469,7 @@ public class PlanExecutor implements IPlanExecutor {
}
private void operateLoadFiles(OperateFilePlan plan) throws QueryProcessException {
- File file = plan.getFile();
+ File file = getOrDownloadFile(plan.getFilePath());
if (!file.exists()) {
throw new QueryProcessException(
String.format("File path '%s' doesn't exists.", file.getPath()));
@@ -1479,38 +1484,58 @@ public class PlanExecutor implements IPlanExecutor {
}
}
- private void loadDir(File curFile, OperateFilePlan plan) throws QueryProcessException {
- File[] files = curFile.listFiles();
- long[] establishTime = new long[files.length];
- List<Integer> tsfiles = new ArrayList<>();
- List<String> failedFiles = new ArrayList<>();
-
- for (int i = 0; i < files.length; i++) {
- File file = files[i];
- if (!file.isDirectory()) {
- String fileName = file.getName();
- if (fileName.endsWith(TSFILE_SUFFIX)) {
- establishTime[i] = Long.parseLong(fileName.split(FILE_NAME_SEPARATOR)[0]);
- tsfiles.add(i);
- }
- }
+ private File getOrDownloadFile(String filePath) throws QueryProcessException {
+ try {
+ // try to get remote files
+ URL url = new URI(filePath).toURL();
+ String fileName = filePath.substring(filePath.lastIndexOf("/") + 1);
+ File destFile =
+ new File(
+ IoTDBDescriptor.getInstance().getConfig().getLoadTempDir()
+ + File.separator
+ + fileName);
+ FileUtils.copyURLToFile(url, destFile);
+ return destFile;
+ } catch (MalformedURLException | URISyntaxException | IllegalArgumentException e) {
+ // try to get local files
+ File file = new File(filePath);
+ if (!file.exists()) {
+ throw new QueryProcessException(String.format("File path '%s' doesn't exists.", filePath));
+ }
+ return file;
+ } catch (Exception e) {
+ logger.warn(String.format("Can not find loading file %s.", filePath), e);
+ throw new QueryProcessException(
+ String.format("Find loading file %s error, because of %s", filePath, e.getMessage()));
}
- Collections.sort(
- tsfiles,
+ }
+
+ private void loadDir(File curFile, OperateFilePlan plan) throws QueryProcessException {
+ final File[] files = curFile.listFiles();
+ final List<String> failedFiles = new ArrayList<>();
+ final List<File> tsFiles =
+ Arrays.stream(files)
+ .filter(o -> o.getName().endsWith(TSFILE_SUFFIX))
+ .collect(Collectors.toList());
+ tsFiles.sort(
(o1, o2) -> {
- if (establishTime[o1] == establishTime[o2]) {
- return 0;
+ String file1Name = o1.getName();
+ String file2Name = o2.getName();
+ try {
+ return TsFileResource.checkAndCompareFileName(file1Name, file2Name);
+ } catch (IOException e) {
+ return file1Name.compareTo(file2Name);
+ }
+ });
+ tsFiles.forEach(
+ f -> {
+ try {
+ loadFile(f, plan);
+ } catch (QueryProcessException e) {
+ logger.error("{}, skip load {}.", e.getMessage(), f.getAbsolutePath());
+ failedFiles.add(f.getAbsolutePath());
}
- return establishTime[o1] < establishTime[o2] ? -1 : 1;
});
- for (Integer i : tsfiles) {
- try {
- loadFile(files[i], plan);
- } catch (QueryProcessException e) {
- logger.error("{}, skip load {}.", e.getMessage(), files[i].getAbsolutePath());
- failedFiles.add(files[i].getAbsolutePath());
- }
- }
for (File file : files) {
if (file.isDirectory()) {
diff --git a/server/src/main/java/org/apache/iotdb/db/qp/logical/sys/LoadFilesOperator.java b/server/src/main/java/org/apache/iotdb/db/qp/logical/sys/LoadFilesOperator.java
index 5a860b6388..bbb295bb56 100644
--- a/server/src/main/java/org/apache/iotdb/db/qp/logical/sys/LoadFilesOperator.java
+++ b/server/src/main/java/org/apache/iotdb/db/qp/logical/sys/LoadFilesOperator.java
@@ -34,23 +34,23 @@ import java.io.File;
*/
public class LoadFilesOperator extends Operator {
- private File file;
+ private String filePath;
private boolean autoCreateSchema;
private int sgLevel;
private boolean verifyMetadata;
public LoadFilesOperator(
- File file, boolean autoCreateSchema, int sgLevel, boolean verifyMetadata) {
+ String filePath, boolean autoCreateSchema, int sgLevel, boolean verifyMetadata) {
super(SQLConstant.TOK_LOAD_FILES);
- this.file = file;
+ this.filePath = filePath;
this.autoCreateSchema = autoCreateSchema;
this.sgLevel = sgLevel;
this.verifyMetadata = verifyMetadata;
this.operatorType = OperatorType.LOAD_FILES;
}
- public File getFile() {
- return file;
+ public String getFilePath() {
+ return filePath;
}
public boolean isAutoCreateSchema() {
@@ -77,6 +77,11 @@ public class LoadFilesOperator extends Operator {
public PhysicalPlan generatePhysicalPlan(PhysicalGenerator generator)
throws QueryProcessException {
return new OperateFilePlan(
- file, OperatorType.LOAD_FILES, autoCreateSchema, sgLevel, verifyMetadata);
+ new File(filePath),
+ OperatorType.LOAD_FILES,
+ autoCreateSchema,
+ sgLevel,
+ verifyMetadata,
+ filePath);
}
}
diff --git a/server/src/main/java/org/apache/iotdb/db/qp/physical/sys/OperateFilePlan.java b/server/src/main/java/org/apache/iotdb/db/qp/physical/sys/OperateFilePlan.java
index aff6be9b43..d3fa480672 100644
--- a/server/src/main/java/org/apache/iotdb/db/qp/physical/sys/OperateFilePlan.java
+++ b/server/src/main/java/org/apache/iotdb/db/qp/physical/sys/OperateFilePlan.java
@@ -33,6 +33,7 @@ public class OperateFilePlan extends PhysicalPlan {
private boolean autoCreateSchema;
private int sgLevel;
private boolean verifyMetadata;
+ private String filePath;
public OperateFilePlan(File file, OperatorType operatorType) {
super(operatorType);
@@ -53,12 +54,14 @@ public class OperateFilePlan extends PhysicalPlan {
OperatorType operatorType,
boolean autoCreateSchema,
int sgLevel,
- boolean verifyMetadata) {
+ boolean verifyMetadata,
+ String filePath) {
super(operatorType);
this.file = file;
this.autoCreateSchema = autoCreateSchema;
this.sgLevel = sgLevel;
this.verifyMetadata = verifyMetadata;
+ this.filePath = filePath;
}
public OperateFilePlan(File file, File targetDir, OperatorType operatorType) {
@@ -92,6 +95,10 @@ public class OperateFilePlan extends PhysicalPlan {
return verifyMetadata;
}
+ /**
+  * Returns the file path string stored in this plan.
+  *
+  * @return the value of the {@code filePath} field; may be {@code null} when the plan was built
+  *     through a constructor that does not set it
+  */
+ public String getFilePath() {
+   return this.filePath;
+ }
+
@Override
public String toString() {
return "OperateFilePlan{"
diff --git a/server/src/main/java/org/apache/iotdb/db/qp/sql/IoTDBSqlVisitor.java b/server/src/main/java/org/apache/iotdb/db/qp/sql/IoTDBSqlVisitor.java
index 78870d3e26..5367051fc0 100644
--- a/server/src/main/java/org/apache/iotdb/db/qp/sql/IoTDBSqlVisitor.java
+++ b/server/src/main/java/org/apache/iotdb/db/qp/sql/IoTDBSqlVisitor.java
@@ -2250,7 +2250,7 @@ public class IoTDBSqlVisitor extends IoTDBSqlParserBaseVisitor<Operator> {
public Operator visitLoadFile(IoTDBSqlParser.LoadFileContext ctx) {
LoadFilesOperator loadFilesOperator =
new LoadFilesOperator(
- new File(parseFilePath(ctx.fileName.getText())),
+ parseFilePath(ctx.fileName.getText()),
true,
IoTDBDescriptor.getInstance().getConfig().getDefaultStorageGroupLevel(),
true);