You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@seatunnel.apache.org by ki...@apache.org on 2022/02/14 07:08:24 UTC
[incubator-seatunnel-website] branch main updated: Document sync and build (#49)
This is an automated email from the ASF dual-hosted git repository.
kirs pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-seatunnel-website.git
The following commit(s) were added to refs/heads/main by this push:
new 92a8005 Document sync and build (#49)
92a8005 is described below
commit 92a8005fd7ff5d455834c3ed3546aa570d9a4b9f
Author: wuchunfu <31...@qq.com>
AuthorDate: Mon Feb 14 15:08:19 2022 +0800
Document sync and build (#49)
---
.github/workflows/deploy.yml | 4 +-
docs/configuration/_category_.json | 4 -
docs/configuration/base.md | 114 --------
docs/configuration/filter-plugin.md | 45 ---
docs/configuration/filter-plugins/Add.docs | 10 -
docs/configuration/filter-plugins/Add.md | 39 ---
docs/configuration/filter-plugins/Checksum.docs | 11 -
docs/configuration/filter-plugins/Checksum.md | 48 ----
docs/configuration/filter-plugins/Convert.docs | 10 -
docs/configuration/filter-plugins/Convert.md | 44 ---
docs/configuration/filter-plugins/Date.docs | 15 -
docs/configuration/filter-plugins/Date.md | 92 ------
docs/configuration/filter-plugins/Drop.docs | 9 -
docs/configuration/filter-plugins/Drop.md | 36 ---
docs/configuration/filter-plugins/Grok.docs | 13 -
docs/configuration/filter-plugins/Grok.md | 81 ------
docs/configuration/filter-plugins/Join.md | 62 -----
docs/configuration/filter-plugins/Json.docs | 12 -
docs/configuration/filter-plugins/Json.md | 197 -------------
docs/configuration/filter-plugins/Kv.docs | 15 -
docs/configuration/filter-plugins/Kv.md | 133 ---------
docs/configuration/filter-plugins/Lowercase.docs | 10 -
docs/configuration/filter-plugins/Lowercase.md | 40 ---
docs/configuration/filter-plugins/Remove.docs | 9 -
docs/configuration/filter-plugins/Remove.md | 36 ---
docs/configuration/filter-plugins/Rename.docs | 10 -
docs/configuration/filter-plugins/Rename.md | 42 ---
docs/configuration/filter-plugins/Repartition.docs | 9 -
docs/configuration/filter-plugins/Repartition.md | 34 ---
docs/configuration/filter-plugins/Replace.docs | 12 -
docs/configuration/filter-plugins/Replace.md | 54 ----
docs/configuration/filter-plugins/Sample.docs | 10 -
docs/configuration/filter-plugins/Sample.md | 41 ---
docs/configuration/filter-plugins/Script.docs | 14 -
docs/configuration/filter-plugins/Script.md | 79 ------
docs/configuration/filter-plugins/Split.docs | 12 -
docs/configuration/filter-plugins/Split.md | 66 -----
docs/configuration/filter-plugins/Sql.docs | 10 -
docs/configuration/filter-plugins/Sql.md | 58 ----
docs/configuration/filter-plugins/Table.docs | 14 -
docs/configuration/filter-plugins/Table.md | 74 -----
docs/configuration/filter-plugins/Truncate.docs | 11 -
docs/configuration/filter-plugins/Truncate.md | 45 ---
docs/configuration/filter-plugins/Uppercase.docs | 10 -
docs/configuration/filter-plugins/Uppercase.md | 40 ---
docs/configuration/filter-plugins/Urldecode.md | 47 ----
docs/configuration/filter-plugins/Urlencode.md | 47 ----
docs/configuration/filter-plugins/Uuid.docs | 9 -
docs/configuration/filter-plugins/Uuid.md | 34 ---
docs/configuration/filter-plugins/Watermark.md | 57 ----
docs/configuration/input-plugin.md | 31 ---
docs/configuration/input-plugins/Alluxio.md | 93 -------
docs/configuration/input-plugins/Elasticsearch.md | 73 -----
docs/configuration/input-plugins/FakeStream.docs | 13 -
docs/configuration/input-plugins/FakeStream.md | 93 -------
docs/configuration/input-plugins/File.docs | 10 -
docs/configuration/input-plugins/File.md | 79 ------
docs/configuration/input-plugins/FileStream.docs | 10 -
docs/configuration/input-plugins/FileStream.md | 56 ----
docs/configuration/input-plugins/Hdfs.md | 91 ------
docs/configuration/input-plugins/HdfsStream.docs | 10 -
docs/configuration/input-plugins/HdfsStream.md | 64 -----
docs/configuration/input-plugins/Hive.docs | 11 -
docs/configuration/input-plugins/Hive.md | 64 -----
docs/configuration/input-plugins/Jdbc.md | 112 --------
docs/configuration/input-plugins/KafkaStream.docs | 11 -
docs/configuration/input-plugins/KafkaStream.md | 101 -------
docs/configuration/input-plugins/Kudu.docs | 12 -
docs/configuration/input-plugins/Kudu.md | 41 ---
docs/configuration/input-plugins/MongoDB.docs | 13 -
docs/configuration/input-plugins/MongoDB.md | 62 -----
docs/configuration/input-plugins/MySQL.md | 92 ------
docs/configuration/input-plugins/Redis.md | 72 -----
docs/configuration/input-plugins/RedisStream.md | 83 ------
docs/configuration/input-plugins/S3Stream.docs | 10 -
docs/configuration/input-plugins/S3Stream.md | 34 ---
docs/configuration/input-plugins/SocketStream.docs | 10 -
docs/configuration/input-plugins/SocketStream.md | 31 ---
docs/configuration/input-plugins/Tidb.md | 61 ----
docs/configuration/output-plugin.md | 32 ---
docs/configuration/output-plugins/Alluxio.md | 89 ------
docs/configuration/output-plugins/Clickhouse.docs | 14 -
docs/configuration/output-plugins/Clickhouse.md | 150 ----------
.../output-plugins/Elasticsearch.docs | 11 -
docs/configuration/output-plugins/Elasticsearch.md | 82 ------
docs/configuration/output-plugins/File.md | 71 -----
docs/configuration/output-plugins/Hdfs.md | 74 -----
docs/configuration/output-plugins/Hive.md | 73 -----
docs/configuration/output-plugins/Jdbc.docs | 14 -
docs/configuration/output-plugins/Jdbc.md | 88 ------
docs/configuration/output-plugins/Kafka.docs | 10 -
docs/configuration/output-plugins/Kafka.md | 79 ------
docs/configuration/output-plugins/Kudu.docs | 12 -
docs/configuration/output-plugins/Kudu.md | 48 ----
docs/configuration/output-plugins/MongoDB.docs | 12 -
docs/configuration/output-plugins/MongoDB.md | 92 ------
docs/configuration/output-plugins/MySQL.docs | 13 -
docs/configuration/output-plugins/MySQL.md | 61 ----
docs/configuration/output-plugins/Opentsdb.md | 121 --------
docs/configuration/output-plugins/S3.md | 69 -----
docs/configuration/output-plugins/Stdout.md | 41 ---
docs/configuration/output-plugins/Tidb.md | 73 -----
docs/contribution/_category_.json | 4 -
docs/contribution/contribution.md | 70 -----
docs/deployment/_category_.json | 4 -
docs/deployment/deployment.md | 58 ----
docs/deployment/installation.md | 34 ---
docs/development/_category_.json | 4 -
docs/development/development.md | 309 ---------------------
docs/internal/_category_.json | 4 -
docs/internal/internal.md | 34 ---
docs/internal/monitoring.md | 290 -------------------
docs/introduction.md | 213 +++++++-------
docs/quickstart/_category_.json | 4 -
docs/quickstart/quickstart.md | 135 ---------
docs/roadmap/_category_.json | 4 -
docs/roadmap/roadmap.md | 14 -
docs/usecase/1.md | 218 ---------------
docs/usecase/2.md | 186 -------------
docs/usecase/3.md | 231 ---------------
docs/usecase/4.md | 259 -----------------
docs/usecase/5.md | 280 -------------------
docs/usecase/README.md | 24 --
docs/usecase/_category_.json | 4 -
docusaurus.config.js | 2 +-
tools/build-docs.sh | 32 +++
126 files changed, 137 insertions(+), 6980 deletions(-)
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index 0019735..23ed46c 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -16,7 +16,9 @@ jobs:
- uses: actions/setup-node@v2.2.0
with:
node-version: 14
-
+ - name: Documents Sync
+ run: |
+ bash ./tools/build-docs.sh
- name: install
run: |
npm install
diff --git a/docs/configuration/_category_.json b/docs/configuration/_category_.json
deleted file mode 100644
index d8d4d6c..0000000
--- a/docs/configuration/_category_.json
+++ /dev/null
@@ -1,4 +0,0 @@
-{
- "label": "配置",
- "position": 3
-}
\ No newline at end of file
diff --git a/docs/configuration/base.md b/docs/configuration/base.md
deleted file mode 100644
index f82dc22..0000000
--- a/docs/configuration/base.md
+++ /dev/null
@@ -1,114 +0,0 @@
-# 通用配置
-
-## 核心概念
-
-* Row 是seatunnel逻辑意义上一条数据,是数据处理的基本单位。在Filter处理数据时,所有的数据都会被映射为Row。
-
-* Field 是Row的一个字段。Row可以包含嵌套层级的字段。
-
-* raw_message 指的是从input输入的数据在Row中的`raw_message`字段。
-
-* `__root__` 指的是与Row最顶层字段相同的字段层级,常用于指定数据处理过程中生成的新字段在Row中的存储位置(top level field)。
-
-
----
-
-## 配置文件
-
-一个完整的seatunnel配置包含`spark`, `input`, `filter`, `output`, 即:
-
-```
-spark {
- ...
-}
-
-input {
- ...
-}
-
-filter {
- ...
-}
-
-output {
- ...
-}
-
-```
-
-* `spark`是spark相关的配置,
-
-可配置的spark参数见:
-[Spark Configuration](https://spark.apache.org/docs/latest/configuration.html#available-properties),
-其中master, deploy-mode两个参数不能在这里配置,需要在seatunnel启动脚本中指定。
-
-* `input`可配置任意的input插件及其参数,具体参数随不同的input插件而变化。
-
-* `filter`可配置任意的filter插件及其参数,具体参数随不同的filter插件而变化。
-
-filter中的多个插件按配置顺序形成了数据处理的pipeline, 上一个filter的输出是下一个filter的输入。
-
-* `output`可配置任意的output插件及其参数,具体参数随不同的output插件而变化。
-
-`filter`处理完的数据,会发送给`output`中配置的每个插件。
-
-
----
-
-## 配置文件示例
-
-一个示例如下:
-
-> 配置中, 以`#`开头的行为注释。
-
-```
-spark {
- # You can set spark configuration here
- # seatunnel defined streaming batch duration in seconds
- spark.streaming.batchDuration = 5
-
- # see available properties defined by spark: https://spark.apache.org/docs/latest/configuration.html#available-properties
- spark.app.name = "seatunnel"
- spark.executor.instances = 2
- spark.executor.cores = 1
- spark.executor.memory = "1g"
-}
-
-input {
- # This is an example input plugin **only for testing and demonstrating the input plugin feature**
- fakestream {
- content = ["Hello World, InterestingLab"]
- rate = 1
- }
-
-
- # If you would like to get more information about how to configure seatunnel and see full list of input plugins,
- # please go to https://interestinglab.github.io/seatunnel-docs/#/zh-cn/v1/configuration/base
-}
-
-filter {
- split {
- fields = ["msg", "name"]
- delimiter = ","
- }
-
- # If you would like to get more information about how to configure seatunnel and see full list of filter plugins,
- # please go to https://interestinglab.github.io/seatunnel-docs/#/zh-cn/v1/configuration/base
-}
-
-output {
- stdout {}
-
-
- # If you would like to get more information about how to configure seatunnel and see full list of output plugins,
- # please go to https://interestinglab.github.io/seatunnel-docs/#/zh-cn/v1/configuration/base
-}
-```
-
-其他配置可参考:
-
-[配置示例1 : Streaming 流式计算](https://github.com/InterestingLab/seatunnel/blob/master/config/streaming.conf.template)
-
-[配置示例2 : Batch 离线批处理](https://github.com/InterestingLab/seatunnel/blob/master/config/batch.conf.template)
-
-[配置示例3 : 一个灵活的多数据流程处理](https://github.com/InterestingLab/seatunnel/blob/master/config/complex.conf.template)
diff --git a/docs/configuration/filter-plugin.md b/docs/configuration/filter-plugin.md
deleted file mode 100644
index e645d2b..0000000
--- a/docs/configuration/filter-plugin.md
+++ /dev/null
@@ -1,45 +0,0 @@
-# Filter 插件
-
-### Filter插件通用参数
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [source_table_name](#source_table_name-string) | string | no | - |
-| [result_table_name](#result_table_name-string) | string | no | - |
-
-
-##### source_table_name [string]
-
-不指定 `source_table_name` 时,当前插件处理的就是配置文件中上一个插件输出的数据集(dataset);
-
-指定 `source_table_name` 的时候,当前插件处理的就是此参数对应的数据集。
-
-##### result_table_name [string]
-
-不指定 `result_table_name` 时,此插件处理后的数据,不会被注册为一个可供其他插件直接访问的数据集(dataset),或者被称为临时表(table);
-
-指定 `result_table_name` 时,此插件处理后的数据,会被注册为一个可供其他插件直接访问的数据集(dataset),或者被称为临时表(table)。此处注册的数据集(dataset),其他插件可通过指定 `source_table_name` 来直接访问。
-
-### 使用样例
-
-```
-split {
- source_table_name = "view_table_1"
- source_field = "message"
- delimiter = "&"
- fields = ["field1", "field2"]
- result_table_name = "view_table_2"
-}
-```
-
-> `Split` 插件将会处理临时表 `view_table_1` 中的数据,并将处理结果注册为名为 `view_table_2` 的临时表, 这张临时表可以被后续任意 `Filter` 或 `Output` 插件通过指定 `source_table_name` 使用。
-
-```
-split {
- source_field = "message"
- delimiter = "&"
- fields = ["field1", "field2"]
-}
-```
-
-> 没有配置 `source_table_name`,`Split` 插件会读取上一个插件传递过来的数据集,并且传递给下一个插件。
diff --git a/docs/configuration/filter-plugins/Add.docs b/docs/configuration/filter-plugins/Add.docs
deleted file mode 100644
index d7be00d..0000000
--- a/docs/configuration/filter-plugins/Add.docs
+++ /dev/null
@@ -1,10 +0,0 @@
-@seatunnelPlugin
-@pluginGroup filter
-@pluginName Add
-@pluginDesc "在源数据中新增一个字段"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption string target_field yes "新增的字段名"
-@pluginOption string value yes "新增字段的值"
diff --git a/docs/configuration/filter-plugins/Add.md b/docs/configuration/filter-plugins/Add.md
deleted file mode 100644
index f8f8a79..0000000
--- a/docs/configuration/filter-plugins/Add.md
+++ /dev/null
@@ -1,39 +0,0 @@
-## Filter plugin : Add
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-在源数据中新增一个字段
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [target_field](#target_field-string) | string | yes | - |
-| [value](#value-string) | string | yes | - |
-| [common-options](#common-options-string)| string | no | - |
-
-##### target_field [string]
-
-新增的字段名
-
-##### value [string]
-
-新增字段的值, 目前仅支持固定值,不支持变量
-
-##### common options [string]
-
-`Filter` 插件通用参数,详情参照 [Filter Plugin](/zh-cn/v1/configuration/filter-plugin)
-
-### Examples
-
-```
-add {
- value = "1"
-}
-```
-
-> 新增一个字段,其值为1
diff --git a/docs/configuration/filter-plugins/Checksum.docs b/docs/configuration/filter-plugins/Checksum.docs
deleted file mode 100644
index f1ff832..0000000
--- a/docs/configuration/filter-plugins/Checksum.docs
+++ /dev/null
@@ -1,11 +0,0 @@
-@seatunnelPlugin
-@pluginGroup filter
-@pluginName Checksum
-@pluginDesc "获取指定字段的校验码"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption string source_field="raw_message" no "源字段"
-@pluginOption string target_field="checksum" no "转换后的字段"
-@pluginOption string method="SHA1" no "校验方法"
diff --git a/docs/configuration/filter-plugins/Checksum.md b/docs/configuration/filter-plugins/Checksum.md
deleted file mode 100644
index a3e738e..0000000
--- a/docs/configuration/filter-plugins/Checksum.md
+++ /dev/null
@@ -1,48 +0,0 @@
-## Filter plugin : Checksum
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-获取指定字段的校验码
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [method](#method-string) | string | no | SHA1 |
-| [source_field](#source_field-string) | string | no | raw_message |
-| [target_field](#target_field-string) | string | no | checksum |
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### method [string]
-
-校验方法,当前支持SHA1、MD5和CRC32
-
-##### source_field [string]
-
-源字段
-
-##### target_field [string]
-
-转换后的字段
-
-##### common options [string]
-
-`Filter` 插件通用参数,详情参照 [Filter Plugin](/zh-cn/v1/configuration/filter-plugin)
-
-
-### Examples
-
-```
-checksum {
- source_field = "deviceId"
- target_field = "device_crc32"
- method = "CRC32"
-}
-```
-
-> 获取`deviceId`字段CRC32校验码
diff --git a/docs/configuration/filter-plugins/Convert.docs b/docs/configuration/filter-plugins/Convert.docs
deleted file mode 100644
index 5ed40ec..0000000
--- a/docs/configuration/filter-plugins/Convert.docs
+++ /dev/null
@@ -1,10 +0,0 @@
-@seatunnelPlugin
-@pluginGroup filter
-@pluginName Convert
-@pluginDesc "对指定字段进行类型转换"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption string source_field yes "源字段"
-@pluginOption string new_type yes "需要转换的结果类型"
diff --git a/docs/configuration/filter-plugins/Convert.md b/docs/configuration/filter-plugins/Convert.md
deleted file mode 100644
index 2ea4590..0000000
--- a/docs/configuration/filter-plugins/Convert.md
+++ /dev/null
@@ -1,44 +0,0 @@
-## Filter plugin : Convert
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-对指定字段进行类型转换
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [new_type](#new_type-string) | string | yes | - |
-| [source_field](#source_field-string) | string | yes | - |
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### new_type [string]
-
-需要转换的结果类型,当前支持的类型有`string`、`integer`、`long`、`float`、`double`和`boolean`等
-
-##### source_field [string]
-
-源数据字段
-
-##### common options [string]
-
-`Filter` 插件通用参数,详情参照 [Filter Plugin](/zh-cn/v1/configuration/filter-plugin)
-
-
-### Examples
-
-```
-convert {
- source_field = "age"
- new_type = "integer"
-}
-```
-
-> 将源数据中的`age`字段转换为`integer`类型
-
-
diff --git a/docs/configuration/filter-plugins/Date.docs b/docs/configuration/filter-plugins/Date.docs
deleted file mode 100644
index 4918f25..0000000
--- a/docs/configuration/filter-plugins/Date.docs
+++ /dev/null
@@ -1,15 +0,0 @@
-@seatunnelPlugin
-@pluginGroup filter
-@pluginName Date
-@pluginDesc "对指定字段进行时间格式转换"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption string source_field="__ROOT__" no "源字段,若不配置将使用当前时间"
-@pluginOption string target_field="datetime" no "目标字段,若不配置默认为'datetime'"
-@pluginOption string source_time_format="UNIX_MS" no "源字段时间格式,当前支持UNIX、UNIX_MS以及'SimpleDateFormat'格式"
-@pluginOption string target_time_format="yyyy/MM/dd HH:mm:ss" no "目标字段时间格式"
-@pluginOption string time_zone="" no "时区"
-@pluginOption string default_value="${now}" no "如果日期转换失败将会使用当前时间生成指定格式的值"
-@pluginOption string locale="Locale.US" no "编码类型"
diff --git a/docs/configuration/filter-plugins/Date.md b/docs/configuration/filter-plugins/Date.md
deleted file mode 100644
index 05fd9b3..0000000
--- a/docs/configuration/filter-plugins/Date.md
+++ /dev/null
@@ -1,92 +0,0 @@
-## Filter plugin : Date
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-对指定字段进行时间格式转换
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [default_value](#default_value-string) | string | no | ${now} |
-| [locale](#locale-string) | string | no | Locale.US |
-| [source_field](#source_field-string) | string | no | \_\_root\_\_ |
-| [source_time_format](#source_time_format-string) | string | no | UNIX_MS |
-| [target_field](#target_field-string) | string | no | datetime |
-| [target_time_format](#target_time_format-string) | string | no | `yyyy/MM/dd HH:mm:ss` |
-| [time_zone](#time_zone-string) | string | no | - |
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### default_value [string]
-
-如果日期转换失败将会使用当前时间生成指定格式的时间
-
-##### locale [string]
-
-编码类型
-
-##### source_field [string]
-
-源字段,若不配置将使用当前时间
-
-##### source_time_format [string]
-
-源字段时间格式,当前支持UNIX(10位的秒时间戳)、UNIX_MS(13位的毫秒时间戳)以及`SimpleDateFormat`时间格式。常用的时间格式列举如下:
-
-| Symbol | Description |
-| --- | --- |
-| y | Year |
-| M | Month |
-| d | Day of month |
-| H | Hour in day (0-23) |
-| m | Minute in hour |
-| s | Second in minute |
-
-详细的时间格式语法见[Java SimpleDateFormat](https://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html)。
-
-
-##### target_field [string]
-
-目标字段,若不配置默认为`datetime`
-
-##### target_time_format [string]
-
-目标字段时间格式,详细的时间格式语法见[Java SimpleDateFormat](https://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html)。
-
-##### time_zone [string]
-
-时区
-
-##### common options [string]
-
-`Filter` 插件通用参数,详情参照 [Filter Plugin](/zh-cn/v1/configuration/filter-plugin)
-
-
-### Examples
-
-```
-date {
- source_field = "timestamp"
- target_field = "date"
- source_time_format = "UNIX"
- target_time_format = "yyyy/MM/dd"
-}
-```
-
-> 将源数据中的`timestamp`字段由UNIX时间戳,例如*1517128894*转换为`yyyy/MM/dd`格式的`date`字段,例如*2018/01/28*
-
-```
-date {
- source_field = "httpdate"
- target_field = "datetime"
- source_time_format = "dd/MMM/yyyy:HH:mm:ss Z"
- target_time_format = "yyyy/MM/dd HH:mm:ss"
-}
-```
-
-> 将源数据中的`httpdate`字段由`dd/MMM/yyyy:HH:mm:ss Z`格式转化为`yyyy/MM/dd HH:mm:ss`格式的`datetime`字段
diff --git a/docs/configuration/filter-plugins/Drop.docs b/docs/configuration/filter-plugins/Drop.docs
deleted file mode 100644
index 625c2ff..0000000
--- a/docs/configuration/filter-plugins/Drop.docs
+++ /dev/null
@@ -1,9 +0,0 @@
-@seatunnelPlugin
-@pluginGroup filter
-@pluginName Drop
-@pluginDesc "丢弃掉符合指定条件的Event"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption string condition yes "条件表达式,符合此条件表达式的Event将被丢弃。条件表达式语法即sql中where条件中的条件表达式,如 `name = 'garyelephant'`, `status = '200' and resp_time > 100`"
diff --git a/docs/configuration/filter-plugins/Drop.md b/docs/configuration/filter-plugins/Drop.md
deleted file mode 100644
index e786bfc..0000000
--- a/docs/configuration/filter-plugins/Drop.md
+++ /dev/null
@@ -1,36 +0,0 @@
-## Filter plugin : Drop
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-丢弃掉符合指定条件的Row
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [condition](#condition-string) | string | yes | - |
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### condition [string]
-
-条件表达式,符合此条件表达式的Row将被丢弃。条件表达式语法即sql中where条件中的条件表达式,如 `name = 'garyelephant'`, `status = '200' and resp_time > 100`
-
-##### common options [string]
-
-`Filter` 插件通用参数,详情参照 [Filter Plugin](/zh-cn/v1/configuration/filter-plugin)
-
-
-### Examples
-
-```
-drop {
- condition = "status = '200'"
-}
-```
-
-> 状态码为200的Row将被丢弃
diff --git a/docs/configuration/filter-plugins/Grok.docs b/docs/configuration/filter-plugins/Grok.docs
deleted file mode 100644
index 6ef0851..0000000
--- a/docs/configuration/filter-plugins/Grok.docs
+++ /dev/null
@@ -1,13 +0,0 @@
-@seatunnelPlugin
-@pluginGroup filter
-@pluginName Grok
-@pluginDesc "对指定字段进行正则解析"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption string pattern="" yes "正则表达式"
-@pluginOption string patterns_dir="-" no "patterns文件路径"
-@pluginOption boolean named_captures_only="true" no "If true, only store named captures from grok."
-@pluginOption string source_field="raw_message" no "数据源字段"
-@pluginOption string target_field="__root__" no "目标字段"
diff --git a/docs/configuration/filter-plugins/Grok.md b/docs/configuration/filter-plugins/Grok.md
deleted file mode 100644
index 2ad3669..0000000
--- a/docs/configuration/filter-plugins/Grok.md
+++ /dev/null
@@ -1,81 +0,0 @@
-## Filter plugin : Grok
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-使用Grok Pattern来解析字段,[支持的grok pattern](https://github.com/InterestingLab/seatunnel/blob/master/plugins/grok/files/grok-patterns/grok-patterns),
-
-[grok pattern 测试地址](https://grokdebug.herokuapp.com/)
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [named_captures_only](#named_captures_only-boolean) | boolean | no | true |
-| [pattern](#pattern-string) | string | yes | - |
-| [patterns_dir](#patterns_dir-string) | string | no | - |
-| [source_field](#source_field-string) | string | no | raw_message |
-| [target_field](#target_field-string) | string | no | \_\_root\_\_ |
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### named_captures_only [boolean]
-
-If true, only store named captures from grok.
-
-##### pattern [string]
-
-用于处理数据的grok pattern.
-
-##### patterns_dir [string]
-
-patterns文件路径,可不填,seatunnel自带了丰富的[grok-patterns文件](https://github.com/InterestingLab/seatunnel/tree/master/plugins/grok/files/grok-patterns)
-
-##### source_field [string]
-
-数据源字段
-
-##### target_field [string]
-
-目标字段
-
-##### common options [string]
-
-`Filter` 插件通用参数,详情参照 [Filter Plugin](/zh-cn/v1/configuration/filter-plugin)
-
-
-### Example
-
-```
-grok {
- source_field = "raw_message"
- pattern = "%{WORD:name} is %{WORD:gender}, %{NUMBER:age} years old and weighs %{NUMBER:weight} kilograms"
- target_field = "info_detail"
-}
-```
-
-* **Input**
-
-```
-+----------------------------------------------------+
-|raw_message |
-+----------------------------------------------------+
-|gary is male, 25 years old and weighs 68.5 kilograms|
-|gary is male, 25 years old and weighs 68.5 kilograms|
-+----------------------------------------------------+
-```
-
-* **Output**
-
-```
-+----------------------------------------------------+------------------------------------------------------------+
-|raw_message |info_detail |
-+----------------------------------------------------+------------------------------------------------------------+
-|gary is male, 25 years old and weighs 68.5 kilograms|Map(age -> 25, gender -> male, name -> gary, weight -> 68.5)|
-|gary is male, 25 years old and weighs 68.5 kilograms|Map(age -> 25, gender -> male, name -> gary, weight -> 68.5)|
-+----------------------------------------------------+------------------------------------------------------------+
-
-```
diff --git a/docs/configuration/filter-plugins/Join.md b/docs/configuration/filter-plugins/Join.md
deleted file mode 100644
index dec987e..0000000
--- a/docs/configuration/filter-plugins/Join.md
+++ /dev/null
@@ -1,62 +0,0 @@
-## Filter plugin : Join
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.3.0
-
-### Description
-
-和指定的临时表进行Join操作, 目前仅支持Stream-static Inner Joins
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [source_field](#source_field-string) | string | no | raw_message |
-| [table_name](#table_name-string) | string | yes | - |
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### source_field [string]
-
-源字段,若不配置默认为`raw_message`
-
-##### table_name [string]
-
-临时表表名
-
-##### common options [string]
-
-`Filter` 插件通用参数,详情参照 [Filter Plugin](/zh-cn/v1/configuration/filter-plugin)
-
-
-### Examples
-
-```
-input {
- fakestream {
- content = ["Hello World,seatunnel"]
- rate = 1
- }
-
- mysql {
- url = "jdbc:mysql://localhost:3306/info"
- table = "project_info"
- table_name = "spark_project_info"
- user = "username"
- password = "password"
- }
-}
-
-filter {
- split {
- fields = ["msg", "project"]
- delimiter = ","
- }
-
- join {
- table_name = "spark_project_info"
- source_field = "project"
- }
-}
-```
diff --git a/docs/configuration/filter-plugins/Json.docs b/docs/configuration/filter-plugins/Json.docs
deleted file mode 100644
index d7b4339..0000000
--- a/docs/configuration/filter-plugins/Json.docs
+++ /dev/null
@@ -1,12 +0,0 @@
-@seatunnelPlugin
-@pluginGroup filter
-@pluginName Json
-@pluginDesc "对原始数据集指定字段进行Json解析"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption string source_field="raw_message" no "源字段,若不配置默认为'raw_message'"
-@pluginOption string target_field="__root__" no "目标字段,若不配置默认为'__root__'"
-@pluginOption string schema_dir="-" no "json schema文件夹路径"
-@pluginOption string schema_file="-" no "json schema文件名"
diff --git a/docs/configuration/filter-plugins/Json.md b/docs/configuration/filter-plugins/Json.md
deleted file mode 100644
index 9048024..0000000
--- a/docs/configuration/filter-plugins/Json.md
+++ /dev/null
@@ -1,197 +0,0 @@
-## Filter plugin : Json
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-对原始数据集指定字段进行Json解析
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [source_field](#source_field-string) | string | no | raw_message |
-| [target_field](#target_field-string) | string | no | \_\_root\_\_ |
-| [schema_dir](#schema_dir-string) | string | no | - |
-| [schema_file](#schema_file-string) | string | no | - |
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### source_field [string]
-
-源字段,若不配置默认为`raw_message`
-
-##### target_field [string]
-
-目标字段,若不配置默认为`__root__`,Json解析后的结果将统一放置Dataframe最顶层
-
-##### schema_dir [string]
-
-样式目录,若不配置默认为`$seatunnelRoot/plugins/json/files/schemas/`
-
-##### schema_file [string]
-
-样式文件名,若不配置默认为空,即不指定结构,由系统根据数据源输入自行推导。
-
-##### common options [string]
-
-`Filter` 插件通用参数,详情参照 [Filter Plugin](/zh-cn/v1/configuration/filter-plugin)
-
-
-### Use cases
-
-1. `json schema` **使用场景**
-
-单个任务的数据源中可能包含不同样式的 json 数据,比如来自 kafka 的 topicA 样式为
-
-```json
-{
- "A": "a_val",
- "B": "b_val"
-}
-```
-
-来自 topicB 样式为
-
-```json
-{
- "C": "c_val",
- "D": "d_val"
-}
-```
-
-运行 filter 时需要将 topicA 和 topicB 的数据融合在一张宽表中进行计算。则可指定一份 schema,其内容样式为:
-```json
-{
- "A": "a_val",
- "B": "b_val",
- "C": "c_val",
- "D": "d_val"
-}
-```
-
-则 topicA 和 topicB 的融合输出结果为:
-
-```
-+-----+-----+-----+-----+
-|A |B |C |D |
-+-----+-----+-----+-----+
-|a_val|b_val|null |null |
-|null |null |c_val|d_val|
-+-----+-----+-----+-----+
-```
-
-### Examples
-
-1. 不使用 `target_field`
-
-```
-json {
- source_field = "message"
-}
-```
-
-* **Input**
-
-```
-+----------------------------+
-|message |
-+----------------------------+
-|{"name": "ricky", "age": 23}|
-|{"name": "gary", "age": 28} |
-+----------------------------+
-```
-
-* **Output**
-
-```
-+----------------------------+---+-----+
-|message |age|name |
-+----------------------------+---+-----+
-|{"name": "gary", "age": 28} |28 |gary |
-|{"name": "ricky", "age": 23}|23 |ricky|
-+----------------------------+---+-----+
-```
-
-2. 使用 `target_field`
-
-使用 `target_field` 会将解析后的嵌套结果存储在指定字段中。
-
-```
-json {
- source_field = "message"
- target_field = "info"
- result_table_name = "view_1"
-}
-```
-
-* **Input**
-
-```
-+----------------------------+
-|message |
-+----------------------------+
-|{"name": "ricky", "age": 23}|
-|{"name": "gary", "age": 28} |
-+----------------------------+
-```
-
-* **Output**
-
-```
-+----------------------------+----------+
-|message |info |
-+----------------------------+----------+
-|{"name": "gary", "age": 28} |[28,gary] |
-|{"name": "ricky", "age": 23}|[23,ricky]|
-+----------------------------+----------+
-
-```
-
-> json处理的结果支持**select * from view_1 where info.age = 23**此类SQL语句
-
-3. 使用`schema_file`
-
-```
-json {
- source_field = "message"
- schema_file = "demo.json"
-}
-```
-
-* **Schema**
-
-在 Driver Node 的 `/opt/seatunnel/plugins/json/files/schemas/demo.json` 中放置内容如下:
-
-```json
-{
- "name": "demo",
- "age": 24,
- "city": "LA"
-}
-```
-
-* **Input**
-```
-+----------------------------+
-|message |
-+----------------------------+
-|{"name": "ricky", "age": 23}|
-|{"name": "gary", "age": 28} |
-+----------------------------+
-```
-
-* **Output**
-
-```
-+----------------------------+---+-----+-----+
-|message |age|name |city |
-+----------------------------+---+-----+-----+
-|{"name": "gary", "age": 28} |28 |gary |null |
-|{"name": "ricky", "age": 23}|23 |ricky|null |
-+----------------------------+---+-----+-----+
-```
-
-> 若使用 cluster 模式进行部署,需确保 json schemas 目录被打包到 plugins.tar.gz 中
diff --git a/docs/configuration/filter-plugins/Kv.docs b/docs/configuration/filter-plugins/Kv.docs
deleted file mode 100644
index e85da26..0000000
--- a/docs/configuration/filter-plugins/Kv.docs
+++ /dev/null
@@ -1,15 +0,0 @@
-@seatunnelPlugin
-@pluginGroup filter
-@pluginName Kv
-@pluginDesc "提取指定字段所有的Key-Value, 常用于解析url参数中的key和value"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption string field_split="&" no "字段分隔符"
-@pluginOption string value_split="=" no "字段值分隔符"
-@pluginOption string field_prefix="" no "字段指定前缀"
-@pluginOption string include_fields="[]" no "需要包括的字段"
-@pluginOption string exclude_fields="[]" no "不需要包括的字段"
-@pluginOption string source_field="raw_message" no "源字段,若不配置默认为'raw_message'"
-@pluginOption string target_field="\_\_root\_\_" no "目标字段,若不配置默认为'\_\_root\_\_'"
diff --git a/docs/configuration/filter-plugins/Kv.md b/docs/configuration/filter-plugins/Kv.md
deleted file mode 100644
index a24ba9e..0000000
--- a/docs/configuration/filter-plugins/Kv.md
+++ /dev/null
@@ -1,133 +0,0 @@
-## Filter plugin : Kv
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-提取指定字段所有的Key-Value, 常用于解析url参数中的key和value
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [default_values](#default_values-array) | array | no | [] |
-| [exclude_fields](#exclude_fields-array) | array | no | [] |
-| [field_prefix](#field_prefix-string) | string | no | |
-| [field_split](#field_split-string) | string | no | & |
-| [include_fields](#include_fields-array) | array | no | [] |
-| [source_field](#source_field-string) | string | no | raw_message |
-| [target_field](#target_field-string) | string | no | \_\_root\_\_ |
-| [value_split](#value_split-string) | string | no | = |
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### default_values [array]
-
-指定kv默认值,格式为 `key=default_value`, key与value之间使用`=`分割,可以指定多个,举例:
-
-`default_values = ["mykey1=123", "mykey2=seatunnel"]`
-
-##### exclude_fields [array]
-
-不需要包括的字段
-
-##### field_prefix [string]
-
-字段指定前缀
-
-##### field_split [string]
-
-字段分隔符
-
-##### include_fields [array]
-
-需要包括的字段
-
-##### source_field [string]
-
-源字段,若不配置默认为`raw_message`
-
-##### target_field [string]
-
-目标字段,若不配置默认为`__root__`
-
-##### value_split [string]
-
-字段值分隔符
-
-##### common options [string]
-
-`Filter` 插件通用参数,详情参照 [Filter Plugin](/zh-cn/v1/configuration/filter-plugin)
-
-
-### Examples
-
-1. 使用`target_field`
-
- ```
- kv {
- source_field = "message"
- target_field = "kv_map"
- field_split = "&"
- value_split = "="
- }
- ```
-
- * **Input**
-
- ```
- +-----------------+
- |message |
- +-----------------+
- |name=ricky&age=23|
- |name=gary&age=28 |
- +-----------------+
- ```
-
- * **Output**
-
- ```
- +-----------------+-----------------------------+
- |message |kv_map |
- +-----------------+-----------------------------+
- |name=ricky&age=23|Map(name -> ricky, age -> 23)|
- |name=gary&age=28 |Map(name -> gary, age -> 28) |
- +-----------------+-----------------------------+
- ```
-
- > kv处理的结果支持**select * from view_1 where kv_map.age = 23**此类SQL语句
-
-2. 不使用`target_field`
-
- ```
- kv {
- source_field = "message"
- field_split = "&"
- value_split = "="
- }
- ```
-
- * **Input**
-
- ```
- +-----------------+
- |message |
- +-----------------+
- |name=ricky&age=23|
- |name=gary&age=28 |
- +-----------------+
- ```
-
- * **Output**
-
- ```
- +-----------------+---+-----+
- |message |age|name |
- +-----------------+---+-----+
- |name=ricky&age=23|23 |ricky|
- |name=gary&age=28 |28 |gary |
- +-----------------+---+-----+
-
- ```
diff --git a/docs/configuration/filter-plugins/Lowercase.docs b/docs/configuration/filter-plugins/Lowercase.docs
deleted file mode 100644
index 1b8d6bc..0000000
--- a/docs/configuration/filter-plugins/Lowercase.docs
+++ /dev/null
@@ -1,10 +0,0 @@
-@seatunnelPlugin
-@pluginGroup filter
-@pluginName Lowercase
-@pluginDesc "将指定字段内容全部转换为小写字母"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption string source_field="raw_message" no "源字段,若不配置默认为'raw_message'"
-@pluginOption string target_field="lowercased" no "目标字段,若不配置默认为'lowercased'"
diff --git a/docs/configuration/filter-plugins/Lowercase.md b/docs/configuration/filter-plugins/Lowercase.md
deleted file mode 100644
index ae182eb..0000000
--- a/docs/configuration/filter-plugins/Lowercase.md
+++ /dev/null
@@ -1,40 +0,0 @@
-## Filter plugin : Lowercase
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-将指定字段内容全部转换为小写字母
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [source_field](#source_field-string) | string | no | raw_message |
-| [target_field](#target_field-string) | string | no | lowercased |
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### source_field [string]
-
-源字段,若不配置默认为`raw_message`
-
-##### target_field [string]
-
-目标字段,若不配置默认为`lowercased`
-
-##### common options [string]
-
-`Filter` 插件通用参数,详情参照 [Filter Plugin](/zh-cn/v1/configuration/filter-plugin)
-
-
-### Examples
-
-```
-lowercase {
- source_field = "address"
- target_field = "address_lowercased"
-}
-```
diff --git a/docs/configuration/filter-plugins/Remove.docs b/docs/configuration/filter-plugins/Remove.docs
deleted file mode 100644
index d80574e..0000000
--- a/docs/configuration/filter-plugins/Remove.docs
+++ /dev/null
@@ -1,9 +0,0 @@
-@seatunnelPlugin
-@pluginGroup filter
-@pluginName Remove
-@pluginDesc "删除数据中的字段"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption array source_field yes "需要删除的字段列表"
diff --git a/docs/configuration/filter-plugins/Remove.md b/docs/configuration/filter-plugins/Remove.md
deleted file mode 100644
index fa1b051..0000000
--- a/docs/configuration/filter-plugins/Remove.md
+++ /dev/null
@@ -1,36 +0,0 @@
-## Filter plugin : Remove
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-删除数据中的字段
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [source_field](#source_field-array) | array | yes | - |
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### source_field [array]
-
-需要删除的字段列表
-
-##### common options [string]
-
-`Filter` 插件通用参数,详情参照 [Filter Plugin](/zh-cn/v1/configuration/filter-plugin)
-
-
-### Examples
-
-```
-remove {
- source_field = ["field1", "field2"]
-}
-```
-
-> 删除原始数据中的`field1`和`field2`字段
diff --git a/docs/configuration/filter-plugins/Rename.docs b/docs/configuration/filter-plugins/Rename.docs
deleted file mode 100644
index 8a222d3..0000000
--- a/docs/configuration/filter-plugins/Rename.docs
+++ /dev/null
@@ -1,10 +0,0 @@
-@seatunnelPlugin
-@pluginGroup filter
-@pluginName Rename
-@pluginDesc "重命名数据中的字段"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption array source_field yes "需要重命名的字段"
-@pluginOption array target_field yes "变更之后的字段名"
diff --git a/docs/configuration/filter-plugins/Rename.md b/docs/configuration/filter-plugins/Rename.md
deleted file mode 100644
index eaf0b7e..0000000
--- a/docs/configuration/filter-plugins/Rename.md
+++ /dev/null
@@ -1,42 +0,0 @@
-## Filter plugin : Rename
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-重命名数据中的字段
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [source_field](#source_field-string) | array | yes | - |
-| [target_field](#target_field-string) | array | yes | - |
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### source_field [string]
-
-需要重命名的字段
-
-##### target_field [string]
-
-变更之后的字段名
-
-##### common options [string]
-
-`Filter` 插件通用参数,详情参照 [Filter Plugin](/zh-cn/v1/configuration/filter-plugin)
-
-
-### Examples
-
-```
-rename {
- source_field = "field1"
- target_field = "field2"
-}
-```
-
-> 将原始数据中的`field1`字段重命名为`field2`字段
diff --git a/docs/configuration/filter-plugins/Repartition.docs b/docs/configuration/filter-plugins/Repartition.docs
deleted file mode 100644
index ad5a715..0000000
--- a/docs/configuration/filter-plugins/Repartition.docs
+++ /dev/null
@@ -1,9 +0,0 @@
-@seatunnelPlugin
-@pluginGroup filter
-@pluginName Repartition
-@pluginDesc "重新给Dataframe分区"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption number num_partitions="-" yes "分区个数"
diff --git a/docs/configuration/filter-plugins/Repartition.md b/docs/configuration/filter-plugins/Repartition.md
deleted file mode 100644
index d8a6096..0000000
--- a/docs/configuration/filter-plugins/Repartition.md
+++ /dev/null
@@ -1,34 +0,0 @@
-## Filter plugin : Repartition
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-调整数据处理的分区个数,并行度。这个filter主要是为了调节数据处理性能,不对数据本身做任何处理。
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [num_partitions](#num_partitions-number) | number | yes | - |
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### num_partitions [number]
-
-目标分区个数
-
-##### common options [string]
-
-`Filter` 插件通用参数,详情参照 [Filter Plugin](/zh-cn/v1/configuration/filter-plugin)
-
-
-### Examples
-
-```
-repartition {
- num_partitions = 8
-}
-```
diff --git a/docs/configuration/filter-plugins/Replace.docs b/docs/configuration/filter-plugins/Replace.docs
deleted file mode 100644
index 102f7eb..0000000
--- a/docs/configuration/filter-plugins/Replace.docs
+++ /dev/null
@@ -1,12 +0,0 @@
-@seatunnelPlugin
-@pluginGroup filter
-@pluginName Replace
-@pluginDesc "将指定字段内容根据正则表达式进行替换"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption string source_field="raw_message" no "源字段,若不配置默认为'raw_message'"
-@pluginOption string target_field="replaced" no "目标字段,若不配置默认为'replaced'"
-@pluginOption string pattern="-" yes "正则表达式"
-@pluginOption string replacement="-" yes "替换的字符串"
diff --git a/docs/configuration/filter-plugins/Replace.md b/docs/configuration/filter-plugins/Replace.md
deleted file mode 100644
index b7ca9d4..0000000
--- a/docs/configuration/filter-plugins/Replace.md
+++ /dev/null
@@ -1,54 +0,0 @@
-## Filter plugin : Replace
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-将指定字段内容根据正则表达式进行替换
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [pattern](#pattern-string) | string | yes | - |
-| [replacement](#replacement-string) | string | yes | - |
-| [source_field](#source_field-string) | string | no | raw_message |
-| [target_field](#target_field-string) | string | no | replaced |
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### pattern [string]
-
-用于做匹配的正则表达式。常见的书写方式如 `"[a-zA-Z0-9_-]+"`, 详见[Regex Pattern](https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html)。
-也可以到这里测试正则表达式是否正确:[Regex 101](https://regex101.com/)
-
-##### replacement [string]
-
-替换的字符串
-
-##### source_field [string]
-
-源字段,若不配置默认为`raw_message`
-
-##### target_field [string]
-
-目标字段,若不配置默认为`replaced`
-
-##### common options [string]
-
-`Filter` 插件通用参数,详情参照 [Filter Plugin](/zh-cn/v1/configuration/filter-plugin)
-
-
-### Examples
-
-```
-replace {
- target_field = "tmp"
- source_field = "message"
- pattern = "is"
- replacement = "are"
-}
-```
-> 将`message`中的**is**替换为**are**,并赋值给`tmp`
diff --git a/docs/configuration/filter-plugins/Sample.docs b/docs/configuration/filter-plugins/Sample.docs
deleted file mode 100644
index 5dad7ac..0000000
--- a/docs/configuration/filter-plugins/Sample.docs
+++ /dev/null
@@ -1,10 +0,0 @@
-@seatunnelPlugin
-@pluginGroup filter
-@pluginName Sample
-@pluginDesc "对原始数据集进行抽样"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption number fraction="0.1" no "数据采样的比例,例如fraction=0.8,就是抽取其中80%的数据"
-@pluginOption number limit="-1" no "数据采样后的条数,其中`-1`代表不限制"
diff --git a/docs/configuration/filter-plugins/Sample.md b/docs/configuration/filter-plugins/Sample.md
deleted file mode 100644
index 399d11f..0000000
--- a/docs/configuration/filter-plugins/Sample.md
+++ /dev/null
@@ -1,41 +0,0 @@
-## Filter plugin : Sample
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-对原始数据集进行抽样
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [fraction](#fraction-number) | number | no | 0.1 |
-| [limit](#limit-number) | number | no | -1 |
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### fraction [number]
-
-数据采样的比例,例如fraction=0.8,就是抽取其中80%的数据
-
-##### limit [number]
-
-数据采样后的条数,其中`-1`代表不限制
-
-##### common options [string]
-
-`Filter` 插件通用参数,详情参照 [Filter Plugin](/zh-cn/v1/configuration/filter-plugin)
-
-
-### Examples
-
-```
-sample {
- fraction = 0.8
-}
-```
-
-> 抽取80%的数据
diff --git a/docs/configuration/filter-plugins/Script.docs b/docs/configuration/filter-plugins/Script.docs
deleted file mode 100644
index 1138cfb..0000000
--- a/docs/configuration/filter-plugins/Script.docs
+++ /dev/null
@@ -1,14 +0,0 @@
-@seatunnelPlugin
-@pluginGroup filter
-@pluginName Script
-@pluginDesc "解析并执行自定义脚本中逻辑"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption string object_name="event" no "脚本内置JSONObject的引用名,不设置默认为'event'"
-@pluginOption string script_name yes "脚本名称"
-@pluginOption boolean errorList no "是否需要输出的错误信息List"
-@pluginOption boolean isCache no "是否使用Cache中的指令集"
-@pluginOption boolean isTrace no "是否输出所有的跟踪信息,同时还需要log级别是DEBUG级"
-@pluginOption boolean isPrecise no "是否需要高精度的计算"
diff --git a/docs/configuration/filter-plugins/Script.md b/docs/configuration/filter-plugins/Script.md
deleted file mode 100644
index 82bc922..0000000
--- a/docs/configuration/filter-plugins/Script.md
+++ /dev/null
@@ -1,79 +0,0 @@
-## Filter plugin : Script
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.1.1
-
-### Description
-
-解析并执行自定义脚本中逻辑, 即接受`object_name`(默认是event) 指定的JSONObject,
-完成自定义的处理逻辑,再返回一个新的event.
-
-脚本解析引擎的实现,采用的是[QLExpress](https://github.com/alibaba/QLExpress),
-具体语法可参考[QLExpress 语法](https://github.com/alibaba/QLExpress#%E4%B8%89%E8%AF%AD%E6%B3%95%E4%BB%8B%E7%BB%8D).
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [object_name](#object_name-string) | string | no | event |
-| [script_name](#script_name-string) | string | yes | - |
-| [errorList](#errorList-boolean) | boolean | no | false |
-| [isCache](#isCache-boolean) | boolean | no | false |
-| [isTrace](#isTrace-boolean) | boolean | no | false |
-| [isPrecise](#isPrecise-boolean) | boolean | no | false |
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### object_name [string]
-
-脚本内置JSONObject的引用名, 不设置默认为'event'
-
-##### script_name [string]
-
-需要执行脚本的文件名称, 注意脚本文件必须放到`plugins/script/files`目录下面.
-
-##### errorList [boolean]
-
-输出的错误信息List
-
-##### isCache [boolean]
-
-是否使用Cache中的指令集
-
-##### isTrace [boolean]
-
-是否输出所有的跟踪信息,同时还需要log级别是DEBUG级
-
-##### isPrecise [boolean]
-
-是否需要高精度的计算
-
-##### common options [string]
-
-`Filter` 插件通用参数,详情参照 [Filter Plugin](/zh-cn/v1/configuration/filter-plugin)
-
-
-### Examples
-
-* conf文件插件配置
-
-```
- script {
- script_name = "my_script.ql"
- }
-```
-
-* 自定义脚本(my_script.ql)
-
-```
-newEvent = new java.util.HashMap();
-you = event.getString("name");
-age = event.getLong("age");
-if(age > 10){
-newEvent.put("name",you);
-}
-return newEvent;
-```
-
-> 如果age大于10,则获取name放入map中并返回
diff --git a/docs/configuration/filter-plugins/Split.docs b/docs/configuration/filter-plugins/Split.docs
deleted file mode 100644
index 01def7c..0000000
--- a/docs/configuration/filter-plugins/Split.docs
+++ /dev/null
@@ -1,12 +0,0 @@
-@seatunnelPlugin
-@pluginGroup filter
-@pluginName Split
-@pluginDesc "根据delimiter对字符串拆分"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption string source_field="raw_message" no "源字段,若不配置默认为'raw_message'"
-@pluginOption string target_field="_ROOT_" no "目标字段,若不配置默认为'ROOT'"
-@pluginOption string delimiter yes "分隔符"
-@pluginOption list fields yes "分割后的字段"
diff --git a/docs/configuration/filter-plugins/Split.md b/docs/configuration/filter-plugins/Split.md
deleted file mode 100644
index 48c5077..0000000
--- a/docs/configuration/filter-plugins/Split.md
+++ /dev/null
@@ -1,66 +0,0 @@
-## Filter plugin : Split
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-根据delimiter分割字符串。
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [delimiter](#delimiter-string) | string | no | " "(空格) |
-| [fields](#fields-array) | array | yes | - |
-| [source_field](#source_field-string) | string | no | raw_message |
-| [target_field](#target_field-string) | string | no | _root_ |
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### delimiter [string]
-
-分隔符,根据分隔符对输入字符串进行分隔操作,默认分隔符为一个空格(" ")。
-
-##### fields [array]
-
-分割后的字段名称列表,按照顺序指定被分割后的各个字符串的字段名称。
-若`fields`长度大于分隔结果长度,则多余字段赋值为空字符。
-
-##### source_field [string]
-
-被分割前的字符串来源字段,若不配置默认为`raw_message`
-
-##### target_field [string]
-
-`target_field` 可以指定被分割后的多个字段被添加到Event的位置,若不配置默认为`_root_`,即将所有分割后的字段,添加到Event最顶级。
-如果指定了特定的字段,则被分割后的字段将被添加到这个字段的下面一级。
-
-##### common options [string]
-
-`Filter` 插件通用参数,详情参照 [Filter Plugin](/zh-cn/v1/configuration/filter-plugin)
-
-
-### Examples
-
-```
-split {
- source_field = "message"
- delimiter = "&"
- fields = ["field1", "field2"]
-}
-```
-
-> 将源数据中的`message`字段根据**&**进行分割,可以以`field1`或`field2`为key获取相应value
-
-```
-split {
- source_field = "message"
- target_field = "info"
- delimiter = ","
- fields = ["field1", "field2"]
-}
-```
-
-> 将源数据中的`message`字段根据**,**进行分割,分割后的字段为`info`,可以以`info.field1`或`info.field2`为key获取相应value
diff --git a/docs/configuration/filter-plugins/Sql.docs b/docs/configuration/filter-plugins/Sql.docs
deleted file mode 100644
index 5a7b68f..0000000
--- a/docs/configuration/filter-plugins/Sql.docs
+++ /dev/null
@@ -1,10 +0,0 @@
-@seatunnelPlugin
-@pluginGroup filter
-@pluginName Sql
-@pluginDesc "在原始数据集Dataframe的基础上执行SQL"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption string table="-" no "表名,可为任意字符串"
-@pluginOption string sql="-" yes "SQL语句"
diff --git a/docs/configuration/filter-plugins/Sql.md b/docs/configuration/filter-plugins/Sql.md
deleted file mode 100644
index b3341ab..0000000
--- a/docs/configuration/filter-plugins/Sql.md
+++ /dev/null
@@ -1,58 +0,0 @@
-## Filter plugin : Sql
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-使用SQL处理数据,支持Spark丰富的[UDF函数](http://spark.apache.org/docs/latest/api/sql/)
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [sql](#sql-string) | string | yes | - |
-| [table_name](#table_name-string) | string | no | - |
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### sql [string]
-
-SQL语句,SQL中使用的表名为 `Input` 或 `Filter` 插件中配置的 `result_table_name`
-
-##### table_name [string]
-
-**\[从v1.4开始废弃\]**,后续 Release 版本中将删除此参数
-
-##### common options [string]
-
-`Filter` 插件通用参数,详情参照 [Filter Plugin](/zh-cn/v1/configuration/filter-plugin)
-
-
-### Examples
-
-```
-sql {
- sql = "select username, address from user_info",
-}
-```
-
-> 仅保留`username`和`address`字段,将丢弃其余字段。`user_info` 为之前插件配置的 `result_table_name`
-
-```
-sql {
- sql = "select substring(telephone, 0, 10) from user_info",
-}
-```
-
-> 使用[substring functions](http://spark.apache.org/docs/latest/api/sql/#substring)对`telephone`字段进行截取操作
-
-```
-sql {
- sql = "select avg(age) from user_info",
- table_name = "user_info"
-}
-```
-
-> 使用[avg functions](http://spark.apache.org/docs/latest/api/sql/#avg)对原始数据集进行聚合操作,取出`age`平均值
diff --git a/docs/configuration/filter-plugins/Table.docs b/docs/configuration/filter-plugins/Table.docs
deleted file mode 100644
index e3dee9a..0000000
--- a/docs/configuration/filter-plugins/Table.docs
+++ /dev/null
@@ -1,14 +0,0 @@
-@seatunnelPlugin
-@pluginGroup filter
-@pluginName Table
-@pluginDesc "Table 用于将静态文件映射为一张表,可与实时处理的流进行关联,常用于用户昵称,国家省市等字典表关联"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption string path yes "Hadoop支持的文件路径(默认hdfs路径, 如/path/to/file), 如本地文件:file:///path/to/file, hdfs:///path/to/file, s3:///path/to/file ..."
-@pluginOption string delimiter="," no "文件中列与列之间的分隔符"
-@pluginOption string table_name yes "将文件载入后将注册为一张表,这里指定的是表名称,可用于在SQL中直接与流处理数据关联"
-@pluginOption array fields yes "文件中,每行中各个列的名称,按照数据中实际列顺序提供"
-@pluginOption array field_types no "每个列的类型,顺序与个数必须与`fields`参数一一对应, 不指定此参数,默认所有列的类型为字符串; 支持的数据类型包括:boolean, double, long, string"
-@pluginOption boolean cache="true" no "是否内存中缓存文件内容,true表示缓存,false表示每次需要时重新加载"
diff --git a/docs/configuration/filter-plugins/Table.md b/docs/configuration/filter-plugins/Table.md
deleted file mode 100644
index 6992051..0000000
--- a/docs/configuration/filter-plugins/Table.md
+++ /dev/null
@@ -1,74 +0,0 @@
-## Filter plugin : Table
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-Table 用于将静态文件映射为一张表,可与实时处理的流进行关联,常用于用户昵称,国家省市等字典表关联
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [cache](#cache-boolean) | boolean | no | true |
-| [delimiter](#delimiter-string) | string | no | , |
-| [field_types](#field_types-array) | array | no | - |
-| [fields](#fields-array) | array | yes | - |
-| [path](#path-string) | string | yes | - |
-| [table_name](#table_name-string) | string | yes | - |
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### cache [boolean]
-
-是否内存中缓存文件内容,true表示缓存,false表示每次需要时重新加载
-
-##### delimiter [string]
-
-文件中列与列之间的分隔符
-
-##### field_types [array]
-
-每个列的类型,顺序与个数必须与`fields`参数一一对应, 不指定此参数,默认所有列的类型为字符串; 支持的数据类型包括:boolean, double, long, string
-
-##### fields [array]
-
-文件中,每行中各个列的名称,按照数据中实际列顺序提供
-
-##### path [string]
-
-Hadoop支持的文件路径(默认hdfs路径, 如/path/to/file), 如本地文件:file:///path/to/file, hdfs:///path/to/file, s3:///path/to/file ...
-
-##### table_name [string]
-
-将文件载入后将注册为一张表,这里指定的是表名称,可用于在SQL中直接与流处理数据关联
-
-##### common options [string]
-
-`Filter` 插件通用参数,详情参照 [Filter Plugin](/zh-cn/v1/configuration/filter-plugin)
-
-
-### Example
-
-> 不指定列的类型,默认为string
-
-```
-table {
- table_name = "mydict"
- path = "/user/seatunnel/mylog/a.txt"
- fields = ['city', 'population']
-}
-```
-
-> 指定列的类型
-
-```
-table {
- table_name = "mydict"
- path = "/user/seatunnel/mylog/a.txt"
- fields = ['city', 'population']
- field_types = ['string', 'long']
-}
-```
diff --git a/docs/configuration/filter-plugins/Truncate.docs b/docs/configuration/filter-plugins/Truncate.docs
deleted file mode 100644
index 76d983c..0000000
--- a/docs/configuration/filter-plugins/Truncate.docs
+++ /dev/null
@@ -1,11 +0,0 @@
-@seatunnelPlugin
-@pluginGroup filter
-@pluginName Truncate
-@pluginDesc "对指定字段进行字符串截取"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption string source_field="raw_message" no "源字段,若不配置默认为'raw_message'"
-@pluginOption string target_field="truncated" no "目标字段,若不配置默认为'truncated'"
-@pluginOption number max_length="256" no "截取字符串的最大长度"
diff --git a/docs/configuration/filter-plugins/Truncate.md b/docs/configuration/filter-plugins/Truncate.md
deleted file mode 100644
index 403e155..0000000
--- a/docs/configuration/filter-plugins/Truncate.md
+++ /dev/null
@@ -1,45 +0,0 @@
-## Filter plugin : Truncate
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-对指定字段进行字符串截取
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [max_length](#max_length-number) | number | no | 256 |
-| [source_field](#source_field-string) | string | no | raw_message |
-| [target_field](#target_field-string) | string | no | truncated |
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### max_length [number]
-
-截取字符串的最大长度
-
-##### source_field [string]
-
-源字段,若不配置默认为`raw_message`
-
-##### target_field [string]
-
-目标字段,若不配置默认为`truncated`
-
-##### common options [string]
-
-`Filter` 插件通用参数,详情参照 [Filter Plugin](/zh-cn/v1/configuration/filter-plugin)
-
-
-### Example
-
-```
-truncate {
- source_field = "telephone"
- max_length = 10
-}
-```
diff --git a/docs/configuration/filter-plugins/Uppercase.docs b/docs/configuration/filter-plugins/Uppercase.docs
deleted file mode 100644
index 394ec94..0000000
--- a/docs/configuration/filter-plugins/Uppercase.docs
+++ /dev/null
@@ -1,10 +0,0 @@
-@seatunnelPlugin
-@pluginGroup filter
-@pluginName Uppercase
-@pluginDesc "将指定字段内容全部转换为大写字母"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption string source_field="raw_message" no "源字段,若不配置默认为'raw_message'"
-@pluginOption string target_field="uppercased" no "目标字段,若不配置默认为'uppercased'"
diff --git a/docs/configuration/filter-plugins/Uppercase.md b/docs/configuration/filter-plugins/Uppercase.md
deleted file mode 100644
index 5b0de9c..0000000
--- a/docs/configuration/filter-plugins/Uppercase.md
+++ /dev/null
@@ -1,40 +0,0 @@
-## Filter plugin : Uppercase
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-将指定字段内容全部转换为大写字母
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [source_field](#source_field-string) | string | no | raw_message |
-| [target_field](#target_field-string) | string | no | uppercased |
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### source_field [string]
-
-源字段,若不配置默认为`raw_message`
-
-##### target_field [string]
-
-目标字段,若不配置默认为`uppercased`
-
-##### common options [string]
-
-`Filter` 插件通用参数,详情参照 [Filter Plugin](/zh-cn/v1/configuration/filter-plugin)
-
-
-### Example
-
-```
-uppercase {
- source_field = "username"
- target_field = "username_uppercased"
-}
-```
diff --git a/docs/configuration/filter-plugins/Urldecode.md b/docs/configuration/filter-plugins/Urldecode.md
deleted file mode 100644
index f21293c..0000000
--- a/docs/configuration/filter-plugins/Urldecode.md
+++ /dev/null
@@ -1,47 +0,0 @@
-## Filter plugin : UrlDecode
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.5.0
-
-### Description
-
-UrlDecode
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [source_field](#source_field-string) | string | no | raw_message |
-| [target_field](#target_field-string)| string | no | - |
-
-
-##### source_field [string]
-
-需要进行 `UrlDecode` 处理的字段。
-
-
-##### target_field [string]
-
-存储 `UrlDecode` 处理结果的目标字段,若不配置则与 `source_field` 保持一致。
-
-##### common options [string]
-
-`Filter` 插件通用参数,详情参照 [Filter Plugin](/zh-cn/v1/configuration/filter-plugin)
-
-
-### Example
-
-```
-urldecode {
- source_field = "url"
-}
-```
-
-`UrlDecode` 方法已经注册为 **UDF**,可以直接在 `SQL` 插件中使用
-
-```
-sql {
- sql = "select urldecode(url) as url from view_1"
-}
-```
diff --git a/docs/configuration/filter-plugins/Urlencode.md b/docs/configuration/filter-plugins/Urlencode.md
deleted file mode 100644
index c10248c..0000000
--- a/docs/configuration/filter-plugins/Urlencode.md
+++ /dev/null
@@ -1,47 +0,0 @@
-## Filter plugin : UrlEncode
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.5.0
-
-### Description
-
-UrlEncode
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [source_field](#source_field-string) | string | no | raw_message |
-| [target_field](#target_field-string)| string | no | - |
-
-
-##### source_field [string]
-
-需要进行 `UrlEncode` 处理的字段。
-
-
-##### target_field [string]
-
-存储 `UrlEncode` 处理结果的目标字段,若不配置则与 `source_field` 保持一致。
-
-##### common options [string]
-
-`Filter` 插件通用参数,详情参照 [Filter Plugin](/zh-cn/v1/configuration/filter-plugin)
-
-
-### Example
-
-```
-urlencode {
- source_field = "url"
-}
-```
-
-`UrlEncode` 方法已经注册为 **UDF**,可以直接在 `SQL` 插件中使用
-
-```
-sql {
- sql = "select urlencode(url) as url from view_1"
-}
-```
diff --git a/docs/configuration/filter-plugins/Uuid.docs b/docs/configuration/filter-plugins/Uuid.docs
deleted file mode 100644
index 2ed303b..0000000
--- a/docs/configuration/filter-plugins/Uuid.docs
+++ /dev/null
@@ -1,9 +0,0 @@
-@seatunnelPlugin
-@pluginGroup filter
-@pluginName Uuid
-@pluginDesc "为原始数据集新增自增id字段"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption string target_field="uuid" no "自增id字段,若不配置默认为'uuid'"
diff --git a/docs/configuration/filter-plugins/Uuid.md b/docs/configuration/filter-plugins/Uuid.md
deleted file mode 100644
index 27b99a2..0000000
--- a/docs/configuration/filter-plugins/Uuid.md
+++ /dev/null
@@ -1,34 +0,0 @@
-## Filter plugin : Uuid
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-为原始数据集新增一个全局唯一且自增的UUID字段,使用的是spark的`monotonically_increasing_id()`函数。
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [target_field](#target_field-string) | string | no | uuid |
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### target_field [string]
-
-存储uuid的目标字段,若不配置默认为`uuid`
-
-##### common options [string]
-
-`Filter` 插件通用参数,详情参照 [Filter Plugin](/zh-cn/v1/configuration/filter-plugin)
-
-
-### Example
-
-```
-uuid {
- target_field = "id"
-}
-```
diff --git a/docs/configuration/filter-plugins/Watermark.md b/docs/configuration/filter-plugins/Watermark.md
deleted file mode 100644
index 69ac22a..0000000
--- a/docs/configuration/filter-plugins/Watermark.md
+++ /dev/null
@@ -1,57 +0,0 @@
-## Filter plugin : Watermark
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.3.0
-
-### Description
-
-Spark Structured Streaming Watermark
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [time_field](#time_field-string) | string | yes | - |
-| [time_type](#time_type-string) | string | no | UNIX |
-| [time_pattern](#time_pattern-string) | string | no | yyyy-MM-dd HH:mm:ss |
-| [delay_threshold](#delay_threshold-string) | string | yes | - |
-| [watermark_field](#watermark_field-string) | string | yes | - |
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### time_field [string]
-
-日志中的事件时间字段
-
-##### time_type [string]
-
-日志中的事件时间字段的类型,支持三种类型 `UNIX_MS|UNIX|string`,UNIX_MS为13位的时间戳,UNIX为10位的时间戳,string为字符串类型的时间,如2019-04-08 22:10:23
-
-##### time_pattern [string]
-
-当你的`time_type`选择为string时,你可以指定这个参数来进行时间字符串的匹配,默认匹配格式为yyyy-MM-dd HH:mm:ss
-
-##### delay_threshold [string]
-
-等待数据到达的最小延迟。
-
-##### watermark_field [string]
-
-经过这个filter处理之后将会增加一个timestamp类型的字段,这个字段用于添加watermark
-
-##### common options [string]
-
-`Filter` 插件通用参数,详情参照 [Filter Plugin](/zh-cn/v1/configuration/filter-plugin)
-
-
-### Example
-
-```
-Watermark {
- delay_threshold = "5 minutes"
- time_field = "tf"
- time_type = "UNIX"
- watermark_field = "wm"
-}
-```
diff --git a/docs/configuration/input-plugin.md b/docs/configuration/input-plugin.md
deleted file mode 100644
index ee9054e..0000000
--- a/docs/configuration/input-plugin.md
+++ /dev/null
@@ -1,31 +0,0 @@
-# Input 插件
-
-### Input插件通用参数
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [result_table_name](#result_table_name-string) | string | yes | - |
-| [table_name](#table_name-string) | string | no | - |
-
-
-##### result_table_name [string]
-
-不指定 `result_table_name` 时,此插件处理后的数据,不会被注册为一个可供其他插件直接访问的数据集(dataset),或者被称为临时表(table);
-
-指定 `result_table_name` 时,此插件处理后的数据,会被注册为一个可供其他插件直接访问的数据集(dataset),或者被称为临时表(table)。此处注册的数据集(dataset),其他插件可通过指定 `source_table_name` 来直接访问。
-
-
-##### table_name [string]
-
-**\[从v1.4开始废弃\]** 功能同 `result_table_name`,后续 Release 版本中将删除此参数,建议使用 `result_table_name` 参数
-
-
-### 使用样例
-
-```
-fake {
- result_table_name = "view_table_2"
-}
-```
-
-> 数据源 `fake` 的结果将注册为名为 `view_table_2` 的临时表。这个临时表,可以被任意 `Filter` 或者 `Output` 插件通过指定 `source_table_name` 使用。
diff --git a/docs/configuration/input-plugins/Alluxio.md b/docs/configuration/input-plugins/Alluxio.md
deleted file mode 100644
index faf93ea..0000000
--- a/docs/configuration/input-plugins/Alluxio.md
+++ /dev/null
@@ -1,93 +0,0 @@
-## Input plugin : Alluxio [Static]
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.1.0
-
-### Description
-
-从Alluxio文件中读取数据。
-
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [format](#format-string) | string | no | json |
-| [options.*](#options-object) | object | no | - |
-| [options.rowTag](#optionsrowTag-string) | string | no | - |
-| [path](#path-string) | string | yes | - |
-| [common-options](#common-options-string)| string | yes | - |
-
-##### format [string]
-
-从Alluxio中读取文件的格式,目前支持`csv`、`json`、`parquet` 、`xml`、`orc`和 `text`.
-
-
-##### options [object]
-
-自定义参数,当`format = "xml"`时必须设置`options.rowTag`,配置XML格式数据的Tag,其他参数不是必填参数。
-
-
-##### options.rowTag [string]
-
-当format为xml时必须设置`options.rowTag`,配置XML格式数据的Tag
-
-
-##### path [string]
-
-Alluxio内存文件路径,以alluxio://开头
-
-##### common options [string]
-
-`Input` 插件通用参数,详情参照 [Input Plugin](/zh-cn/v1/configuration/input-plugin)
-
-### Note
-
-如果使用zookeeper控制alluxio,请将以下语句加入到start-seatunnel.sh中
-
-```
-driverJavaOpts="-Dalluxio.user.file.writetype.default=CACHE_THROUGH -Dalluxio.zookeeper.address=your.zookeeper.address:zookeeper.port -Dalluxio.zookeeper.enabled=true"
-executorJavaOpts="-Dalluxio.user.file.writetype.default=CACHE_THROUGH -Dalluxio.zookeeper.address=your.zookeeper.address:zookeeper.port -Dalluxio.zookeeper.enabled=true"
-```
-
-或者在1.5版本之后,可以通过在配置文件中的spark{}增加以下参数
-
-```
-spark.driverJavaOpts="-Dalluxio.user.file.writetype.default=CACHE_THROUGH -Dalluxio.zookeeper.address=your.zookeeper.address:zookeeper.port -Dalluxio.zookeeper.enabled=true"
-spark.executorJavaOpts="-Dalluxio.user.file.writetype.default=CACHE_THROUGH -Dalluxio.zookeeper.address=your.zookeeper.address:zookeeper.port -Dalluxio.zookeeper.enabled=true"
-```
-
-### Example
-
-```
-alluxio {
- path = "alluxio:///var/seatunnel-logs"
- result_table_name = "access_log"
- format = "json"
-}
-```
-
-> 从Alluxio中读取json文件,加载到seatunnel中待后续处理.
-
-
-或者可以指定 alluxio name service:
-
-```
-alluxio {
- result_table_name = "access_log"
- path = "alluxio://m2:19999/seatunnel-logs/access.log"
-}
-```
-
-
-也支持读取XML格式的文件:
-
-```
-alluxio {
- result_table_name = "books"
- path = "alluxio://m2:19999/seatunnel-logs/books.xml"
- options.rowTag = "book"
- format = "xml"
-}
-```
diff --git a/docs/configuration/input-plugins/Elasticsearch.md b/docs/configuration/input-plugins/Elasticsearch.md
deleted file mode 100644
index 5217527..0000000
--- a/docs/configuration/input-plugins/Elasticsearch.md
+++ /dev/null
@@ -1,73 +0,0 @@
-## Input plugin : Elasticsearch [Static]
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.3.2
-
-### Description
-
-从 Elasticsearch 中读取数据
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [hosts](#hosts-array) | array | yes | - |
-| [index](#index-string) | string | yes | |
-| [es](#es-string) | string | no | |
-| [common-options](#common-options-string)| string | yes | - |
-
-
-##### hosts [array]
-
-ElasticSearch 集群地址,格式为host:port,允许指定多个host。如 \["host1:9200", "host2:9200"]。
-
-
-##### index [string]
-
-ElasticSearch index名称,支持 `*` 模糊匹配
-
-
-##### es.* [string]
-
-用户还可以指定多个非必须参数,详细的参数列表见[Elasticsearch支持的参数](https://www.elastic.co/guide/en/elasticsearch/hadoop/current/configuration.html#cfg-mapping).
-
-如指定 `es.read.metadata` 的方式是: `es.read.metadata = true`。如果不指定这些非必须参数,它们将使用官方文档给出的默认值。
-
-### Tips
-
-在使用 ElasticSearch插件时,可以配置参数 `es.input.max.docs.per.partition`,用以最大化 seatunnel 读取 es 的效率,该参数用于决定任务的分区个数:
-
-> 分区数 = 总数据条数 / es.input.max.docs.per.partition
-
-通过增大任务分区数以支持更高的并发能力,根据实践优化这个参数的设置,读取ElasticSearch的效率可以提升3-10倍。
-
-
-如上所述 `es.input.max.docs.per.partition`,支持用户自行根据实际的数据量进行调整,否则分区数为 ElasticSearch 索引 Shard 的个数。
-
-##### common options [string]
-
-`Input` 插件通用参数,详情参照 [Input Plugin](/zh-cn/v1/configuration/input-plugin)
-
-
-### Examples
-
-```
-elasticsearch {
- hosts = ["localhost:9200"]
- index = "seatunnel-20190424"
- result_table_name = "my_dataset"
- }
-```
-
-
-```
-elasticsearch {
- hosts = ["localhost:9200"]
- index = "seatunnel-*"
- es.read.field.include = "name, age"
- result_table_name = "my_dataset"
- }
-```
-
-> 匹配所有以 `seatunnel-` 开头的索引, 并且仅仅读取 `name`和 `age` 两个字段。
diff --git a/docs/configuration/input-plugins/FakeStream.docs b/docs/configuration/input-plugins/FakeStream.docs
deleted file mode 100644
index 7120624..0000000
--- a/docs/configuration/input-plugins/FakeStream.docs
+++ /dev/null
@@ -1,13 +0,0 @@
-@seatunnelPlugin
-@pluginGroup input
-@pluginName Fake
-@pluginDesc "生成测试数据以供逻辑测试使用"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption string data_format="text" no "测试数据类型,支持text以及json"
-@pluginOption string text_delimeter="," no "文本数据分隔符,当'data_format'为text时使用"
-@pluginOption array json_keys no "json数据key列表,当'data_format'为json时使用"
-@pluginOption number num_of_fields="10" no "字段个数,当'data_format'为text时使用"
-@pluginOption number rate="1" yes "每秒生成测试用例个数"
diff --git a/docs/configuration/input-plugins/FakeStream.md b/docs/configuration/input-plugins/FakeStream.md
deleted file mode 100644
index 0f34f60..0000000
--- a/docs/configuration/input-plugins/FakeStream.md
+++ /dev/null
@@ -1,93 +0,0 @@
-## Input plugin : FakeStream [Streaming]
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-Fake Input主要用于方便地生成用户指定的数据,作为输入来对seatunnel进行功能验证,测试,以及性能测试等。
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [data_format](#data_format-string) | string | no | text |
-| [json_keys](#json_keys-array) | array | no | - |
-| [num_of_fields](#num_of_fields-number) | number | no | 10 |
-| [rate](#rate-number) | number | yes | - |
-| [text_delimeter](#text_delimeter-string) | string | no | , |
-| [common-options](#common-options-string)| string | yes | - |
-
-
-##### data_format [string]
-
-测试数据类型,支持text以及json
-
-##### json_keys [array]
-
-json数据key列表,当`data_format`为json时使用
-
-##### num_of_fields [number]
-
-字段个数,当`data_format`为text时使用
-
-##### rate [number]
-
-每秒生成测试用例个数
-
-##### text_delimeter [string]
-
-文本数据分隔符,当`data_format`为text时使用
-
-##### common options [string]
-
-`Input` 插件通用参数,详情参照 [Input Plugin](/zh-cn/v1/configuration/input-plugin)
-
-
-### Examples
-
-1. 使用`data_format`
-
- ```
- fakeStream {
- data_format = "text"
- text_delimeter = ","
- num_of_fields = 5
- rate = 5
- }
- ```
-
-* **Input**
-
- ```
- +-------------------------------------------------------------------------------------------+
- |raw_message |
- +-------------------------------------------------------------------------------------------+
- |Random1-1462437280,Random215896330,Random3-2009195549,Random41027365838,Random51525395111 |
- |Random1-2135047059,Random2-1030689538,Random3-854912064,Random4126768642,Random5-1483841750|
- +-------------------------------------------------------------------------------------------+
- ```
-
-
-2. 不使用`data_format`
-
- ```
- fakeStream {
- content = ['name=ricky&age=23', 'name=gary&age=28']
- rate = 5
- }
- ```
-
-* **Input**
-
- ```
- +-----------------+
- |raw_message |
- +-----------------+
- |name=gary&age=28 |
- |name=ricky&age=23|
- +-----------------+
- ```
-
- > 从`content`列表中随机抽取其中的字符串
diff --git a/docs/configuration/input-plugins/File.docs b/docs/configuration/input-plugins/File.docs
deleted file mode 100644
index bf7afdd..0000000
--- a/docs/configuration/input-plugins/File.docs
+++ /dev/null
@@ -1,10 +0,0 @@
-@seatunnelPlugin
-@pluginGroup input
-@pluginName File
-@pluginDesc "从文件中读取原始数据"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption
-string path yes "文件路径"
diff --git a/docs/configuration/input-plugins/File.md b/docs/configuration/input-plugins/File.md
deleted file mode 100644
index ac36629..0000000
--- a/docs/configuration/input-plugins/File.md
+++ /dev/null
@@ -1,79 +0,0 @@
-## Input plugin : File [Static]
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.1.1
-
-### Description
-
-从本地文件中读取原始数据。
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [format](#format-string) | string | no | json |
-| [options.*](#options-object) | object | no | - |
-| [options.rowTag](#optionsrowTag-string) | string | no | - |
-| [path](#path-string) | string | yes | - |
-| [common-options](#common-options-string)| string | yes | - |
-
-##### format [string]
-
-文件的格式,目前支持`csv`、`json`、`parquet` 、`xml`、`orc`和 `text`.
-
-
-##### options.* [object]
-
-自定义参数,当 `format` 为 **xml** 时必须设置 `options.rowTag`,配置XML格式数据的Tag,其他参数不是必填参数。
-
-
-##### options.rowTag [string]
-
-当format为xml时必须设置`options.rowTag`,配置XML格式数据的Tag
-
-
-##### path [string]
-
-文件路径,以file://开头
-
-
-##### common options [string]
-
-`Input` 插件通用参数,详情参照 [Input Plugin](/zh-cn/v1/configuration/input-plugin)
-
-
-### Example
-
-```
-file {
- path = "file:///var/log/access.log"
- result_table_name = "accesslog"
- format = "text"
-}
-```
-
-读取XML格式文件
-
-```
-file {
- path = "file:///data0/src/books.xml"
- options.rowTag = "book"
- format = "xml"
- result_table_name = "books"
-}
-```
-
-读取CSV格式文件
-
-```
-file {
- path = "file:///data0/src/books.csv"
- format = "csv"
- # 将第一行的header作为列名
- # 否则将以 _c0,_c1,_c2...依次命名
- options.header = "true"
- result_table_name = "books"
-}
-```
-
diff --git a/docs/configuration/input-plugins/FileStream.docs b/docs/configuration/input-plugins/FileStream.docs
deleted file mode 100644
index a7517ef..0000000
--- a/docs/configuration/input-plugins/FileStream.docs
+++ /dev/null
@@ -1,10 +0,0 @@
-@seatunnelPlugin
-@pluginGroup input
-@pluginName FileStream
-@pluginDesc "从文件中读取原始数据"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption
-string path yes "文件路径"
diff --git a/docs/configuration/input-plugins/FileStream.md b/docs/configuration/input-plugins/FileStream.md
deleted file mode 100644
index 867c4bc..0000000
--- a/docs/configuration/input-plugins/FileStream.md
+++ /dev/null
@@ -1,56 +0,0 @@
-## Input plugin : FileStream [Streaming]
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.1.0
-
-### Description
-
-从本地文件目录中读取原始数据,会监听新文件生成。
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [format](#format-string) | string | yes | text |
-| [path](#path-string) | string | yes | - |
-| [rowTag](#rowtag-string) | string | yes | - |
-| [common-options](#common-options-string)| string | yes | - |
-
-
-##### format [string]
-
-文件格式
-
-
-##### path [string]
-
-文件目录路径
-
-
-##### rowTag [string]
-
-仅当format为xml时使用,表示XML格式数据的Tag
-
-##### common options [string]
-
-`Input` 插件通用参数,详情参照 [Input Plugin](/zh-cn/v1/configuration/input-plugin)
-
-
-### Example
-
-```
-fileStream {
- path = "file:///var/log/"
-}
-```
-
-或者指定`format`
-
-```
-fileStream {
- path = "file:///var/log/"
- format = "xml"
- rowTag = "book"
-}
-```
diff --git a/docs/configuration/input-plugins/Hdfs.md b/docs/configuration/input-plugins/Hdfs.md
deleted file mode 100644
index a44bcac..0000000
--- a/docs/configuration/input-plugins/Hdfs.md
+++ /dev/null
@@ -1,91 +0,0 @@
-## Input plugin : Hdfs [Static]
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.1.0
-
-### Description
-
-从HDFS文件中读取数据。注意此插件与`HdfsStream`不同,它不是流式的。
-
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [format](#format-string) | string | no | json |
-| [options.*](#options-object) | object | no | - |
-| [options.rowTag](#optionsrowTag-string) | string | no | - |
-| [path](#path-string) | string | yes | - |
-| [common-options](#common-options-string)| string | yes | - |
-
-##### format [string]
-
-从HDFS中读取文件的格式,目前支持`csv`、`json`、`parquet` 、`xml`、`orc`和 `text`.
-
-
-##### options [object]
-
-自定义参数,当`format = "xml"`时必须设置`options.rowTag`,配置XML格式数据的Tag,其他参数不是必填参数。
-
-
-##### options.rowTag [string]
-
-当 `format` 为 **xml** 时必须设置 `options.rowTag`,配置XML格式数据的Tag
-
-
-##### path [string]
-
-Hadoop集群文件路径,以hdfs://开头
-
-##### common options [string]
-
-`Input` 插件通用参数,详情参照 [Input Plugin](/zh-cn/v1/configuration/input-plugin)
-
-
-
-### Example
-
-```
-hdfs {
- path = "hdfs:///var/seatunnel-logs"
- result_table_name = "access_log"
- format = "json"
-}
-```
-
-> 从HDFS中读取json文件,加载到seatunnel中待后续处理.
-
-
-或者可以指定 hdfs name service:
-
-```
-hdfs {
- result_table_name = "access_log"
- path = "hdfs://m2:8022/seatunnel-logs/access.log"
-}
-```
-
-读取XML格式的文件:
-
-```
-hdfs {
- result_table_name = "books"
- path = "hdfs://m2:8022/seatunnel-logs/books.xml"
- options.rowTag = "book"
- format = "xml"
-}
-```
-
-读取CSV格式文件
-
-```
-hdfs {
- path = "hdfs://m2:8022/seatunnel-logs/books.csv"
- format = "csv"
- # 将第一行的header作为列名
- # 否则将以 _c0,_c1,_c2...依次命名
- options.header = "true"
- result_table_name = "books"
-}
-```
diff --git a/docs/configuration/input-plugins/HdfsStream.docs b/docs/configuration/input-plugins/HdfsStream.docs
deleted file mode 100644
index 0187529..0000000
--- a/docs/configuration/input-plugins/HdfsStream.docs
+++ /dev/null
@@ -1,10 +0,0 @@
-@seatunnelPlugin
-@pluginGroup input
-@pluginName Hdfs
-@pluginDesc "从HDFS中读取原始数据"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption
-string path yes "Hadoop集群上文件路径"
diff --git a/docs/configuration/input-plugins/HdfsStream.md b/docs/configuration/input-plugins/HdfsStream.md
deleted file mode 100644
index 9c60faf..0000000
--- a/docs/configuration/input-plugins/HdfsStream.md
+++ /dev/null
@@ -1,64 +0,0 @@
-## Input plugin : HdfsStream [Streaming]
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.1.0
-
-### Description
-
-监听HDFS目录中的文件变化,实时加载并处理新文件,形成文件处理流。
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [format](#format-string) | string | yes | text |
-| [path](#path-string) | string | yes | - |
-| [rowTag](#rowtag-string) | string | yes | - |
-| [common-options](#common-options-string)| string | yes | - |
-
-
-##### format [string]
-
-文件格式
-
-
-##### path [string]
-
-文件目录路径
-
-
-##### rowTag [string]
-
-仅当format为xml时使用,表示XML格式数据的Tag
-
-##### common options [string]
-
-`Input` 插件通用参数,详情参照 [Input Plugin](/zh-cn/v1/configuration/input-plugin)
-
-
-### Example
-
-```
-hdfsStream {
- path = "hdfs:///access/log/"
-}
-```
-
-或者可以指定 hdfs name service:
-
-```
-hdfsStream {
- path = "hdfs://m2:8022/access/log/"
-}
-```
-
-或者指定`format`
-
-```
-hdfsStream {
- path = "hdfs://m2:8022/access/log/"
- format = "xml"
- rowTag = "book"
-}
-```
diff --git a/docs/configuration/input-plugins/Hive.docs b/docs/configuration/input-plugins/Hive.docs
deleted file mode 100644
index 4359011..0000000
--- a/docs/configuration/input-plugins/Hive.docs
+++ /dev/null
@@ -1,11 +0,0 @@
-@seatunnelPlugin
-@pluginGroup input
-@pluginName Hive
-@pluginDesc "从hive读取原始数据"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption
-string pre_sql yes "进行预处理的sql, 如果不需要预处理,可以使用select * from hive_db.hive_table"
-string table_name yes "预处理sql得到的数据注册成的临时表名"
diff --git a/docs/configuration/input-plugins/Hive.md b/docs/configuration/input-plugins/Hive.md
deleted file mode 100644
index 8d53af7..0000000
--- a/docs/configuration/input-plugins/Hive.md
+++ /dev/null
@@ -1,64 +0,0 @@
-## Input plugin : Hive
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.1.2
-
-### Description
-
-从hive中获取数据。
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [pre_sql](#pre_sql-string) | string | yes | - |
-| [common-options](#common-options-string)| string | yes | - |
-
-
-##### pre_sql [string]
-
-进行预处理的sql, 如果不需要预处理,可以使用select * from hive_db.hive_table
-
-##### common options [string]
-
-`Input` 插件通用参数,详情参照 [Input Plugin](/zh-cn/v1/configuration/input-plugin)
-
-
-**注意:从seatunnel v1.3.4 开始,使用hive input必须做如下配置:**
-
-```
-# seatunnel 配置文件中的spark section中:
-
-spark {
- ...
- spark.sql.catalogImplementation = "hive"
- ...
-}
-
-```
-
-
-### Example
-
-```
-spark {
- ...
- spark.sql.catalogImplementation = "hive"
- ...
-}
-
-input {
- hive {
- pre_sql = "select * from mydb.mytb"
- result_table_name = "myTable"
- }
-}
-
-...
-```
-
-### Notes
-必须保证hive的metastore是在服务状态。启动命令 `hive --service metastore`,服务的默认端口为 `9083`。
-cluster、client、local模式下必须把hive-site.xml置于提交任务节点的$HADOOP_CONF目录下(或者放在$SPARK_HOME/conf下面),IDE本地调试将其放在resources目录
-
diff --git a/docs/configuration/input-plugins/Jdbc.md b/docs/configuration/input-plugins/Jdbc.md
deleted file mode 100644
index 6a69d42..0000000
--- a/docs/configuration/input-plugins/Jdbc.md
+++ /dev/null
@@ -1,112 +0,0 @@
-## Input plugin : Jdbc
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-通过JDBC读取外部数据源数据
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [driver](#driver-string) | string | yes | - |
-| [jdbc.*](#jdbc-string) | string| no ||
-| [password](#password-string) | string | yes | - |
-| [table](#table-string) | string | yes | - |
-| [url](#url-string) | string | yes | - |
-| [user](#user-string) | string | yes | - |
-| [common-options](#common-options-string)| string | yes | - |
-
-
-##### driver [string]
-
-用来连接远端数据源的JDBC类名
-
-
-##### jdbc [string]
-
-除了以上必须指定的参数外,用户还可以指定多个非必须参数,覆盖了Spark JDBC提供的所有[参数](https://spark.apache.org/docs/2.4.0/sql-programming-guide.html#jdbc-to-other-databases).
-
-指定参数的方式是在原参数名称上加上前缀"jdbc.",如指定fetchsize的方式是: jdbc.fetchsize = 50000。如果不指定这些非必须参数,它们将使用Spark JDBC给出的默认值。
-
-
-##### password [string]
-
-密码
-
-##### table [string]
-
-表名,或者指定SQL语句用于过滤
-
-
-##### url [string]
-
-JDBC连接的URL。参考一个案例: `jdbc:postgresql://localhost/test`
-
-
-##### user [string]
-
-用户名
-
-##### common options [string]
-
-`Input` 插件通用参数,详情参照 [Input Plugin](/zh-cn/v1/configuration/input-plugin)
-
-
-### Example
-
-```
-jdbc {
- driver = "com.mysql.jdbc.Driver"
- url = "jdbc:mysql://localhost:3306/info"
- table = "access"
- result_table_name = "access_log"
- user = "username"
- password = "password"
-}
-```
-
-```
-jdbc {
- driver = "com.mysql.jdbc.Driver"
- url = "jdbc:mysql://localhost:3306/info"
- table = "(select * from access) AS a"
- result_table_name = "access_log"
- user = "username"
- password = "password"
-}
-```
-
-> 通过JDBC读取MySQL数据
-
-```yaml
-jdbc {
- driver = "com.mysql.jdbc.Driver"
- url = "jdbc:mysql://localhost:3306/info"
- table = "access"
- result_table_name = "access_log"
- user = "username"
- password = "password"
- jdbc.partitionColumn = "item_id"
- jdbc.numPartitions = "10"
- jdbc.lowerBound = 0
- jdbc.upperBound = 100
-}
-```
-> 根据指定字段划分分区
-
-
-#### Tips
-
-指定的JDBC的Jar包放置在 `plugins` 目录下的指定结构中,如下
-
-```
-cd seatunnel
-mkdir -p plugins/my_plugins/lib
-cp third-part.jar plugins/my_plugins/lib
-```
-
-即可被 `seatunnel` 识别加载。
diff --git a/docs/configuration/input-plugins/KafkaStream.docs b/docs/configuration/input-plugins/KafkaStream.docs
deleted file mode 100644
index 32de3d1..0000000
--- a/docs/configuration/input-plugins/KafkaStream.docs
+++ /dev/null
@@ -1,11 +0,0 @@
-@seatunnelPlugin
-@pluginGroup input
-@pluginName Kafka
-@pluginDesc "Kafka作为数据源"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption string topic yes "Kafka Topic"
-@pluginOption string consumer.zookeeper.connect yes "Kafka zookeeper broker"
-@pluginOption string consumer.group.id yes "Kafka consumer group id"
diff --git a/docs/configuration/input-plugins/KafkaStream.md b/docs/configuration/input-plugins/KafkaStream.md
deleted file mode 100644
index 3b75c48..0000000
--- a/docs/configuration/input-plugins/KafkaStream.md
+++ /dev/null
@@ -1,101 +0,0 @@
-## Input plugin : KafkaStream [Streaming]
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.1.0
-
-### Description
-
-从Kafka消费数据,支持的Kafka版本 >= 0.10.0.
-
-
-### Options
-
-| name | type | required | default value | engine |
-| --- | --- | --- | --- | --- |
-| [topics](#topics-string) | string | yes | - | all streaming |
-| [consumer.group.id](#consumergroupid-string) | string | yes | - | all streaming |
-| [consumer.bootstrap.servers](#consumerbootstrapservers-string) | string | yes | - | all streaming |
-| [consumer.*](#consumer-string) | string | no | - | all streaming |
-| [offset.location](#offsetlocation-string) | string | no | - | Structured streaming |
-| [common-options](#common-options-string)| string | yes | - | all streaming |
-
-
-##### topics [string]
-
-Kafka topic名称。如果有多个topic,用","分割,例如: "tpc1,tpc2"。
-
-##### consumer.group.id [string]
-
-Kafka consumer group id,用于区分不同的消费组。structured streaming本不用设置group id,但是其group id 每次启动都是不一样的,为了便于监控,将这个参数加上。
-
-##### consumer.bootstrap.servers [string]
-
-Kafka集群地址,多个用","隔开
-
-##### consumer.* [string]
-
-除了以上必备的kafka consumer客户端必须指定的参数外,用户还可以指定多个consumer客户端非必须参数,覆盖了[kafka官方文档指定的所有consumer参数](http://kafka.apache.org/documentation.html#oldconsumerconfigs).
-
-Spark Structured Streaming 中 Kafka Source 可选参数参考 [Structured Streaming + Kafka Integration Guide](https://spark.apache.org/docs/latest/structured-streaming-kafka-integration.html#reading-data-from-kafka)
-
-指定参数的方式是在原参数名称上加上前缀"consumer.",如指定`auto.offset.reset`的方式是: `consumer.auto.offset.reset = latest`。如果不指定这些非必须参数,它们将使用Kafka官方文档给出的默认值。
-
-##### offset.location [string]
-
-这个参数只有一个值,当你的checkpoint不可使用时,设置这个值为`broker`,将从broker获取offset进行消费。此参数仅在checkpoint不可使用时设置,否则可能发生不可预测的结果
-
-##### common options [string]
-
-`Input` 插件通用参数,详情参照 [Input Plugin](/zh-cn/v1/configuration/input-plugin)
-
-
-### Examples
-
-* Spark Streaming
-
-```
-kafkaStream {
- topics = "seatunnel"
- consumer.bootstrap.servers = "localhost:9092"
- consumer.group.id = "seatunnel_group"
-}
-```
-
-* Spark Structured Streaming
-
-```
-kafkaStream {
- topics = "seatunnel"
- consumer.bootstrap.servers = "localhost:9092"
- consumer.group.id = "seatunnel_group"
- consumer.failOnDataLoss = false
-}
-```
-### Notes
-* 在 `Spark Structured Streaming` 模式下,如果kafka里的数据是json格式,可以指定json的schema,`input` 将按照指定的schema进行解析,如果你需要流关联功能,还需要指定 `table_name` 这个参数以便在sql插件中使用
-如下
-```
-kafkaStream {
- topics = "seatunnel"
- consumer.bootstrap.servers = "localhost:9092"
- consumer.group.id = "seatunnel_group"
- consumer.rebalance.max.retries = 100
- consumer.failOnDataLoss = false
- schema = "{\"name\":\"string\",\"age\":\"integer\",\"addrs\":{\"country\":\"string\",\"city\":\"string\"}}"
-}
-```
-
-* 在 `Spark Streaming` 模式下,会将数据统一按照字符串进行处理,生成如下格式
-
-```
-+--------------+-------------------------------+
-| topic | raw_message |
-+--------------+-------------------------------+
-| topic_name | kafka_message_1 |
-+--------------+-------------------------------+
-| topic_name | kafka_message_2 |
-+--------------+-------------------------------+
-```
-
-后续可以在 `filter` 部分使用 `json`、`grok`、`split` 等插件进行处理
diff --git a/docs/configuration/input-plugins/Kudu.docs b/docs/configuration/input-plugins/Kudu.docs
deleted file mode 100644
index cbb0b3b..0000000
--- a/docs/configuration/input-plugins/Kudu.docs
+++ /dev/null
@@ -1,12 +0,0 @@
-@seatunnelPlugin
-@pluginGroup input
-@pluginName Kudu
-@pluginDesc "从[Apache Kudu](https://kudu.apache.org) 表中读取数据"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption
-string kudu_master yes "kudu的master,多个master以逗号隔开"
-string kudu_table yes "kudu要读取的表名"
-string table_name yes "获取到数据注册成的临时表名"
diff --git a/docs/configuration/input-plugins/Kudu.md b/docs/configuration/input-plugins/Kudu.md
deleted file mode 100644
index 28c7b54..0000000
--- a/docs/configuration/input-plugins/Kudu.md
+++ /dev/null
@@ -1,41 +0,0 @@
-## Input plugin : Kudu
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.1.2
-
-### Description
-
-从[Apache Kudu](https://kudu.apache.org) 表中读取数据.
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [kudu_master](#kudu_master-string) | string | yes | - |
-| [kudu_table](#kudu_table-string) | string | yes | - |
-| [common-options](#common-options-string)| string | yes | - |
-
-
-##### kudu_master [string]
-
-kudu的master,多个master以逗号隔开
-
-##### kudu_table [string]
-
-kudu中要读取的表名
-
-##### common options [string]
-
-`Input` 插件通用参数,详情参照 [Input Plugin](/zh-cn/v1/configuration/input-plugin)
-
-
-### Example
-
-```
-kudu{
- kudu_master="hadoop01:7051,hadoop02:7051,hadoop03:7051"
- kudu_table="my_kudu_table"
- result_table_name="reg_table"
- }
-```
diff --git a/docs/configuration/input-plugins/MongoDB.docs b/docs/configuration/input-plugins/MongoDB.docs
deleted file mode 100644
index e3603e6..0000000
--- a/docs/configuration/input-plugins/MongoDB.docs
+++ /dev/null
@@ -1,13 +0,0 @@
-@seatunnelPlugin
-@pluginGroup input
-@pluginName MongoDB
-@pluginDesc "从[MongoDB](https://www.mongodb.com/)读取数据"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption
-string readConfig.uri yes "mongoDB uri"
-string readConfig.database yes "要读取的database"
-string readConfig.collection yes "要读取的collection"
-string table_name yes "读取数据注册成的临时表名"
diff --git a/docs/configuration/input-plugins/MongoDB.md b/docs/configuration/input-plugins/MongoDB.md
deleted file mode 100644
index 0e628d1..0000000
--- a/docs/configuration/input-plugins/MongoDB.md
+++ /dev/null
@@ -1,62 +0,0 @@
-## Input plugin : MongoDB
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.1.2
-
-### Description
-
-从[MongoDB](https://www.mongodb.com/)读取数据
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [readconfig.uri](#readconfig.uri-string) | string | yes | - |
-| [readconfig.database](#readconfig.database-string) | string | yes | - |
-| [readconfig.collection](#readconfig.collection-string) | string | yes | - |
-| [readconfig.*](#readconfig.*-string) | string | no | - |
-| [schema](#schema-string) | string | no | - |
-| [common-options](#common-options-string)| string | yes | - |
-
-
-##### readconfig.uri [string]
-
-要读取mongoDB的uri
-
-##### readconfig.database [string]
-
-要读取mongoDB的database
-
-##### readconfig.collection [string]
-
-要读取mongoDB的collection
-
-##### readconfig.* [string]
-
-这里还可以配置更多其他参数,详见https://docs.mongodb.com/spark-connector/v1.1/configuration/, 参见其中的`Input Configuration`部分
-指定参数的方式是在原参数名称上加上前缀"readconfig." 如设置`spark.mongodb.input.partitioner`的方式是 `readconfig.spark.mongodb.input.partitioner="MongoPaginateBySizePartitioner"`。如果不指定这些非必须参数,将使用MongoDB官方文档的默认值
-
-##### schema [string]
-
-因为mongoDB不存在schema的概念,在spark读取mongo的时候,会去对mongo的数据进行抽样并推断schema,
-实际上这个过程会比较慢并且可能不准确,此参数可以手动指定schema避免这些问题。schema为一个json字符串,如`{\"name\":\"string\",\"age\":\"integer\",\"addrs\":{\"country\":\"string\",\"city\":\"string\"}}`
-
-##### common options [string]
-
-`Input` 插件通用参数,详情参照 [Input Plugin](/zh-cn/v1/configuration/input-plugin)
-
-
-
-### Example
-
-```
-mongodb{
- readconfig.uri="mongodb://myhost:myport"
- readconfig.database="mydatabase"
- readconfig.collection="mycollection"
- readconfig.spark.mongodb.input.partitioner = "MongoPaginateBySizePartitioner"
- schema="{\"name\":\"string\",\"age\":\"integer\",\"addrs\":{\"country\":\"string\",\"city\":\"string\"}}"
- result_table_name = "test"
- }
-```
diff --git a/docs/configuration/input-plugins/MySQL.md b/docs/configuration/input-plugins/MySQL.md
deleted file mode 100644
index c463018..0000000
--- a/docs/configuration/input-plugins/MySQL.md
+++ /dev/null
@@ -1,92 +0,0 @@
-## Input plugin : Mysql
-
-* Author: InterestingLab
-* Homepage: <https://interestinglab.github.io/seatunnel-docs>
-* Version: 1.0.0
-
-### Description
-
-读取MySQL的数据
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [password](#password-string) | string | yes | - |
-| [jdbc.*](#jdbc-string) | string| no | |
-| [table](#table-string) | string | yes | - |
-| [url](#url-string) | string | yes | - |
-| [user](#user-string) | string | yes | - |
-| [common-options](#common-options-string)| string | yes | - |
-
-
-##### password [string]
-
-密码
-
-
-##### jdbc [string]
-
-除了以上必须指定的参数外,用户还可以指定多个非必须参数,覆盖了Spark JDBC提供的所有[参数](https://spark.apache.org/docs/2.4.0/sql-programming-guide.html#jdbc-to-other-databases).
-
-指定参数的方式是在原参数名称上加上前缀"jdbc.",如指定fetchsize的方式是: jdbc.fetchsize = 50000。如果不指定这些非必须参数,它们将使用Spark JDBC给出的默认值。
-
-
-##### table [string]
-
-表名,或者指定SQL语句用于过滤
-
-
-##### url [string]
-
-JDBC连接的URL。参考一个案例:`jdbc:mysql://localhost:3306/info`
-
-
-##### user [string]
-
-用户名
-
-##### common options [string]
-
-`Input` 插件通用参数,详情参照 [Input Plugin](/zh-cn/v1/configuration/input-plugin)
-
-
-### Example
-
-```
-mysql {
- url = "jdbc:mysql://localhost:3306/info"
- table = "access"
- result_table_name = "access_log"
- user = "username"
- password = "password"
-}
-```
-
-```
-mysql {
- url = "jdbc:mysql://localhost:3306/info"
- table = "(select * from access) AS a"
- result_table_name = "access_log"
- user = "username"
- password = "password"
-}
-```
-
-> 从MySQL中读取数据
-
-```
-mysql {
- url = "jdbc:mysql://localhost:3306/info"
- table = "access"
- result_table_name = "access_log"
- user = "username"
- password = "password"
- jdbc.partitionColumn = "item_id"
- jdbc.numPartitions = "10"
- jdbc.lowerBound = 0
- jdbc.upperBound = 100
-}
-```
-
-> 根据指定字段划分分区
diff --git a/docs/configuration/input-plugins/Redis.md b/docs/configuration/input-plugins/Redis.md
deleted file mode 100644
index 63e03cf..0000000
--- a/docs/configuration/input-plugins/Redis.md
+++ /dev/null
@@ -1,72 +0,0 @@
-## Input plugin : Redis
-
-* Author: InterestingLab
-* Homepage: https://github.com/InterestingLab/seatunnel
-* Version: 1.1.0
-
-### Description
-
-从Redis中读取数据.
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [host](#host-string) | string | yes | - |
-| [port](#port-int) | int | no | 6379 |
-| [key_pattern](#key_pattern-string) | string | yes | - |
-| [partition](#partition-int) | int | no | 3 |
-| [db_num](#db_num-int) | int | no | 0 |
-| [auth](#auth-string) | string | no | - |
-| [common-options](#common-options-string)| string | yes | - |
-
-
-##### host [string]
-
-Redis服务器地址
-
-##### port [int]
-
-Redis服务端口, 默认6379
-
-##### key_pattern [string]
-
-Redis Key, 支持模糊匹配
-
-##### partition [int]
-
-Redis分片数量. 默认为3
-
-##### db_num [int]
-
-Redis数据库索引标识. 默认连接到db0.
-
-##### auth [string]
-
-redis 鉴权密码
-
-##### common options [string]
-
-`Input` 插件通用参数,详情参照 [Input Plugin](/zh-cn/v1/configuration/input-plugin)
-
-
-### Example
-
-```
-Redis {
- host = "192.168.1.100"
- port = 6379
- key_pattern = "*keys*"
- partition = 20
- db_num = 2
- result_table_name = "redis_result_table"
-}
-```
-
-> 返回的table是一个两个字段均为string类型的数据表
-
-| raw_key | raw_message |
-| --- | --- |
-| [keys](#keys) | xxx |
-| [my_keys](#my_keys) | xxx |
-| [keys_mine](#keys_mine) | xxx |
diff --git a/docs/configuration/input-plugins/RedisStream.md b/docs/configuration/input-plugins/RedisStream.md
deleted file mode 100644
index 52be01a..0000000
--- a/docs/configuration/input-plugins/RedisStream.md
+++ /dev/null
@@ -1,83 +0,0 @@
-## Input plugin : RedisStream [Streaming]
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.1.0
-
-### Description
-
-Redis集群作为数据源,以队列作为数据输入源
-> 例如:logstash支持Redis集群的方法,参见 https://github.com/elastic/logstash/issues/12099
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [host](#host-string) | string | yes | - |
-| [prefKey](#prefKey-string) | string | yes | - |
-| [queue](#queue-string) | string | yes | - |
-| [password](#password-string) | string | no | - |
-| [maxTotal](#maxTotal-number) | number | no | 200 |
-| [maxIdle](#maxIdle-number) | number | no | 200 |
-| [maxWaitMillis](#maxWaitMillis-number) | number | no | 2000 |
-| [connectionTimeout](#connectionTimeout-number) | number | no | 5000 |
-| [soTimeout](#soTimeout-number) | number | no | 5000 |
-| [maxAttempts](#maxAttempts-number) | number | no | 5 |
-
-##### host [string]
-
-redis集群地址:多个以逗号分隔
-> 例子:127.0.0.1:7000,127.0.0.1:7001,127.0.0.1:7002
-
-##### prefKey [string]
-
-redis-queue业务前缀, 前缀规则: prefKey + ':' + queue
-> prefKey为空字符串,则实际队列名称为 queue
-
-##### queue [string]
-
-redis队列名称 , 数据存储队列
-> 例子:队列实际名称为 prefKey:queue
-
-##### password [string]
-
-redis密码,空字符串为无密码
-
-##### maxTotal [number]
-
-redis连接池的最大数据库连接数
-
-##### maxIdle [number]
-
-redis最大空闲数
-
-##### maxWaitMillis [number]
-
-redis最大建立连接等待时间
-
-##### connectionTimeout [number]
-
-redis连接超时时间
-
-##### soTimeout [number]
-
-redis读取数据超时时间
-
-##### maxAttempts [number]
-
-redis最大尝试次数
-
-##### common options [string]
-
-`Input` 插件通用参数,详情参照 [Input Plugin](/zh-cn/v1/configuration/input-plugin)
-
-### Example
-
-```
-RedisStream {
- host = "127.0.0.1:7000,127.0.0.1:7001,127.0.0.1:7002"
- prefKey = ""
- queue = "test"
- password = "root"
-}
-```
diff --git a/docs/configuration/input-plugins/S3Stream.docs b/docs/configuration/input-plugins/S3Stream.docs
deleted file mode 100644
index a065b0c..0000000
--- a/docs/configuration/input-plugins/S3Stream.docs
+++ /dev/null
@@ -1,10 +0,0 @@
-@seatunnelPlugin
-@pluginGroup input
-@pluginName S3
-@pluginDesc "从S3云存储上读取原始数据"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption
-string path yes "S3云存储路径"
diff --git a/docs/configuration/input-plugins/S3Stream.md b/docs/configuration/input-plugins/S3Stream.md
deleted file mode 100644
index d177c22..0000000
--- a/docs/configuration/input-plugins/S3Stream.md
+++ /dev/null
@@ -1,34 +0,0 @@
-## Input plugin : S3Stream [Streaming]
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.1.0
-
-### Description
-
-从S3云存储上读取原始数据
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [path](#path-string) | string | yes | - |
-| [common-options](#common-options-string)| string | yes | - |
-
-
-##### path [string]
-
-S3云存储路径,当前支持的路径格式有**s3://**, **s3a://**, **s3n://**
-
-##### common options [string]
-
-`Input` 插件通用参数,详情参照 [Input Plugin](/zh-cn/v1/configuration/input-plugin)
-
-
-### Example
-
-```
-s3Stream {
- path = "s3n://bucket/access.log"
-}
-```
diff --git a/docs/configuration/input-plugins/SocketStream.docs b/docs/configuration/input-plugins/SocketStream.docs
deleted file mode 100644
index 914f828..0000000
--- a/docs/configuration/input-plugins/SocketStream.docs
+++ /dev/null
@@ -1,10 +0,0 @@
-@seatunnelPlugin
-@pluginGroup input
-@pluginName Socket
-@pluginDesc "Socket作为数据源"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption string host="localhost" no "socket server hostname"
-@pluginOption number port="9999" no "socket server port"
diff --git a/docs/configuration/input-plugins/SocketStream.md b/docs/configuration/input-plugins/SocketStream.md
deleted file mode 100644
index d5b1bef..0000000
--- a/docs/configuration/input-plugins/SocketStream.md
+++ /dev/null
@@ -1,31 +0,0 @@
-## Input plugin : Socket [Streaming]
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.1.0
-
-### Description
-
-Socket作为数据源
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [host](#host-string) | string | no | localhost |
-| [port](#port-number) | number | no | 9999 |
-| [common-options](#common-options-string)| string | yes | - |
-
-
-##### host [string]
-
-socket server hostname
-
-##### port [number]
-
-socket server port
-
-##### common options [string]
-
-`Input` 插件通用参数,详情参照 [Input Plugin](/zh-cn/v1/configuration/input-plugin)
-
diff --git a/docs/configuration/input-plugins/Tidb.md b/docs/configuration/input-plugins/Tidb.md
deleted file mode 100644
index 6926138..0000000
--- a/docs/configuration/input-plugins/Tidb.md
+++ /dev/null
@@ -1,61 +0,0 @@
-## Input plugin : TiDB
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.1.5
-
-### Description
-
-通过[TiSpark](https://github.com/pingcap/tispark)从[TiDB](https://github.com/pingcap/tidb)数据库中读取数据,当前仅仅支持Spark 2.1
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [database](#database-string) | string | yes | - |
-| [pre_sql](#pre_sql-string) | string | yes | - |
-| [common-options](#common-options-string)| string | yes | - |
-
-##### database [string]
-
-TiDB库名
-
-##### pre_sql [string]
-
-进行预处理的sql, 如果不需要预处理,可以使用select * from tidb_db.tidb_table
-
-##### common options [string]
-
-`Input` 插件通用参数,详情参照 [Input Plugin](/zh-cn/v1/configuration/input-plugin)
-
-
-### Example
-
-
-使用TiDB Input必须在`spark-defaults.conf`或者seatunnel配置文件中配置`spark.tispark.pd.addresses`和`spark.sql.extensions`。
-
-一个seatunnel读取TiDB数据的配置文件如下:
-
-```
-spark {
- ...
- spark.tispark.pd.addresses = "localhost:2379"
- spark.sql.extensions = "org.apache.spark.sql.TiExtensions"
-}
-
-input {
- tidb {
- database = "test"
- pre_sql = "select * from test.my_table"
- result_table_name = "myTable"
- }
-}
-
-filter {
- ...
-}
-
-output {
- ...
-}
-```
diff --git a/docs/configuration/output-plugin.md b/docs/configuration/output-plugin.md
deleted file mode 100644
index 37541ac..0000000
--- a/docs/configuration/output-plugin.md
+++ /dev/null
@@ -1,32 +0,0 @@
-# Output 插件
-
-### Output插件通用参数
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [source_table_name](#source_table_name-string) | string | no | - |
-
-
-
-##### source_table_name [string]
-
-不指定 `source_table_name` 时,当前插件处理的就是配置文件中上一个插件输出的数据集(dataset);
-
-指定 `source_table_name` 的时候,当前插件处理的就是此参数对应的数据集。
-
-
-### 使用样例
-
-```
-stdout {
- source_table_name = "view_table_2"
-}
-```
-
-> 将名为 `view_table_2` 的临时表输出。
-
-```
-stdout {}
-```
-
-> 若不配置`source_table_name`, 将配置文件中最后一个 `Filter` 插件的处理结果输出
diff --git a/docs/configuration/output-plugins/Alluxio.md b/docs/configuration/output-plugins/Alluxio.md
deleted file mode 100644
index 43c5a24..0000000
--- a/docs/configuration/output-plugins/Alluxio.md
+++ /dev/null
@@ -1,89 +0,0 @@
-## Output plugin : Alluxio
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-输出数据到Alluxio文件
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [options](#options-object) | object | no | - |
-| [partition_by](#partition_by-array) | array | no | - |
-| [path](#path-string) | string | yes | - |
-| [path_time_format](#path_time_format-string) | string | no | yyyyMMddHHmmss |
-| [save_mode](#save_mode-string) | string | no | error |
-| [format](#format-string) | string | no | json |
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### options [object]
-
-自定义参数
-
-##### partition_by [array]
-
-根据所选字段对数据进行分区
-
-##### path [string]
-
-Alluxio内存文件路径,以alluxio://开头
-
-##### path_time_format [string]
-
-当`path`参数中的格式为`xxxx-${now}`时,`path_time_format`可以指定alluxio路径的时间格式,默认值为 `yyyyMMddHHmmss`。常用的时间格式列举如下:
-
-| Symbol | Description |
-| --- | --- |
-| y | Year |
-| M | Month |
-| d | Day of month |
-| H | Hour in day (0-23) |
-| m | Minute in hour |
-| s | Second in minute |
-
-详细的时间格式语法见[Java SimpleDateFormat](https://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html)。
-
-##### save_mode [string]
-
-存储模式,当前支持overwrite,append,ignore以及error。每个模式具体含义见[save-modes](http://spark.apache.org/docs/2.2.0/sql-programming-guide.html#save-modes)
-
-##### format [string]
-
-序列化方法,当前支持csv、json、parquet、orc和text
-
-##### common options [string]
-
-`Output` 插件通用参数,详情参照 [Output Plugin](/zh-cn/v1/configuration/output-plugin)
-
-### Note
-
-如果使用zookeeper控制alluxio,请将以下语句加入到start-seatunnel.sh中
-
-```
-driverJavaOpts="-Dalluxio.user.file.writetype.default=CACHE_THROUGH -Dalluxio.zookeeper.address=your.zookeeper.address:zookeeper.port -Dalluxio.zookeeper.enabled=true"
-executorJavaOpts="-Dalluxio.user.file.writetype.default=CACHE_THROUGH -Dalluxio.zookeeper.address=your.zookeeper.address:zookeeper.port -Dalluxio.zookeeper.enabled=true"
-```
-
-或者在1.5版本之后,可以通过在配置文件中的spark{}增加以下参数
-
-```
-spark.driverJavaOpts="-Dalluxio.user.file.writetype.default=CACHE_THROUGH -Dalluxio.zookeeper.address=your.zookeeper.address:zookeeper.port -Dalluxio.zookeeper.enabled=true"
-spark.executorJavaOpts="-Dalluxio.user.file.writetype.default=CACHE_THROUGH -Dalluxio.zookeeper.address=your.zookeeper.address:zookeeper.port -Dalluxio.zookeeper.enabled=true"
-```
-
-### Example
-
-```
-alluxio {
- path = "alluxio:///var/logs-${now}"
- format = "json"
- path_time_format = "yyyy.MM.dd"
-}
-```
-
-> 按天生成Alluxio文件,例如**logs-2018.02.12**
diff --git a/docs/configuration/output-plugins/Clickhouse.docs b/docs/configuration/output-plugins/Clickhouse.docs
deleted file mode 100644
index 70a9d44..0000000
--- a/docs/configuration/output-plugins/Clickhouse.docs
+++ /dev/null
@@ -1,14 +0,0 @@
-@seatunnelPlugin
-@pluginGroup output
-@pluginName Clickhouse
-@pluginDesc "输出Row到Clickhouse"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption array fields="" yes ""
-@pluginOption string hostname="" yes "Clickhouse hostname"
-@pluginOption string database="" yes "Clickhouse database"
-@pluginOption string table="" yes "Clickhouse table"
-@pluginOption string username="" no "Clickhouse auth username"
-@pluginOption string password="" no "Clickhouse auth password"
diff --git a/docs/configuration/output-plugins/Clickhouse.md b/docs/configuration/output-plugins/Clickhouse.md
deleted file mode 100644
index 3b158b3..0000000
--- a/docs/configuration/output-plugins/Clickhouse.md
+++ /dev/null
@@ -1,150 +0,0 @@
-## Output plugin : Clickhouse
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.1.0
-
-### Description
-
-通过 [Clickhouse-jdbc](https://github.com/yandex/clickhouse-jdbc) 将数据源按字段名对应,写入ClickHouse,需要提前创建对应的表结构
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [bulk_size](#bulk_size-number) | number| no |20000|
-| [clickhouse.*](#clickhouse-string) | string| no ||
-| [database](#database-string) | string |yes|-|
-| [fields](#fields-array) | array | no |-|
-| [host](#host-string) | string | yes |-|
-| [cluster](#cluster-string) | string | no |-|
-| [password](#password-string) | string | no |-|
-| [retry](#retry-number) | number| no |1|
-| [retry_codes](#retry_codes-array) | array | no |[ ]|
-| [table](#table-string) | string | yes |-|
-| [username](#username-string) | string | no |-|
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### bulk_size [number]
-
-每次通过[ClickHouse JDBC](https://github.com/yandex/clickhouse-jdbc)写入数据的条数,默认为20000。
-
-##### database [string]
-
-ClickHouse database
-
-##### fields [array]
-
-需要输出到ClickHouse的数据字段,若不配置将会自动根据数据的Schema适配。
-
-##### host [string]
-
-ClickHouse集群地址,格式为host:port,允许指定多个host。如"host1:8123,host2:8123"。
-
-##### cluster [string]
-
-ClickHouse 配置分布式表的时候,提供配置表隶属的集群名称,参考官方文档[Distributed](https://clickhouse.tech/docs/en/operations/table_engines/distributed/)
-
-##### password [string]
-
-ClickHouse用户密码,仅当ClickHouse中开启权限时需要此字段。
-
-##### retry [number]
-
-重试次数,默认为1次
-
-##### retry_codes [array]
-
-出现异常时,会重试操作的ClickHouse异常错误码。详细错误码列表参考 [ClickHouseErrorCode](https://github.com/yandex/clickhouse-jdbc/blob/master/src/main/java/ru/yandex/clickhouse/except/ClickHouseErrorCode.java)
-
-如果多次重试都失败,将会丢弃这个批次的数据,慎用!!
-
-##### table [string]
-
-ClickHouse 表名
-
-##### username [string]
-
-ClickHouse用户用户名,仅当ClickHouse中开启权限时需要此字段
-
-##### clickhouse [string]
-
-除了以上必备的 clickhouse-jdbc须指定的参数外,用户还可以指定多个非必须参数,覆盖了clickhouse-jdbc提供的所有[参数](https://github.com/yandex/clickhouse-jdbc/blob/master/src/main/java/ru/yandex/clickhouse/settings/ClickHouseProperties.java).
-
-指定参数的方式是在原参数名称上加上前缀"clickhouse.",如指定socket_timeout的方式是: clickhouse.socket_timeout = 50000。如果不指定这些非必须参数,它们将使用clickhouse-jdbc给出的默认值。
-
-##### common options [string]
-
-`Output` 插件通用参数,详情参照 [Output Plugin](/zh-cn/v1/configuration/output-plugin)
-
-
-### ClickHouse类型对照表
-
-写入 ClickHouse 之前需要通过 Filter 中的 `SQL` 或者 `Convert` 插件将各字段转换为对应格式,否则会产生报错。
-
-以下为转换目标类型对照表(未列出的类型暂不支持):
-
-|ClickHouse字段类型|Convert插件转化目标类型|SQL转化表达式| Description |
-| :---: | :---: | :---:| :---:|
-|Date| string| string()|`yyyy-MM-dd`格式字符串|
-|DateTime| string| string()|`yyyy-MM-dd HH:mm:ss`格式字符串|
-|String| string| string()||
-|Int8| integer| int()||
-|Uint8| integer| int()||
-|Int16| integer| int()||
-|Uint16| integer| int()||
-|Int32| integer| int()||
-|Uint32| long | bigint()||
-|Int64| long| bigint()||
-|Uint64| long| bigint()||
-|Float32| float| float()||
-|Float64| double| double()||
-|Decimal(P, S)| - | CAST(source AS DECIMAL(P, S)) |Decimal32(S), Decimal64(S), Decimal128(S)皆可使用|
-|Array(T)|-|-||
-|Nullable(T)|取决于T|取决于T||
-|LowCardinality(T)|取决于T|取决于T||
-
-
-### Examples
-
-```
-clickhouse {
- host = "localhost:8123"
- clickhouse.socket_timeout = 50000
- database = "nginx"
- table = "access_msg"
- fields = ["date", "datetime", "hostname", "http_code", "data_size", "ua", "request_time"]
- username = "username"
- password = "password"
- bulk_size = 20000
-}
-```
-
-```
-ClickHouse {
- host = "localhost:8123"
- database = "nginx"
- table = "access_msg"
- fields = ["date", "datetime", "hostname", "http_code", "data_size", "ua", "request_time"]
- username = "username"
- password = "password"
- bulk_size = 20000
- retry_codes = [209, 210]
- retry = 3
-}
-```
-
-> 当出现网络超时或者网络异常的情况下,重试写入3次
-
-#### 分布式表配置
-```
-ClickHouse {
- host = "localhost:8123"
- database = "nginx"
- table = "access_msg"
- cluster = "no_replica_cluster"
- fields = ["date", "datetime", "hostname", "http_code", "data_size", "ua", "request_time"]
-}
-```
-> 根据提供的cluster名称,会从system.clusters表里面获取当前table实际分布在哪些节点上。单spark partition的数据会根据随机策略选择某一个ClickHouse节点执行具体的写入操作
diff --git a/docs/configuration/output-plugins/Elasticsearch.docs b/docs/configuration/output-plugins/Elasticsearch.docs
deleted file mode 100644
index 7fa34a7..0000000
--- a/docs/configuration/output-plugins/Elasticsearch.docs
+++ /dev/null
@@ -1,11 +0,0 @@
-@seatunnelPlugin
-@pluginGroup output
-@pluginName Elasticsearch
-@pluginDesc "输出Dataframe到Elasticsearch"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption array hosts="-" yes "Elasticsearch集群地址,格式为host:port"
-@pluginOption string index="seatunnel" yes "Elasticsearch index"
-@pluginOption string index_type="log" yes "Elasticsearch index type"
diff --git a/docs/configuration/output-plugins/Elasticsearch.md b/docs/configuration/output-plugins/Elasticsearch.md
deleted file mode 100644
index 33fb83c..0000000
--- a/docs/configuration/output-plugins/Elasticsearch.md
+++ /dev/null
@@ -1,82 +0,0 @@
-## Output plugin : Elasticsearch
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-输出数据到Elasticsearch,支持的Elasticsearch版本为 >= 2.x 且 < 7.0.0。
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [hosts](#hosts-array) | array | yes | - |
-| [index_type](#index_type-string) | string | no | log |
-| [index_time_format](#index_time_format-string) | string | no | yyyy.MM.dd |
-| [index](#index-string) | string | no | seatunnel |
-| [es.*](#es-string) | string | no | |
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### hosts [array]
-
-Elasticsearch集群地址,格式为host:port,允许指定多个host。如["host1:9200", "host2:9200"]。
-
-##### index_type [string]
-
-Elasticsearch index type
-
-##### index_time_format [string]
-
-当`index`参数中的格式为`xxxx-${now}`时,`index_time_format`可以指定index名称的时间格式,默认值为 `yyyy.MM.dd`。常用的时间格式列举如下:
-
-| Symbol | Description |
-| --- | --- |
-| y | Year |
-| M | Month |
-| d | Day of month |
-| H | Hour in day (0-23) |
-| m | Minute in hour |
-| s | Second in minute |
-
-详细的时间格式语法见[Java SimpleDateFormat](https://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html)。
-
-
-##### index [string]
-
-Elasticsearch index名称,如果需要根据时间生成index,可以指定时间变量,如:`seatunnel-${now}`。`now`代表当前数据处理的时间。
-
-##### es.* [string]
-
-用户还可以指定多个非必须参数,详细的参数列表见[Elasticsearch支持的参数](https://www.elastic.co/guide/en/elasticsearch/hadoop/current/configuration.html#cfg-mapping).
-
-如指定`es.batch.size.entries`的方式是: `es.batch.size.entries = 100000`。如果不指定这些非必须参数,它们将使用官方文档给出的默认值。
-
-##### common options [string]
-
-`Output` 插件通用参数,详情参照 [Output Plugin](/zh-cn/v1/configuration/output-plugin)
-
-
-### Examples
-
-```
-elasticsearch {
- hosts = ["localhost:9200"]
- index = "seatunnel"
-}
-```
-
-> 将结果写入Elasticsearch集群的名称为seatunnel的index中
-
-```
-elasticsearch {
- hosts = ["localhost:9200"]
- index = "seatunnel-${now}"
- es.batch.size.entries = 100000
- index_time_format = "yyyy.MM.dd"
-}
-```
-
-> 按天创建索引,例如 **seatunnel-2017.11.03**
diff --git a/docs/configuration/output-plugins/File.md b/docs/configuration/output-plugins/File.md
deleted file mode 100644
index 39d6d60..0000000
--- a/docs/configuration/output-plugins/File.md
+++ /dev/null
@@ -1,71 +0,0 @@
-## Output plugin : File
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-输出数据到文件
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [options](#options-object) | object | no | - |
-| [partition_by](#partition_by-array) | array | no | - |
-| [path](#path-string) | string | yes | - |
-| [path_time_format](#path_time_format-string) | string | no | yyyyMMddHHmmss |
-| [save_mode](#save_mode-string) | string | no | error |
-| [format](#format-string) | string | no | json |
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### options [object]
-
-自定义参数
-
-##### partition_by [array]
-
-根据所选字段对数据进行分区
-
-##### path [string]
-
-输出文件路径,以file://开头
-
-##### path_time_format [string]
-
-当`path`参数中的格式为`xxxx-${now}`时,`path_time_format`可以指定路径的时间格式,默认值为 `yyyyMMddHHmmss`。常用的时间格式列举如下:
-
-| Symbol | Description |
-| --- | --- |
-| y | Year |
-| M | Month |
-| d | Day of month |
-| H | Hour in day (0-23) |
-| m | Minute in hour |
-| s | Second in minute |
-
-详细的时间格式语法见[Java SimpleDateFormat](https://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html)。
-
-##### save_mode [string]
-
-存储模式,当前支持overwrite,append,ignore以及error。每个模式具体含义见[save-modes](http://spark.apache.org/docs/2.2.0/sql-programming-guide.html#save-modes)
-
-##### format [string]
-
-序列化方法,当前支持csv、json、parquet、orc和text
-
-##### common options [string]
-
-`Output` 插件通用参数,详情参照 [Output Plugin](/zh-cn/v1/configuration/output-plugin)
-
-
-### Example
-
-```
-file {
- path = "file:///var/logs"
- format = "text"
-}
-```
diff --git a/docs/configuration/output-plugins/Hdfs.md b/docs/configuration/output-plugins/Hdfs.md
deleted file mode 100644
index fb10244..0000000
--- a/docs/configuration/output-plugins/Hdfs.md
+++ /dev/null
@@ -1,74 +0,0 @@
-## Output plugin : Hdfs
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-输出数据到HDFS文件
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [options](#options-object) | object | no | - |
-| [partition_by](#partition_by-array) | array | no | - |
-| [path](#path-string) | string | yes | - |
-| [path_time_format](#path_time_format-string) | string | no | yyyyMMddHHmmss |
-| [save_mode](#save_mode-string) | string | no | error |
-| [format](#format-string) | string | no | json |
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### options [object]
-
-自定义参数
-
-##### partition_by [array]
-
-根据所选字段对数据进行分区
-
-##### path [string]
-
-Hadoop集群文件路径,以hdfs://开头
-
-##### path_time_format [string]
-
-当`path`参数中的格式为`xxxx-${now}`时,`path_time_format`可以指定HDFS路径的时间格式,默认值为 `yyyyMMddHHmmss`。常用的时间格式列举如下:
-
-| Symbol | Description |
-| --- | --- |
-| y | Year |
-| M | Month |
-| d | Day of month |
-| H | Hour in day (0-23) |
-| m | Minute in hour |
-| s | Second in minute |
-
-详细的时间格式语法见[Java SimpleDateFormat](https://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html)。
-
-##### save_mode [string]
-
-存储模式,当前支持overwrite,append,ignore以及error。每个模式具体含义见[save-modes](http://spark.apache.org/docs/2.2.0/sql-programming-guide.html#save-modes)
-
-##### format [string]
-
-序列化方法,当前支持csv、json、parquet、orc和text
-
-##### common options [string]
-
-`Output` 插件通用参数,详情参照 [Output Plugin](/zh-cn/v1/configuration/output-plugin)
-
-
-### Example
-
-```
-hdfs {
- path = "hdfs:///var/logs-${now}"
- format = "json"
- path_time_format = "yyyy.MM.dd"
-}
-```
-
-> 按天生成HDFS文件,例如**logs-2018.02.12**
diff --git a/docs/configuration/output-plugins/Hive.md b/docs/configuration/output-plugins/Hive.md
deleted file mode 100644
index 13fc013..0000000
--- a/docs/configuration/output-plugins/Hive.md
+++ /dev/null
@@ -1,73 +0,0 @@
-# Output plugin : **Hive**
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.5.1
-
-### Description
-
-写入数据到[Apache Hive](https://hive.apache.org)表中
-
-### Options
-
-| name | type | required | default value |
-| --------------------------------------- | ------------- | -------- | ------------- |
-| [sql](#hql) | string | no | - |
-| [source_table_name](#source_table_name) | string | no | - |
-| [result_table_name](#result_table_name) | string | no | - |
-| [sink_columns](#sink_columns) | string | no | - |
-| [save_mode](#save_mode) | string | no | - |
-| [partition_by](#partition_by) | Array[string] | no | - |
-
-##### sql [string]
-
-标准的hql语句:insert into/overwrite $table select * from xxx_table
-
-如果有这个option,会忽略其他的option
-
-##### source_table_name [string]
-
-准备输出到hive的表名
-
-##### result_table_name [string]
-
-结果在hive中的存储表名
-
-##### save_mode [string]
-
-写入spark中采取的模式,与spark.mode语义相同
-
-##### sink_columns [string]
-
-选择source_table_name中的需要的字段,存储在result_table_name中,字段间逗号分隔
-
-##### partition_by [Array[string]]
-
-hive分区
-
-### Example
-
-```conf
-output {
- Hive {
- sql = "insert overwrite table seatunnel.test1 partition(province) select name,age,province from myTable2"
- }
-}
-```
-
-
-
-
-
-```conf
-output {
- Hive {
- source_table_name = "myTable2"
- result_table_name = "seatunnel.test1"
- save_mode = "overwrite"
- sink_columns = "name,age,province"
- partition_by = ["province"]
- }
-}
-```
-
diff --git a/docs/configuration/output-plugins/Jdbc.docs b/docs/configuration/output-plugins/Jdbc.docs
deleted file mode 100644
index 659264a..0000000
--- a/docs/configuration/output-plugins/Jdbc.docs
+++ /dev/null
@@ -1,14 +0,0 @@
-@seatunnelPlugin
-@pluginGroup output
-@pluginName Jdbc
-@pluginDesc "通过JDBC输出数据到外部数据源"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption string driver="-" yes ""
-@pluginOption string url="-" yes ""
-@pluginOption string table="-" yes ""
-@pluginOption string user="-" yes ""
-@pluginOption string password="-" yes ""
-@pluginOption string save_mode="append" no ""
diff --git a/docs/configuration/output-plugins/Jdbc.md b/docs/configuration/output-plugins/Jdbc.md
deleted file mode 100644
index f964fee..0000000
--- a/docs/configuration/output-plugins/Jdbc.md
+++ /dev/null
@@ -1,88 +0,0 @@
-## Output plugin : Jdbc
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-通过JDBC输出数据到外部数据源
-
-### Options
-
-| name | type | required | default value | engine |
-| --- | --- | --- | --- |--- |
-| [driver](#driver-string) | string | yes | - |all streaming |
-| [password](#password-string) | string | yes | - |all streaming |
-| [save_mode](#save_mode-string) | string | no | append |spark streaming |
-| [table](#table-string) | string | yes | - |all streaming |
-| [url](#url-string) | string | yes | - |all streaming |
-| [user](#user-string) | string | yes | - |all streaming |
-| [jdbc.*](#jdbc.*-string) | string | no | - |structured streaming |
-| [output_sql](#output_sql-string) | string | yes | - |structured streaming |
-| [common-options](#common-options-string)| string | no | - | all streaming|
-
-
-##### driver [string]
-
-用来连接远端数据源的JDBC类名
-
-##### password [string]
-
-密码
-
-##### save_mode [string]
-
-存储模式,当前支持overwrite,append,ignore以及error。每个模式具体含义见[save-modes](http://spark.apache.org/docs/2.2.0/sql-programming-guide.html#save-modes)
-
-##### table [string]
-
-表名
-
-##### url [string]
-
-JDBC连接的URL。参考一个案例: `jdbc:postgresql://localhost/test`
-
-
-##### user [string]
-
-用户名
-
-##### jdbc.* [string]
-
-阿里druid连接池配置,详见https://github.com/alibaba/druid/wiki/DruidDataSource%E9%85%8D%E7%BD%AE%E5%B1%9E%E6%80%A7%E5%88%97%E8%A1%A8
-在其列表属性之前添加jdbc.前缀,如配置initialSize(初始化连接池大小),jdbc.initialSize="1"
-
-##### output_sql [string]
-
-输出到jdbc的sql,例如 `insert ignore into test(age,name,city) values(?,?,?)`,`replace into test(age,name,city) values(?,?,?)`。注意的是,字段的顺序需要与`source_table_name(来自input或者filter)`的schema顺序一致
-
-
-##### common options [string]
-
-`Output` 插件通用参数,详情参照 [Output Plugin](/zh-cn/v1/configuration/output-plugin)
-
-
-### Example
-> spark streaming
-```
-jdbc {
- driver = "com.mysql.jdbc.Driver"
- url = "jdbc:mysql://localhost:3306/info"
- table = "access"
- user = "username"
- password = "password"
- save_mode = "append"
-}
-```
-> structured streaming
-```
-jdbc {
- driver = "com.mysql.jdbc.Driver"
- url = "jdbc:mysql://localhost:3306/info"
- table = "access"
- user = "username"
- password = "password"
-}
-```
-
diff --git a/docs/configuration/output-plugins/Kafka.docs b/docs/configuration/output-plugins/Kafka.docs
deleted file mode 100644
index 8ef4a4b..0000000
--- a/docs/configuration/output-plugins/Kafka.docs
+++ /dev/null
@@ -1,10 +0,0 @@
-@seatunnelPlugin
-@pluginGroup output
-@pluginName Kafka
-@pluginDesc "输出Dataframe到Kafka"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption string topic="-" yes "Kafka Topic"
-@pluginOption string bootstrap.servers="-" yes "Kafka Brokers"
diff --git a/docs/configuration/output-plugins/Kafka.md b/docs/configuration/output-plugins/Kafka.md
deleted file mode 100644
index b21a5b6..0000000
--- a/docs/configuration/output-plugins/Kafka.md
+++ /dev/null
@@ -1,79 +0,0 @@
-## Output plugin : Kafka
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-输出数据到Kafka
-
-### Options
-
-| name | type | required | default value | engine |
-| --- | --- | --- | --- | --- |
-| [producer.bootstrap.servers](#producerbootstrapservers-string) | string | yes | - | all streaming |
-| [topic](#topic-string) | string | yes | - | all streaming |
-| [producer.*](#producer-string) | string | no | - | all streaming |
-| [format](#format-string) | string | no | json | all streaming |
-| [streaming_output_mode](#streaming_output_mode-string) | string | no | append | structured streaming |
-| [checkpointLocation](#checkpointLocation-string) | string | no | - | structured streaming |
-| [common-options](#common-options-string)| string | no | - | all streaming |
-
-
-##### producer.bootstrap.servers [string]
-
-Kafka Brokers List
-
-##### topic [string]
-
-Kafka Topic
-
-##### producer [string]
-
-除了以上必备的kafka producer客户端必须指定的参数外,用户还可以指定多个producer客户端非必须参数,覆盖了[kafka官方文档指定的所有producer参数](http://kafka.apache.org/documentation.html#producerconfigs).
-
-指定参数的方式是在原参数名称上加上前缀"producer.",如指定`request.timeout.ms`的方式是: `producer.request.timeout.ms = 60000`。如果不指定这些非必须参数,它们将使用Kafka官方文档给出的默认值。
-
-
-###### Notes
-
-在作为structured streaming 的output的时候,你可以添加一些额外的参数,来达到相应的效果
-
-##### checkpointLocation [string]
-
-你可以指定是否启用checkpoint,通过配置**checkpointLocation**这个参数
-
-##### streaming_output_mode [string]
-
-你可以指定输出模式,complete|append|update三种,详见Spark文档http://spark.apache.org/docs/latest/structured-streaming-programming-guide.html#output-modes
-
-##### format [string]
-
-序列化方法,当前支持json和text,如果选择text方式,需保证数据结构中仅有一个字段。
-
-##### common options [string]
-
-`Output` 插件通用参数,详情参照 [Output Plugin](/zh-cn/v1/configuration/output-plugin)
-
-
-### Examples
-
-> spark streaming or batch
-
-```
-kafka {
- topic = "seatunnel"
- producer.bootstrap.servers = "localhost:9092"
-}
-```
-> structured streaming
-
-```
-kafka {
- topic = "seatunnel"
- producer.bootstrap.servers = "localhost:9092"
- streaming_output_mode = "update"
- checkpointLocation = "/your/path"
-}
-```
diff --git a/docs/configuration/output-plugins/Kudu.docs b/docs/configuration/output-plugins/Kudu.docs
deleted file mode 100644
index f6be56b..0000000
--- a/docs/configuration/output-plugins/Kudu.docs
+++ /dev/null
@@ -1,12 +0,0 @@
-@seatunnelPlugin
-@pluginGroup output
-@pluginName Kudu
-@pluginDesc "写入数据到[Apache Kudu](https://kudu.apache.org)表中"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption
-string kudu_master yes "kudu的master,多个master以逗号隔开"
-string kudu_table yes "kudu中要写入的表名,表必须已经存在"
-string mode="insert" no "写入kudu模式 insert|update|upsert|insertIgnore"
diff --git a/docs/configuration/output-plugins/Kudu.md b/docs/configuration/output-plugins/Kudu.md
deleted file mode 100644
index 646ef23..0000000
--- a/docs/configuration/output-plugins/Kudu.md
+++ /dev/null
@@ -1,48 +0,0 @@
-## Output plugin : Kudu
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.1.2
-
-### Description
-
-写入数据到[Apache Kudu](https://kudu.apache.org)表中
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [kudu_master](#kudu_master-string) | string | yes | - |
-| [kudu_table](#kudu_table) | string | yes | - |
-| [mode](#mode-string) | string | no | insert |
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### kudu_master [string]
-
-kudu的master,多个master以逗号隔开
-
-##### kudu_table [string]
-
-kudu中要写入的表名,表必须已经存在
-
-##### mode [string]
-
-写入kudu中采取的模式,支持 insert|update|upsert|insertIgnore,默认为insert
-insert和insertIgnore :insert在遇见主键冲突将会报错,insertIgnore不会报错,将会舍弃这条数据
-update和upsert :update找不到要更新的主键将会报错,upsert不会,将会把这条数据插入
-
-##### common options [string]
-
-`Output` 插件通用参数,详情参照 [Output Plugin](/zh-cn/v1/configuration/output-plugin)
-
-
-### Example
-
-```
-kudu{
- kudu_master="hadoop01:7051,hadoop02:7051,hadoop03:7051"
- kudu_table="my_kudu_table"
- mode="upsert"
- }
-```
diff --git a/docs/configuration/output-plugins/MongoDB.docs b/docs/configuration/output-plugins/MongoDB.docs
deleted file mode 100644
index 8337433..0000000
--- a/docs/configuration/output-plugins/MongoDB.docs
+++ /dev/null
@@ -1,12 +0,0 @@
-@seatunnelPlugin
-@pluginGroup output
-@pluginName MongoDB
-@pluginDesc "写入数据到[MongoDB](https://www.mongodb.com/)"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption
-string readConfig.uri yes "mongoDB uri"
-string readConfig.database yes "要写入的database"
-string readConfig.collection yes "要写入的collection"
diff --git a/docs/configuration/output-plugins/MongoDB.md b/docs/configuration/output-plugins/MongoDB.md
deleted file mode 100644
index bc8184f..0000000
--- a/docs/configuration/output-plugins/MongoDB.md
+++ /dev/null
@@ -1,92 +0,0 @@
-## Output plugin : MongoDB
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.1.2
-
-### Description
-
-写入数据到[MongoDB](https://www.mongodb.com/)
-
-### Options
-
-| name | type | required | default value | engine |
-| --- | --- | --- | --- |--- |
-| [writeconfig.uri](#writeconfig.uri-string) | string | yes | - | spark streaming |
-| [writeconfig.database](#writeconfig.database-string) | string | yes | - | all streaming |
-| [writeconfig.collection](#writeconfig.collection-string) | string | yes | - | all streaming |
-| [writeconfig.*](#writeconfig.*-string) | string | no | - | spark streaming |
-| [writeconfig.host](#writeconfig.port-integer) | string | yes | - | structured streaming |
-| [writeconfig.port](#writeconfig.port-integer) | integer | no | 27017 | structured streaming |
-| [update_fields](#update_fields-string) | string | no | - | structured streaming |
-| [mongo_output_mode](#mongo_output_mode-string) | string | no | insert | structured streaming |
-| [streaming_output_mode](#streaming_output_mode-string) | string | no | append | structured streaming |
-| [checkpointLocation](#checkpointLocation-string) | string | no | - | structured streaming |
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### writeconfig.uri [string]
-
-要写入mongoDB的uri
-
-##### writeconfig.database [string]
-
-要写入mongoDB的database
-
-##### writeconfig.collection [string]
-
-要写入mongoDB的collection
-
-##### writeconfig
-
-这里还可以配置更多其他参数,详见https://docs.mongodb.com/spark-connector/v1.1/configuration/
-, 参见其中的`Output Configuration`部分
-指定参数的方式是在原参数名称上加上前缀"writeconfig." 如设置`localThreshold`的方式是 `writeconfig.localThreshold=20`。如果不指定这些非必须参数,将使用MongoDB官方文档的默认值
-
-
-#### Notes
-在作为structured streaming 的output的时候,你可以添加一些额外的参数,来达到相应的效果
-
-##### writeconfig.port [integer]
-如果你的mongoDB 的端口不是默认的27017,你可以手动指定
-
-##### mongo_output_mode [string]
-写入mongo中采取的模式,支持 insert|updateOne|updateMany|upsert|replace,默认为insert
-
-##### update_fields [string]
-当你指定的模式是更新或者是替代时,你需要指定根据哪些字段去更新。根据多个字段更新字段用逗号隔开,例如根据学号和姓名字段更新则:update_fields = "id,name"
-
-##### checkpointLocation [string]
-你可以指定是否启用checkpoint,通过配置**checkpointLocation**这个参数
-
-##### streaming_output_mode [string]
-你可以指定输出模式,complete|append|update三种,详见Spark文档http://spark.apache.org/docs/latest/structured-streaming-programming-guide.html#output-modes
-
-##### common options [string]
-
-`Output` 插件通用参数,详情参照 [Output Plugin](/zh-cn/v1/configuration/output-plugin)
-
-> spark streaming or batch
-
-```
-mongodb{
- writeconfig.uri="mongodb://myhost:myport"
- writeconfig.database="mydatabase"
- writeconfig.collection="mycollection"
-}
-```
-
-> structured streaming
-
-```
-mongodb{
- writeconfig.host="my host"
- writeconfig.port=27017
- writeconfig.database="mydatabase"
- writeconfig.collection="mycollection"
- mongo_output_mode = "updateOne"
- update_fields = "id,name"
- streaming_output_mode = "update"
- checkpointLocation = "/your/path"
-}
-```
diff --git a/docs/configuration/output-plugins/MySQL.docs b/docs/configuration/output-plugins/MySQL.docs
deleted file mode 100644
index a736717..0000000
--- a/docs/configuration/output-plugins/MySQL.docs
+++ /dev/null
@@ -1,13 +0,0 @@
-@seatunnelPlugin
-@pluginGroup output
-@pluginName Mysql
-@pluginDesc "输出数据到MySQL"
-@pluginAuthor InterestingLab
-@pluginHomepage https://interestinglab.github.io/seatunnel-docs
-@pluginVersion 1.0.0
-
-@pluginOption string url="-" yes ""
-@pluginOption string table="-" yes ""
-@pluginOption string user="-" yes ""
-@pluginOption string password="-" yes ""
-@pluginOption string save_mode="append" no ""
diff --git a/docs/configuration/output-plugins/MySQL.md b/docs/configuration/output-plugins/MySQL.md
deleted file mode 100644
index c613667..0000000
--- a/docs/configuration/output-plugins/MySQL.md
+++ /dev/null
@@ -1,61 +0,0 @@
-## Output plugin : Mysql
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-输出数据到MySQL
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [password](#password-string) | string | yes | - |
-| [save_mode](#save_mode-string) | string | no | append |
-| [table](#table-string) | string | yes | - |
-| [url](#url-string) | string | yes | - |
-| [user](#user-string) | string | yes | - |
-| [common-options](#common-options-string)| string | no | - |
-
-
-##### password [string]
-
-密码
-
-##### save_mode [string]
-
-存储模式,当前支持overwrite,append,ignore以及error。每个模式具体含义见[save-modes](http://spark.apache.org/docs/2.2.0/sql-programming-guide.html#save-modes)
-
-##### table [string]
-
-表名
-
-##### url [string]
-
-JDBC连接的URL。参考一个案例:`jdbc:mysql://localhost:3306/info`
-
-
-##### user [string]
-
-用户名
-
-##### common options [string]
-
-`Output` 插件通用参数,详情参照 [Output Plugin](/zh-cn/v1/configuration/output-plugin)
-
-
-### Example
-
-```
-mysql {
- url = "jdbc:mysql://localhost:3306/info"
- table = "access"
- user = "username"
- password = "password"
- save_mode = "append"
-}
-```
-
-> 将数据写入MySQL
diff --git a/docs/configuration/output-plugins/Opentsdb.md b/docs/configuration/output-plugins/Opentsdb.md
deleted file mode 100644
index d58b38a..0000000
--- a/docs/configuration/output-plugins/Opentsdb.md
+++ /dev/null
@@ -1,121 +0,0 @@
-## Output plugin : Opentsdb
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.3.0
-
-### Description
-
-输出数据到Opentsdb
-
-### Options
-
-| name | type | required | default value | engine |
-| --- | --- | --- | --- | --- |
-| [postUrl](#postUrl-string) | string | yes | - | structured streaming |
-| [metric](#metric-string) | string | yes | - | structured streaming |
-| [tags_fields](#tags_fields-array) | array | no | - | structured streaming |
-| [value_fields](#value_fields-array) | array | yes | - | structured streaming |
-| [timestamp](#timestamp-string) | string | yes | - | structured streaming |
-| [streaming_output_mode](#streaming_output_mode-string) | string | no | append | structured streaming |
-| [trigger_type](#trigger_type) | string | no | default | structured streaming |
-| [interval](#interval-string)| string | no | - | structured streaming |
-| [common-options](#common-options-string)| string | no | - | structured streaming |
-
-
-##### postUrl [string]
-
-输出到Opentsdb的http请求地址,示例:`http://localhost:4222/api/put?summary`
-
-##### metric [string]
-
-Opentsdb对应的metric,需要提前在服务器创建完成
-
-##### tags_fields [array]
-
-tags对象中包含的信息,会按照配置信息(K)和原始数据(v)形成 "K" -> V键值对
-
-##### value_fields [array]
-
-Opentsdb的Value信息,会根据配置信息将原始数据变成多行Opentsdb支持的数据行,转化规则见*示例说明*
-
-##### timestamp [string]
-
-时间戳字段
-
-##### streaming_output_mode [string]
-
-输出模式,支持 `Append` 、`Update` 或 `Complete`。
-
-
-##### trigger_type
-
-Trigger Type, 支持default、ProcessingTime、OneTime 和 Continuous
-
-##### interval [string]
-
-Trigger触发周期。 当 trigger_type 为ProcessingTime 或 Continuous 时配置。
-
-##### common options [string]
-
-`Output` 插件通用参数,详情参照 [Output Plugin](/zh-cn/v1/configuration/output-plugin)
-
-### Example
-
-```
-opentsdb{
- postUrl = "http://localhost:4222/api/put?summary"
- metric = "test_metric"
- tags_fields = ["col1","col2","col3"]
- measures = ["men1","men2"]
- value_fields = "timestamps"
- }
-```
-
-### 示例说明
-
-##### Schema信息
-
-Schema[(col1,DataType), (col2,DataType), (col3,DataType), (col4,DataType), (men1,LongType), (men2,LongType), (men3,LongType), (time,TimeStamp)]
-
-##### 原始数据
-
-Row("v1", "v2", "v3", "v4", 123, 22, 33, 1553081227)
-
-##### 转换规则
-
-1. 首先根据tags_fields,生成一个tags对象
-
-```
-{
- "col1": "v1",
- "col2" : "v2",
- "col3" : "v3"
-}
-```
-
-2. 根据value_fields,生成2行数据,并更新tags字段(默认生成一个key为st_group的字段)
-```
-{
- "metric": "test_metric",
- "timestamp": 1553081227,
- "value": 123,
- "tags": {
- "col1" : "v1",
- "col2" : "v2",
- "col3" : "v3",
- "st_group": "men1"
- }
-}
-```
-
-3. 根据第二步一样生成第二行数据, 其中变化为
-
-```
-{
- ...
- "tags":{
- "st_group":"men2"
- }
-}
-```
diff --git a/docs/configuration/output-plugins/S3.md b/docs/configuration/output-plugins/S3.md
deleted file mode 100644
index b887842..0000000
--- a/docs/configuration/output-plugins/S3.md
+++ /dev/null
@@ -1,69 +0,0 @@
-## Output plugin : S3
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-输出数据到S3文件
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [options](#options-object) | object | no | - |
-| [partition_by](#partition_by-array) | array | no | - |
-| [path](#path-string) | string | yes | - |
-| [path_time_format](#path_time_format-string) | string | no | yyyyMMddHHmmss |
-| [save_mode](#save_mode-string) | string | no | error |
-| [format](#format-string) | string | no | json |
-| [common-options](#common-options-string)| string | no | - |
-
-##### options [object]
-
-自定义参数
-
-##### partition_by [array]
-
-根据所选字段对数据进行分区
-
-##### path [string]
-
-AWS S3文件路径,以s3://,s3a://或s3n://开头
-
-##### path_time_format [string]
-
-当`path`参数中的格式为`xxxx-${now}`时,`path_time_format`可以指定路径的时间格式,默认值为 `yyyyMMddHHmmss`。常用的时间格式列举如下:
-
-| Symbol | Description |
-| --- | --- |
-| y | Year |
-| M | Month |
-| d | Day of month |
-| H | Hour in day (0-23) |
-| m | Minute in hour |
-| s | Second in minute |
-
-详细的时间格式语法见[Java SimpleDateFormat](https://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html)。
-
-##### save_mode [string]
-
-存储模式,当前支持overwrite,append,ignore以及error。每个模式具体含义见[save-modes](http://spark.apache.org/docs/2.2.0/sql-programming-guide.html#save-modes)
-
-##### format [string]
-
-序列化方法,当前支持csv、json、parquet和text
-
-##### common options [string]
-
-`Output` 插件通用参数,详情参照 [Output Plugin](/zh-cn/v1/configuration/output-plugin)
-
-### Example
-
-```
-s3 {
- path = "s3a://var/logs"
- format = "parquet"
-}
-```
diff --git a/docs/configuration/output-plugins/Stdout.md b/docs/configuration/output-plugins/Stdout.md
deleted file mode 100644
index c28010d..0000000
--- a/docs/configuration/output-plugins/Stdout.md
+++ /dev/null
@@ -1,41 +0,0 @@
-## Output plugin : Stdout
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.0.0
-
-### Description
-
-输出数据到标准输出/终端, 常用于debug, 能够很方便输出数据.
-
-### Options
-
-| name | type | required | default value | engine |
-| --- | --- | --- | --- | --- |
-| [limit](#limit-number) | number | no | 100 | batch/spark streaming |
-| [format](#format-string) | string | no | plain | batch/spark streaming |
-| [common-options](#common-options-string)| string | no | - | all streaming |
-
-##### limit [number]
-
-限制输出Row的条数,合法范围[-1, 2147483647], `-1`表示输出最多2147483647条Row
-
-##### format [string]
-
-输出到终端的格式,可用的`format`包括: `json`, `plain` 以及 `schema`,用于输出数据的 **Schema**
-
-##### common options [string]
-
-`Output` 插件通用参数,详情参照 [Output Plugin](/zh-cn/v1/configuration/output-plugin)
-
-
-### Example
-
-```
-stdout {
- limit = 10
- format = "json"
-}
-```
-
-> 以Json格式输出10条数据
diff --git a/docs/configuration/output-plugins/Tidb.md b/docs/configuration/output-plugins/Tidb.md
deleted file mode 100644
index 3d32b9d..0000000
--- a/docs/configuration/output-plugins/Tidb.md
+++ /dev/null
@@ -1,73 +0,0 @@
-## Output plugin : TiDB
-
-* Author: InterestingLab
-* Homepage: https://interestinglab.github.io/seatunnel-docs
-* Version: 1.1.5
-
-### Description
-
-通过JDBC将数据写入[TiDB](https://github.com/pingcap/tidb)
-
-### Options
-
-| name | type | required | default value |
-| --- | --- | --- | --- |
-| [batchsize](#batchsize-number) | number | no | 150 |
-| [isolationLevel](#isolationLevel-string) | string | no | NONE |
-| [password](#password-string) | string | yes | - |
-| [save_mode](#save_mode-string) | string | no | append |
-| [table](#table-string) | string | yes | - |
-| [url](#url-string) | string | yes | - |
-| [user](#user-string) | string | yes | - |
-| [useSSL](#useSSL-boolean) | boolean | no | false |
-| [common-options](#common-options-string)| string | no | - |
-
-##### batchsize [number]
-
-写入批次大小
-
-##### isolationLevel [string]
-
-Isolation level means whether do the resolve lock for the underlying tidb clusters.
-
-##### password [string]
-
-密码
-
-##### save_mode [string]
-
-存储模式,当前支持overwrite,append,ignore以及error。每个模式具体含义见[save-modes](http://spark.apache.org/docs/2.2.0/sql-programming-guide.html#save-modes)
-
-##### table [string]
-
-表名
-
-##### url [string]
-
-JDBC连接的URL。参考一个案例: `jdbc:mysql://127.0.0.1:4000/test?rewriteBatchedStatements=true`
-
-
-##### user [string]
-
-用户名
-
-##### useSSL [boolean]
-
-useSSL
-
-##### common options [string]
-
-`Output` 插件通用参数,详情参照 [Output Plugin](/zh-cn/v1/configuration/output-plugin)
-
-
-### Example
-
-```
-tidb {
- url = "jdbc:mysql://127.0.0.1:4000/test?useUnicode=true&characterEncoding=utf8"
- table = "access"
- user = "username"
- password = "password"
- save_mode = "append"
-}
-```
diff --git a/docs/contribution/_category_.json b/docs/contribution/_category_.json
deleted file mode 100644
index 6997c7c..0000000
--- a/docs/contribution/_category_.json
+++ /dev/null
@@ -1,4 +0,0 @@
-{
- "label": "参与贡献",
- "position": 8
-}
\ No newline at end of file
diff --git a/docs/contribution/contribution.md b/docs/contribution/contribution.md
deleted file mode 100644
index 89fad6e..0000000
--- a/docs/contribution/contribution.md
+++ /dev/null
@@ -1,70 +0,0 @@
-# 为 seatunnel 贡献代码
-
-## Coding Style
-
-Scala Coding Style 参考:
-
-http://docs.scala-lang.org/style/
-
-https://github.com/databricks/scala-style-guide
-
-使用sbt插件[scalastyle](http://www.scalastyle.org/)作为coding style检查工具;无法通过coding style检查的代码无法提交.
-
-通过scalafmt利用[Cli或者IntelliJ Idea](http://scalameta.org/scalafmt/#IntelliJ)自动完成scala代码的格式化。
-
-如果使用scalafmt的Idea插件,请在插件安装完后设置`文件保存时自动更正代码格式`,方法 "Preferences" -> "Tools" -> "Scalafmt", 勾选"format on file save"
-
-## 代码/文档贡献流程
-
-* Interesting Lab成员 :
-
-(1) 从 master上 checkout 出新分支,分支名称要求新功能:
-`<username>`.fea.`<feature_name>`,修复bug: `<username>`.fixbug.`<bugname_or_issue_id>`, 文档:`<username>`.doc.`<doc_name>`
-
-(2) 开发, 提交commit
-
-(3) 在github的项目主页,选中你的分支,点"new pull request",提交pull request
-
-(4) 经至少1个其他成员审核通过,并且travis-ci的build全部通过后,由审核人merge到master分支中.
-
-(5) 删除你的分支
-
-* 非Interesting Lab 成员(常见的github协作流程):
-
-(1) 在seatunnel主页 fork 这个项目 https://github.com/InterestingLab/seatunnel
-
-(2) 开发
-
-(3) 提交commit
-
-(4) 在你自己的项目主页上,点"new pull request",提交pull request
-
-(5) Interesting Lab 审核通过后,你的贡献将被纳入项目代码中。
-
-## 自动化Build与Test
-
-此项目使用 [travis-ci](https://travis-ci.org/) 作为自动化Build工具.
-
-所有分支每次commit有更新,都会触发自动化Build,新的pull request也会触发。
-
-## 国内sbt加速
-
-```textmate
-
-// 增加全局 repositories 配置, 加速依赖下载
-
-vim ~/.sbt/repository
-
-[repositories]
-local
-aliyun-ivy: http://maven.aliyun.com/nexus/content/groups/public, [organization]/[module]/(scala_[scalaVersion]/)(sbt_[sbtVersion]/)[revision]/[type]s/[artifact](-[classifier]).[ext]
-aliyun-maven: http://maven.aliyun.com/nexus/content/groups/public
-typesafe: http://repo.typesafe.com/typesafe/ivy-releases/, [organization]/[module]/(scala_[scalaVersion]/)(sbt_[sbtVersion]/)[revision]/[type]s/[artifact](-[classifier]).[ext], bootOnly
-typesafe2: http://repo.typesafe.com/typesafe/releases/
-sbt-plugin: http://repo.scala-sbt.org/scalasbt/sbt-plugin-releases/
-sonatype: http://oss.sonatype.org/content/repositories/snapshots
-uk_maven: http://uk.maven.org/maven2/
-repo2: http://repo2.maven.org/maven2/
-
-
-```
\ No newline at end of file
diff --git a/docs/deployment/_category_.json b/docs/deployment/_category_.json
deleted file mode 100644
index 2094b61..0000000
--- a/docs/deployment/_category_.json
+++ /dev/null
@@ -1,4 +0,0 @@
-{
- "label": "安装部署",
- "position": 2
-}
\ No newline at end of file
diff --git a/docs/deployment/deployment.md b/docs/deployment/deployment.md
deleted file mode 100644
index 588355d..0000000
--- a/docs/deployment/deployment.md
+++ /dev/null
@@ -1,58 +0,0 @@
-# 部署与运行
-
-> seatunnel 依赖Java运行环境和Spark,详细的seatunnel 安装步骤参考[安装seatunnel](/zh-cn/v1/installation)
-
-下面重点说明不同平台的运行方式:
-
-### 在本地以local方式运行seatunnel
-
-```
-./bin/start-seatunnel.sh --master local[4] --deploy-mode client --config ./config/application.conf
-```
-
-### 在Spark Standalone集群上运行seatunnel
-
-```
-# client 模式
-./bin/start-seatunnel.sh --master spark://207.184.161.138:7077 --deploy-mode client --config ./config/application.conf
-
-# cluster 模式
-./bin/start-seatunnel.sh --master spark://207.184.161.138:7077 --deploy-mode cluster --config ./config/application.conf
-```
-
-### 在Yarn集群上运行seatunnel
-
-```
-# client 模式
-./bin/start-seatunnel.sh --master yarn --deploy-mode client --config ./config/application.conf
-
-# cluster 模式
-./bin/start-seatunnel.sh --master yarn --deploy-mode cluster --config ./config/application.conf
-```
-
-### 在Mesos上运行seatunnel
-
-```
-# cluster 模式
-./bin/start-seatunnel.sh --master mesos://207.184.161.138:7077 --deploy-mode cluster --config ./config/application.conf
-```
-
----
-
-start-seatunnel.sh 的`master`, `deploy-mode`参数的含义与Spark `master`, `deploy-mode`相同,
-可参考: [Spark Submitting Applications](http://spark.apache.org/docs/latest/submitting-applications.html)
-
-如果要指定seatunnel运行时占用的资源大小,或者其他Spark参数,可以在`--config`指定的配置文件里面指定:
-
-```
-spark {
- spark.executor.instances = 2
- spark.executor.cores = 1
- spark.executor.memory = "1g"
- ...
-}
-...
-
-```
-
-关于如何配置seatunnel, 请见[seatunnel 配置](/zh-cn/v1/configuration/base)
diff --git a/docs/deployment/installation.md b/docs/deployment/installation.md
deleted file mode 100644
index f2a13eb..0000000
--- a/docs/deployment/installation.md
+++ /dev/null
@@ -1,34 +0,0 @@
-# 下载、安装
-
-## 下载
-
-### 社区版本(Community)
-
-https://github.com/InterestingLab/seatunnel/releases
-
-## 环境准备
-
-### 准备好JDK1.8
-
-seatunnel 依赖JDK1.8运行环境。
-
-### 准备好Spark
-
-seatunnel 依赖Spark,安装seatunnel前,需要先准备好Spark。
-请先[下载Spark](http://spark.apache.org/downloads.html), Spark版本请选择 >= 2.x.x。下载解压后,不需要做任何配置即可提交Spark deploy-mode = local模式的任务。
-如果你期望任务运行在Standalone集群或者Yarn、Mesos集群上,请参考Spark官网配置文档。
-
-### 安装seatunnel
-
-下载seatunnel安装包并解压, 这里以社区版为例:
-
-```
-wget https://github.com/InterestingLab/seatunnel/releases/download/v<version>/seatunnel-<version>.zip -O seatunnel-<version>.zip
-unzip seatunnel-<version>.zip
-ln -s seatunnel-<version> seatunnel
-```
-
-没有任何复杂的安装配置步骤,seatunnel的使用方法请参考[Quick Start](#), 配置请参考[Configuration](/zh-cn/v1/configuration/base)。
-
-如果想把seatunnel部署在Spark Standalone/Yarn/Mesos集群上运行,请参考[seatunnel部署](/zh-cn/v1/deployment)
-
diff --git a/docs/development/_category_.json b/docs/development/_category_.json
deleted file mode 100644
index a3460cb..0000000
--- a/docs/development/_category_.json
+++ /dev/null
@@ -1,4 +0,0 @@
-{
- "label": "插件开发",
- "position": 4
-}
\ No newline at end of file
diff --git a/docs/development/development.md b/docs/development/development.md
deleted file mode 100644
index 75a2720..0000000
--- a/docs/development/development.md
+++ /dev/null
@@ -1,309 +0,0 @@
-# 插件开发
-
-
-## 插件体系介绍
-
-seatunnel插件分为三部分,**Input**、**Filter**和**Output**
-
-### Input
-
-**Input**负责将外部数据源的数据转化为`DStream[(String, String)]`
-
-### Filter
-
-**Filter**是[transform](http://spark.apache.org/docs/latest/rdd-programming-guide.html#transformations)操作,负责对Dataset[Row]的数据结构进行操作
-
-### Output
-
-**Output**是[action](http://spark.apache.org/docs/latest/rdd-programming-guide.html#actions)操作,负责将Dataset[Row]输出到外部数据源或者打印到终端
-
-## 准备工作
-
-seatunnel支持Java/Scala作为插件开发语言,其中**Input**插件推荐使用Scala作为开发语言,其余类型插件Java和Scala皆可。
-
-新建一个Java/Scala项目,或者可以直接拉取[seatunnel-filter-example](https://github.com/InterestingLab/seatunnel-filter-example),然后在此项目上进行修改
-
-## 一、 新建pom.xml
-
-参考文件[pom.xml](https://github.com/InterestingLab/seatunnel-filter-example/blob/master/pom.xml)
-
-将seatunnel提供的接口加入项目的依赖中
-```
-<dependency>
- <groupId>io.github.interestinglab.seatunnel</groupId>
- <artifactId>seatunnel-apis_2.11</artifactId>
- <version>1.1.0</version>
-</dependency>
-```
-
-## 二、 实现自己的方法
-
-### Input(实时流)
-
-- 新建一个类,并继承**seatunnel-apis**提供的父类`BaseStreamingInput`
- ```scala
- class ScalaHdfs extends BaseStreamingInput {
-
- var config: Config = ConfigFactory.empty()
-
- /**
- * Set Config.
- **/
- override def setConfig(config: Config): Unit = {
- this.config = config
- }
-
- /**
- * Get Config.
- **/
- override def getConfig(): Config = {
- this.config
- }
- ```
-- 重写父类定义的`checkConfig`、`prepare`和`getDStream`方法
- ```scala
- override def checkConfig(): (Boolean, String) = {}
- override def prepare(spark: SparkSession): Unit = {}
- override def getDStream(ssc: StreamingContext): DStream[(String, String)] = {}
-
- ```
-- **Input**插件在调用时会先执行`checkConfig`方法核对调用插件时传入的参数是否正确,然后调用`prepare`方法配置参数的缺省值以及初始化类的成员变量,最后调用`getDStream`方法将外部数据源转换为`DStream[(String, String)]`
-- Scala版本**Input**插件实现参照[ScalaHdfs](https://github.com/InterestingLab/seatunnel-filter-example/blob/master/src/main/scala/org/interestinglab/seatunnel/input/ScalaHdfs.scala)
-
-
-### Filter
-
-- 新建一个类,并继承**seatunnel-apis**提供的父类`BaseFilter`
- ```Scala
- class ScalaSubstring extends BaseFilter {
-
- var config: Config = ConfigFactory.empty()
-
- /**
- * Set Config.
- **/
- override def setConfig(config: Config): Unit = {
- this.config = config
- }
-
- /**
- * Get Config.
- **/
- override def getConfig(): Config = {
- this.config
- }
- }
- ```
- ```Java
- public class JavaSubstring extends BaseFilter {
-
- private Config config;
-
- @Override
- public Config getConfig() {
- return config;
- }
-
- @Override
- public void setConfig(Config config) {
- this.config = config;
- }
- }
- ```
-- 重写父类定义的`checkConfig`、`prepare`和`process`方法
- ```Scala
- override def checkConfig(): (Boolean, String) = {}
- override def prepare(spark: SparkSession): Unit = {}
- override def process(spark: SparkSession, ds: Dataset[Row]): Dataset[Row] = {}
- ```
- ```Java
- @Override
- public Tuple2<Object, String> checkConfig() {}
- @Override
- public void prepare(SparkSession spark, StreamingContext ssc) {}
- @Override
- public Dataset<Row> process(SparkSession spark, Dataset<Row> df) {}
- ```
- - **Filter**插件在调用时会先执行`checkConfig`方法核对调用插件时传入的参数是否正确,然后调用`prepare`方法配置参数的缺省值以及初始化类的成员变量,最后调用`process`方法对 **Dataset[Row]** 格式数据进行处理。
- - Java版本**Filter**插件的实现参照[JavaSubstring](https://github.com/InterestingLab/seatunnel-filter-example/blob/master/src/main/java/org/interestinglab/seatunnel/filter/JavaSubstring.java),Scala版本**Filter**插件的实现参照[ScalaSubstring](https://github.com/InterestingLab/seatunnel-filter-example/blob/master/src/main/scala/org/interestinglab/seatunnel/filter/ScalaSubstring.scala)
-
-### Output
-
-- 新建一个类,并继承**seatunnel-apis**提供的父类`BaseOutput`
- ```Scala
- class ScalaStdout extends BaseOutput {
-
-
- var config: Config = ConfigFactory.empty()
-
- /**
- * Set Config.
- **/
- override def setConfig(config: Config): Unit = {
- this.config = config
- }
-
- /**
- * Get Config.
- **/
- override def getConfig(): Config = {
- this.config
- }
- }
- ```
- ```Java
- public class JavaStdout extends BaseOutput {
-
- private Config config;
-
- @Override
- public Config getConfig() {
- return config;
- }
-
- @Override
- public void setConfig(Config config) {
- this.config = config;
- }
- }
- ```
-- 重写父类定义的`checkConfig`、`prepare`和`process`方法
- ```Scala
- override def checkConfig(): (Boolean, String) = {}
- override def prepare(spark: SparkSession): Unit = {}
- override def process(spark: SparkSession, ds: Dataset[Row]): Dataset[Row] = {}
- ```
- ```Java
- @Override
- public Tuple2<Object, String> checkConfig() {}
- @Override
- public void prepare(SparkSession spark) {}
- @Override
- public Dataset<Row> process(SparkSession spark, Dataset<Row> ds) {}
- ```
- - **Output**插件调用结构与**Filter**插件相似。在调用时会先执行`checkConfig`方法核对调用插件时传入的参数是否正确,然后调用`prepare`方法配置参数的缺省值以及初始化类的成员变量,最后调用`process`方法将 **Dataset[Row]** 格式数据输出到外部数据源。
- - Java版本**Output**插件的实现参照[JavaStdout](https://github.com/InterestingLab/seatunnel-filter-example/blob/master/src/main/java/org/interestinglab/seatunnel/output/JavaStdout.java),Scala版本**Output**插件的实现参照[ScalaStdout](https://github.com/InterestingLab/seatunnel-filter-example/blob/master/src/main/scala/org/interestinglab/seatunnel/output/ScalaStdout.scala)
-
-### UDF
-
-- 新建一个类,并继承**seatunnel-apis**提供的父类`BaseFilter`
- ```Scala
- class ScalaSubstring extends BaseFilter {
-
- var config: Config = ConfigFactory.empty()
-
- /**
- * Set Config.
- **/
- override def setConfig(config: Config): Unit = {
- this.config = config
- }
-
- /**
- * Get Config.
- **/
- override def getConfig(): Config = {
- this.config
- }
- }
- ```
-- 重写父类定义的`checkConfig`、`prepare`、`getUdfList`和`process`方法,这里只介绍`getUdfList`以及`process`两个方法
- ```Scala
- override def getUdfList(): List[(String, UserDefinedFunction)] = {
- val func = udf((s: String, pos: Int, len: Int) => s.substring(pos, pos+len))
- List(("my_sub", func))
- }
- override def process(spark: SparkSession, ds: Dataset[Row]): Dataset[Row] = {
- val srcField = config.getString("source_field")
- val targetField = config.getString("target_field")
- val pos = config.getInt("pos")
- val len = config.getInt("len")
- val func = getUdfList().get(0)._2
- ds.withColumn(targetField, func(col(srcField), lit(pos), lit(len)))
- }
- ```
- 具体UDF插件开发完整案例参照[ScalaSubstring](https://github.com/InterestingLab/seatunnel-example/blob/rickyhuo.fea.udf/src/main/scala/org/interestinglab/seatunnel/filter/ScalaSubstring.scala#L15)
-- 新建META-INF/services
-
- seatunnel会利用**Service loader**机制将实现`io.github.interestinglab.seatunnel.apis.BaseFilter`的方法根据`getUdfList`返回的方法注册为UDF,如果接口实现类不在services中注明,将不会注册为UDF。
-
- 案例中的[META-INF](https://github.com/InterestingLab/seatunnel-example/blob/master/src/main/resources/META-INF/services/io.github.interestinglab.seatunnel.apis.BaseFilter)
-
-## 三、 打包使用
-
-1. 打包
-
- > mvn package
-
-2. 将打包好的Jar包放到seatunnel `plugins`目录下
- ```shell
- cd seatunnel-1.1.0
- mkdir -p plugins/my_plugins/lib
- cd plugins/my_plugins/lib
- ```
-
- seatunnel需要将第三方Jar包放到,必须新建**lib**文件夹
- > plugins/your_plugin_name/lib/your_jar_name
-
- 其他文件放到
- > plugins/your_plugin_name/files/your_file_name
-
-3. 在配置文件中使用插件
-
- 以下是一个使用第三方插件的完整示例,并将其放至`config/application.conf`
-
- 由`Fake`插件生成测试数据,经`Split`进行分割后,使用第三方插件`ScalaSubstring`进行字符串截取,最后使用第三方插件`JavaStdout`打印到终端。
- ```
- spark {
- spark.streaming.batchDuration = 5
- spark.app.name = "seatunnel-sample"
- spark.ui.port = 13000
- spark.executor.instances = 2
- spark.executor.cores = 1
- spark.executor.memory = "1g"
- }
-
- input {
- fakeStream {
- content = ["INFO : gary is 28 years old", "WARN : suwey is 16 years old"]
- rate = 5
- }
- }
-
- filter {
- split {
- fields = ["log_level", "message"]
- delimiter = ":"
- }
- sql = {
- table_name = "tmp"
- # 使用UDF
- sql = "select log_level, my_sub(message, 1, 3) from tmp"
- }
- }
-
- output {
- org.interestinglab.seatunnel.output.JavaStdout {
- limit = 2
- }
- }
- ```
-
-4. 启动seatunnel
-
- ```
- ./bin/start-seatunnel.sh --config config/application.conf --deploy-mode client --master local[2]
- ```
-
-5. 查看结果
-
- ```
- +---------+------------------+
- |log_level|UDF(message, 1, 3)|
- +---------+------------------+
- |INFO |ary |
- |INFO |ary |
- +---------+------------------+
- only showing top 2 rows
-
- ```
diff --git a/docs/internal/_category_.json b/docs/internal/_category_.json
deleted file mode 100644
index 36a09be..0000000
--- a/docs/internal/_category_.json
+++ /dev/null
@@ -1,4 +0,0 @@
-{
- "label": "深入了解",
- "position": 5
-}
\ No newline at end of file
diff --git a/docs/internal/internal.md b/docs/internal/internal.md
deleted file mode 100644
index de71932..0000000
--- a/docs/internal/internal.md
+++ /dev/null
@@ -1,34 +0,0 @@
-# 深入 seatunnel
-
-## seatunnel 努力改善多处痛点
-
-除了大大简化分布式数据处理难度外,seatunnel尽所能为您解决可能遇到的问题:
-
-* 数据丢失与重复
-
-如 seatunnel 的 Kafka Input 是通过 Kafka Direct API 实现的,同时通过checkpoint机制或者支持幂等写入的Output的支持,实现了exactly once操作。此外seatunnel的项目代码经过了详尽测试,尽可能减少了因数据处理异常导致的数据意外丢弃。
-
-* 任务堆积与延迟
-
-在线上环境,存在大量的Spark任务或者包含较多task的单个stage的Spark运行环境中,我们多次遇到单个task处理时间较长,拖慢了整个batch的情况。seatunnel默认开启了Spark推测执行的功能,推测执行功能会找到慢task并启动新的task,并以先完成的task作为计算结果。
-
-* 吞吐量低
-
-seatunnel 的代码实现中,直接利用了多项在实践中被证明有利于提升处理性能的Spark的高级特性,如:
-
-(1)在核心流程代码中,使用Dataset,Spark SQL 编程API,有效利用了Spark 的catalyst优化器。
-
-(2)支持插件实现中使用broadcast variable,对于IP库解析,写数据库链接维护这样的应用场景,能起到优化作用。
-
-(3)在插件的实现代码中,性能始终是我们优先考虑的因素。
-
-* 应用到生产环境周期长
-
-使用 seatunnel 可以做到开箱即用,在安装、部署、启动上做了多处简化;插件体系容易配置和部署,开发者能够很快在 seatunnel 中集成特定业务逻辑。
-
-* 缺少应用运行状态监控
-
-(1)seatunnel 自带监控工具 `Guardian`,是 seatunnel 的子项目,可监控 seatunnel 是否存活,并能够根据配置自动拉起 seatunnel 实例;可监控其运行时streaming batch是否存在堆积和延迟,并发送报警。
-
-(2)下一个release版本中将加入数据处理各阶段耗时统计,方便做性能优化。
-
diff --git a/docs/internal/monitoring.md b/docs/internal/monitoring.md
deleted file mode 100644
index 1362241..0000000
--- a/docs/internal/monitoring.md
+++ /dev/null
@@ -1,290 +0,0 @@
-# Guardian
-
-[Guardian](https://github.com/InterestingLab/guardian)是seatunnel的子项目,是一个监控和报警工具,可以提供seatunnel的存活情况监控以及调度延迟情况监控。Guardian能够在运行时动态加载配置文件,并提供HTTP API支持配置的实时修改。目前仅支持seatunnel on Yarn.
-
-## 运行Guardian
-
-下载Guardian,这里以guardian_1.0.0为例
-```
-wget https://github.com/InterestingLab/guardian/releases/download/v1.0.0/guardian_1.0.0.tar.gz
-tar -xvf guardian_1.0.0.tar.gz
-cd guardian_1.0.0
-./bin/guardian check config.json
-```
-
-
-## 配置文件
-
-Guardian配置文件由`JSON`格式编写,一个有效的实例,点击[这里](https://github.com/InterestingLab/guardian/blob/master/config.json.template)
-
-整个配置文件由以下几个部分组成:
-
-- port: 接口API绑定的端口
-- node_name: 节点信息
-- check_interval: 检测应用的时间间隔
-- yarn: 被检测的YARN集群地址
-- apps: 需要被检测的具体应用
-- alert_manager: 报警管理
-
-以下是各部分详细介绍:
-
-
-### yarn
-
-```
-# Yarn resourcemanager
-api_hosts: <list>
-```
-
-**Example**
-
-```
-"yarn": {
- "api_hosts": [
- "10.11.10.21:8088",
- "10.11.10.22:8088"
- ]
-}
-```
-
-### apps
-
-```
-[{
- # Spark application name
- "app_name": <string>,
- # 当应用失败时的重启命令
- "start_cmd": <string>,
- # 同一个app_name下的应用运行个数
- "app_num": <number>,
- # Application type, default 'spark'
- "check_type": <string>,
- # 标志这个应用是否有效
- "active": <boolean>,
- "check_options": {
- # 报警级别,支持WARNING、ERROR等
- "alert_level": <string>,
- "max_delayed_batch_num": <number>,
- "max_delayed_time": <number>
- }
-}]
-```
-
-**Example**
-
-```
-"apps": [
- {
- "app_name": "seatunnel-app",
- "start_cmd": "test_cmd",
- "app_num": 1,
- "check_type": "spark",
- "check_options": {
- "alert_level": "WARNING",
- "max_delayed_batch_num": 10,
- "max_delayed_time": 600
- }
- }
-]
-```
-
-### alert_manager
-
-#### routes
-
-报警路由,当前仅支持报警级别
-
-当报警级别为`WARNING`或`ERROR`触发报警
-
-```
-"routes": {
- "match": {
- "level": ["WARNING", "ERROR"]
- }
-}
-```
-
-#### **emails**
-
-通过邮件发送报警信息
-
-```
-# 邮箱验证用户名
-"auth_username": <string>,
-# 邮箱验证密码
-"auth_password": <string>,
-# 邮箱SMTP服务器
-"smtp_server": <string>,
-# 发件人
-"sender": <string>,
-# 收件人列表
-"receivers": <list>
-```
-
-**Example**
-
-```
-"emails": {
- "auth_username": "username",
- "auth_password": "password",
- "smtp_server": "smtp.163.com",
- "sender": "huochen1994@163.com",
- "receivers": ["garygaowork@gmail.com"],
- "routes": {
- "match": {
- "level": ["WARNING", "ERROR"]
- }
- }
-}
-```
-
-#### **webhook**
-
-通过接口实现自定义报警方式
-
-```
-# webhook接口地址
-"url": <string>
-```
-
-**Example**
-
-```
-"webhook": {
- "url": "http://api.webhook.interestinglab.org/alert",
- "routes": {
- "match": {
- "level": ["ERROR"]
- }
- }
-}
-```
-
-Guardian调用接口的时候会以下面JSON格式发送HTTP POST请求到配置的接口地址:
-
-```
-{
- "subject": "Guardian",
- "objects": "seatunnel_app",
- "content": "App is not running or less than expected number of running instance, will restart"
-}
-```
-
-
-## Guardian接口使用指南
-
-
-### GET
-
-#### 概述
-
-* 功能描述
-
- 获取Guardian对应app_name的配置信息
-
-* 基础接口
-
- http://localhost:5000/config/[app_name]
-
-* 请求方式
-
- get
-
-#### 接口参数定义
-
-N/A
-
-#### 返回结果
-
-```
-curl 'http://localhost:5000/config/seatunnel-app2'
-
-{
- "content": {
- "app_name": "seatunnel-app2",
- "app_num": 1,
- "check_options": {},
- "check_type": "spark",
- "start_cmd": "test_cmd_not_exist"
- },
- "status": 0
-}
-```
-
-### POST
-
-#### 概述
-
-* 功能描述
-
- 更新或新增Guardian中应用配置信息,当`app_name`存在,更新对应配置信息,当`app_name`不存在,新增一个应用监控配置
-
-* 基础接口
-
- http://localhost:5000/config/[app_name]
-
-* 请求方式
-
- post
-
-#### 接口参数定义
-
-| 字段 | 类型 | 注释 | 实例 |
-| :--: | :--: | :--: | :--:|
-| start_cmd| string| 重启命令| |
-|app_num| num | 存在个数 | 2 |
-|check_type| string | 应用类型 | spark |
-|check_options| dict| | |
-|active| boolean| 是否有效| true|
-
-#### 返回结果
-
-```
-curl 'http://localhost:5000/config/seatunnel-app2' -d '
-{
- "active": false
-}'
-
-{
- "status": 0
-}
-```
-
-### DELETE
-
-#### 概述
-
-* 功能描述
-
- 删除Guardian对应app_name的配置信息
-
-* 基础接口
-
- http://localhost:5000/config/[app_name]
-
-* 请求方式
-
- delete
-
-#### 接口参数定义
-
-N/A
-
-#### 返回结果
-
-```
-curl -XDELETE 10.212.81.56:5000/config/seatunnel-app2
-
-{
- "status": 0
-}
-```
-
-
-### 返回状态码说明
-
-| status | 说明 |
-| :--: | :--:|
-| 0 | 成功|
-| 1 | 参数错误|
-| 2 | 内部错误|
diff --git a/docs/introduction.md b/docs/introduction.md
index 9802ab7..ffa1af8 100644
--- a/docs/introduction.md
+++ b/docs/introduction.md
@@ -1,172 +1,161 @@
---
-title: 简介
+title: Introduction
sidebar_position: 1
---
-# seatunnel
+# SeaTunnel
-[![Build Status](https://travis-ci.org/InterestingLab/seatunnel.svg?branch=master)](https://travis-ci.org/InterestingLab/seatunnel) [![Join the chat at https://gitter.im/interestinglab_seatunnel/Lobby](https://badges.gitter.im/interestinglab_seatunnel/Lobby.svg)](https://gitter.im/interestinglab_seatunnel/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
+<img src="https://seatunnel.apache.org/image/logo.png" alt="seatunnel logo" height="200px" align="right" />
-seatunnel 是一个`非常易用`,`高性能`、支持`实时流式`和`离线批处理`的`海量数据`处理产品,架构于`Apache Spark` 和 `Apache Flink`之上。
+[![Backend Workflow](https://github.com/apache/incubator-seatunnel/actions/workflows/backend.yml/badge.svg?branch=dev)](https://github.com/apache/incubator-seatunnel/actions/workflows/backend.yml)
+[![Slack](https://img.shields.io/badge/slack-%23seatunnel-4f8eba?logo=slack)](https://join.slack.com/t/apacheseatunnel/shared_invite/zt-123jmewxe-RjB_DW3M3gV~xL91pZ0oVQ)
+[![Twitter Follow](https://img.shields.io/twitter/follow/ASFSeaTunnel.svg?label=Follow&logo=twitter)](https://twitter.com/ASFSeaTunnel)
----
-
-### 如果您没时间看下面内容,请直接进入正题:
-
-请点击进入快速入门:https://interestinglab.github.io/seatunnel-docs-docs/#/zh-cn/v1/quick-start
-
-seatunnel 提供可直接执行的软件包,没有必要自行编译源代码,下载地址:https://github.com/InterestingLab/seatunnel/releases
-
-文档地址:https://interestinglab.github.io/seatunnel-docs-docs/
-
-各种线上应用案例,请见: https://interestinglab.github.io/seatunnel-docs-docs/#/zh-cn/v1/case_study/
-
----
-
-## 为什么我们需要 seatunnel
-
-Databricks 开源的 Apache Spark 对于分布式数据处理来说是一个伟大的进步。我们在使用 Spark 时发现了很多可圈可点之处,同时我们也发现了我们的机会 —— 通过我们的努力让Spark的使用更简单,更高效,并将业界和我们使用Spark的优质经验固化到seatunnel这个产品中,明显减少学习成本,加快分布式数据处理能力在生产环境落地。
-
-除了大大简化分布式数据处理难度外,seatunnel尽所能为您解决可能遇到的问题:
-* 数据丢失与重复
-* 任务堆积与延迟
-* 吞吐量低
-* 应用到生产环境周期长
-* 缺少应用运行状态监控
-
-
-"seatunnel" 的中文是“水滴”,来自中国当代科幻小说作家刘慈欣的《三体》系列,它是三体人制造的宇宙探测器,会反射几乎全部的电磁波,表面绝对光滑,温度处于绝对零度,全部由被强互作用力紧密锁死的质子与中子构成,无坚不摧。在末日之战中,仅一个水滴就摧毁了人类太空武装力量近2千艘战舰。
-
-## seatunnel 使用场景
-
-* 海量数据ETL
-* 海量数据聚合
-* 多源数据处理
-
-## seatunnel 的特性
-
-* 简单易用,灵活配置,无需开发
-* 实时流式处理
-* 高性能
-* 海量数据处理能力
-* 模块化和插件化,易于扩展
-* 支持利用SQL做数据处理和聚合
-* Spark Structured Streaming
-* 支持Spark 2.x
-
-## seatunnel 的工作流程
-
-```
-Input/Source[数据源输入] -> Filter/Transform[数据处理] -> Output/Sink[结果输出]
-```
+[![EN doc](https://img.shields.io/badge/document-English-blue.svg)](README.md)
+[![CN doc](https://img.shields.io/badge/文档-中文版-blue.svg)](README_zh_CN.md)
-![wd-workflow](/doc/image_zh/wd-workflow.png ':size=300%')
+SeaTunnel was formerly named Waterdrop, and was renamed SeaTunnel on October 12, 2021.
-多个Filter构建了数据处理的Pipeline,满足各种各样的数据处理需求,如果您熟悉SQL,也可以直接通过SQL构建数据处理的Pipeline,简单高效。目前seatunnel支持的[Filter列表](https://interestinglab.github.io/seatunnel-docs/#/zh-cn/v1/configuration/filter-plugin), 仍然在不断扩充中。您也可以开发自己的数据处理插件,整个系统是易于扩展的。
+SeaTunnel is a very easy-to-use ultra-high-performance distributed data integration platform that supports real-time
+synchronization of massive data. It can synchronize tens of billions of data stably and efficiently every day, and has
+been used in the production of nearly 100 companies.
-## seatunnel 支持的插件
+## Why do we need SeaTunnel
-* Input/Source plugin
+SeaTunnel will do its best to solve the problems that may be encountered in the synchronization of massive data:
-Fake, File, Hdfs, Kafka, S3, Socket, 自行开发的Input plugin
+- Data loss and duplication
+- Task accumulation and delay
+- Low throughput
+- Long cycle to be applied in the production environment
+- Lack of application running status monitoring
-* Filter/Transform plugin
+## SeaTunnel use scenarios
-Add, Checksum, Convert, Date, Drop, Grok, Json, Kv, Lowercase, Remove, Rename, Repartition, Replace, Sample, Split, Sql, Table, Truncate, Uppercase, Uuid, 自行开发的Filter plugin
+- Mass data synchronization
+- Mass data integration
+- ETL with massive data
+- Mass data aggregation
+- Multi-source data processing
-* Output/Sink plugin
+## Features of SeaTunnel
-Elasticsearch, File, Hdfs, Jdbc, Kafka, Mysql, S3, Stdout, 自行开发的Output plugin
+- Easy to use, flexible configuration, low code development
+- Real-time streaming
+- Offline multi-source data analysis
+- High-performance, massive data processing capabilities
+- Modular and plug-in mechanism, easy to extend
+- Support data processing and aggregation by SQL
+- Support Spark structured streaming
+- Support Spark 2.x
-## 环境依赖
+## Workflow of SeaTunnel
-1. java运行环境,java >= 8
+![seatunnel-workflow.svg](https://github.com/apache/incubator-seatunnel-website/blob/main/static/image/seatunnel-workflow.svg)
-2. 如果您要在集群环境中运行seatunnel,那么需要以下Spark集群环境的任意一种:
+Input[Data Source Input] -> Filter[Data Processing] -> Output[Result Output]
-* Spark on Yarn
-* Spark Standalone
-* Spark on Mesos
+The data processing pipeline is constituted by multiple filters to meet a variety of data processing needs. If you are
+accustomed to SQL, you can also directly construct a data processing pipeline by SQL, which is simple and efficient.
+Currently, the filter list supported by SeaTunnel is still being expanded. Furthermore, you can develop your own data
+processing plug-in, because the whole system is easy to expand.
-如果您的数据量较小或者只是做功能验证,也可以仅使用`local`模式启动,无需集群环境,seatunnel支持单机运行。
+## Plugins supported by SeaTunnel
-## 社区分享
+- Input plugin Fake, File, Hdfs, Kafka, Druid, S3, Socket, self-developed Input plugin
-* 2018-09-08 Elasticsearch 社区分享 [seatunnel:构建在Spark之上的简单高效数据处理系统](https://elasticsearch.cn/slides/127#page=1)
+- Filter plugin Add, Checksum, Convert, Date, Drop, Grok, Json, Kv, Lowercase, Remove, Rename, Repartition, Replace,
+ Sample, Split, Sql, Table, Truncate, Uppercase, Uuid, Self-developed Filter plugin
-* 2017-09-22 InterestingLab 内部分享 [seatunnel介绍PPT](http://slides.com/garyelephant/seatunnel/fullscreen?token=GKrQoxJi)
+- Output plugin Elasticsearch, File, Hdfs, Jdbc, Kafka, Druid, Mysql, S3, Stdout, self-developed Output plugin
-## 应用案例
+## Environmental dependency
-* [微博](https://weibo.com), 增值业务部数据平台
+1. Java runtime environment, Java >= 8
-![微博 Logo](https://img.t.sinajs.cn/t5/style/images/staticlogo/groups3.png?version=f362a1c5be520a15 ':size=200%')
+2. If you want to run SeaTunnel in a cluster environment, any of the following Spark cluster environments is usable:
-微博某业务有数百个实时流式计算任务使用内部定制版seatunnel,以及其子项目[Guardian](https://github.com/InterestingLab/guardian)做seatunnel On Yarn的任务监控。
+- Spark on Yarn
+- Spark Standalone
-* [新浪](http://www.sina.com.cn/), 大数据运维分析平台
+If the data volume is small, or the goal is merely for functional verification, you can also start in local mode without
+a cluster environment, because SeaTunnel supports standalone operation. Note: SeaTunnel 2.0 supports running on Spark
+and Flink.
-![新浪 Logo](/doc/image_zh/sina-logo.png ':size=170%')
+## Downloads
-新浪运维数据分析平台使用seatunnel为新浪新闻,CDN等服务做运维大数据的实时和离线分析,并写入Clickhouse。
+Download address for run-directly software package: https://github.com/apache/incubator-seatunnel/releases
-* [字节跳动](https://bytedance.com/zh),广告数据平台
+## Quick start
-![字节跳动 Logo](/doc/image_zh/bytedance-logo.jpeg ':size=40%')
+Quick start: https://interestinglab.github.io/seatunnel-docs/#/zh-cn/v1/quick-start
-字节跳动使用seatunnel实现了多源数据的关联分析(如Hive和ES的数据源关联查询分析),大大简化了不同数据源之间的分析对比工作,并且节省了大量的Spark程序的学习和开发时间。
+Detailed documentation on SeaTunnel: https://interestinglab.github.io/seatunnel-docs/#/
-* [搜狗](http://agent.e.sogou.com/),搜狗奇点系统
+## Application practice cases
-![搜狗 Logo](/doc/image_zh/sougou-logo.png ':size=40%')
+- Weibo, Value-added Business Department Data Platform
-搜狗奇点系统使用 seatunnel 作为 etl 工具, 帮助建立实时数仓体系
+Weibo business uses an internal customized version of SeaTunnel and its sub-project Guardian for SeaTunnel On Yarn task
+monitoring for hundreds of real-time streaming computing tasks.
-* [趣头条](https://www.qutoutiao.net/),趣头条数据中心
+- Sina, Big Data Operation Analysis Platform
-![趣头条 Logo](/doc/image_zh/qutoutiao-logo.jpg ':size=40%')
+Sina Data Operation Analysis Platform uses SeaTunnel to perform real-time and offline analysis of data operation and
+maintenance for Sina News, CDN and other services, and write it into Clickhouse.
- 趣头条数据中心,使用seatunnel支撑mysql to hive的离线etl任务、实时hive to clickhouse的backfill技术支撑,很好的cover离线、实时大部分任务场景。
+- Sogou, Sogou Qidian System
-* [一下科技](https://www.yixia.com/), 一直播数据平台
+Sogou Qidian System takes SeaTunnel as an ETL tool to help establish a real-time data warehouse system.
-![一下科技 Logo](https://imgaliyuncdn.miaopai.com/static20131031/miaopai20140729/new_yixia/static/imgs/logo.png ':size=170%')
+- Qutoutiao, Qutoutiao Data Center
-* 永辉超市子公司-永辉云创,会员电商数据分析平台
+Qutoutiao Data Center uses SeaTunnel to support MySQL-to-Hive offline ETL tasks and real-time Hive-to-ClickHouse backfill,
+covering the needs of most offline and real-time tasks.
-![永辉云创 Logo](/doc/image_zh/yonghuiyunchuang-logo.png)
+- Yixia Technology, Yizhibo Data Platform
-seatunnel 为永辉云创旗下新零售品牌永辉生活提供电商用户行为数据实时流式与离线SQL计算。
+- Yonghui Superstores Founders' Alliance-Yonghui Yunchuang Technology, Member E-commerce Data Analysis Platform
-* 水滴筹, 数据平台
+SeaTunnel provides real-time streaming and offline SQL computing of e-commerce user behavior data for Yonghui Life, a
+new retail brand of Yonghui Yunchuang Technology.
-![水滴筹 logo](/doc/image_zh/shuidichou-logo.jpg ':size=130%')
+- Shuidichou, Data Platform
-水滴筹在Yarn上使用seatunnel做实时流式以及定时的离线批处理,每天处理3~4T的数据量,最终将数据写入Clickhouse。
+Shuidichou uses SeaTunnel on Yarn for real-time streaming and scheduled offline batch processing, handling 3~4T of data
+per day on average and finally writing the data to Clickhouse.
-* 浙江乐控信息科技有限公司
+- Tencent Cloud
-![浙江乐控信息科技有限公司 logo](/doc/image_zh/zhejiang_lekong_xinxi_keji-logo.jpg ':size=130%')
+Various logs from business services are collected into Apache Kafka; some of the data in Apache Kafka is consumed and extracted through SeaTunnel, and then stored in Clickhouse.
-Watedrop 为浙江乐控信息科技有限公司旗下乐控智能提供物联网交互数据实时流sql分析(Structured Streaming 引擎)和离线数据分析。每天处理的数据量8千万到一亿条数据 最终数据落地到kafka和mysql数据库。
+For more use cases, please refer to: https://interestinglab.github.io/seatunnel-docs/#/zh-cn/case_study/
-* [上海分蛋信息科技](https://91fd.com),大数据数据分析平台
+## Code of conduct
-![上海分蛋信息科技 logo](/doc/image_zh/fendan-keji-logo.jpeg ':size=70%')
+This project adheres to the Contributor Covenant [code of conduct](https://www.apache.org/foundation/policies/conduct).
+By participating, you are expected to uphold this code. Please follow
+the [REPORTING GUIDELINES](https://www.apache.org/foundation/policies/conduct#reporting-guidelines) to report
+unacceptable behavior.
-分蛋科技使用seatunnel做数据仓库实时同步,近百个Pipeline同步处理;数据流实时统计,数据平台指标离线计算。
+## Developer
-## 贡献观点和代码
+Thanks to all developers!
-提交问题和建议:https://github.com/InterestingLab/seatunnel/issues
+[![](https://opencollective.com/seatunnel/contributors.svg?width=666)](https://github.com/apache/incubator-seatunnel/graphs/contributors)
-贡献代码:https://github.com/InterestingLab/seatunnel/pulls
+## Contact Us
-## 开发者
+* Mailing list: **dev@seatunnel.apache.org**. Send an email to `dev-subscribe@seatunnel.apache.org` and follow the reply to subscribe to the mailing list.
+* Slack: https://join.slack.com/t/apacheseatunnel/shared_invite/zt-123jmewxe-RjB_DW3M3gV~xL91pZ0oVQ
+* Twitter: https://twitter.com/ASFSeaTunnel
+* [bilibili](https://space.bilibili.com/1542095008) (for China users)
-感谢[所有开发者](https://github.com/InterestingLab/seatunnel/graphs/contributors)
+## Landscapes
-## 联系我们
-* 邮件列表 : **dev@seatunnel.apache.org**. 发送任意内容至 `dev-subscribe@seatunnel.apache.org`, 按照回复订阅邮件列表。
-* Slack: 发送 `Request to join SeaTunnel slack` 邮件到邮件列表 (`dev@seatunnel.apache.org`), 我们会邀请你加入(在此之前请确认已经注册Slack).
-* [bilibili B站 视频](https://space.bilibili.com/1542095008)
+<p align="center">
+<br/><br/>
+<img src="https://landscape.cncf.io/images/left-logo.svg" width="150" alt=""/> <img src="https://landscape.cncf.io/images/right-logo.svg" width="200" alt=""/>
+<br/><br/>
+SeaTunnel enriches the <a href="https://landscape.cncf.io/landscape=observability-and-analysis&license=apache-license-2-0">CNCF CLOUD NATIVE Landscape.</a>
+</p>
diff --git a/docs/quickstart/_category_.json b/docs/quickstart/_category_.json
deleted file mode 100644
index 029132f..0000000
--- a/docs/quickstart/_category_.json
+++ /dev/null
@@ -1,4 +0,0 @@
-{
- "label": "快速开始",
- "position": 1
-}
\ No newline at end of file
diff --git a/docs/quickstart/quickstart.md b/docs/quickstart/quickstart.md
deleted file mode 100644
index 09d3005..0000000
--- a/docs/quickstart/quickstart.md
+++ /dev/null
@@ -1,135 +0,0 @@
----
-title: 快速开始
-sidebar_position: 2
----
-
-# 快速开始
-
-> 我们以一个通过socket接收数据,将数据分割为多个字段,并输出处理结果的应用为例,快速展示seatunnel的使用方法。
-
-### Step 1: 准备Spark 运行环境
-
-> 如果你熟悉Spark或者已准备好Spark运行环境,可忽略此步骤,Spark不需要做任何特殊配置。
-
-请先[下载Spark](http://spark.apache.org/downloads.html), Spark版本请选择 >= 2.x.x。下载解压后,不需要做任何配置即可提交Spark deploy-mode = local模式的任务。
-如果你期望任务运行在Standalone集群或者Yarn、Mesos集群上,请参考Spark官网的[Spark部署文档](http://spark.apache.org/docs/latest/cluster-overview.html)。
-
-### Step 2: 下载 seatunnel
-
-进入[seatunnel安装包下载页面](https://github.com/InterestingLab/seatunnel/releases/latest),下载最新版`seatunnel-<version>.zip`
-
-或者直接下载指定版本(以1.1.2为例):
-
-```
-wget https://github.com/InterestingLab/seatunnel/releases/download/v1.1.2/seatunnel-1.1.2.zip -O seatunnel-1.1.2.zip
-```
-
-下载后,解压:
-
-```
-unzip seatunnel-<version>.zip
-ln -s seatunnel-<version> seatunnel
-```
-
-### Step 3: 配置 seatunnel
-
-编辑 `config/seatunnel-env.sh`, 指定必须环境配置如SPARK_HOME(Step 1 中Spark下载并解压后的目录)
-
-编辑 `config/application.conf`, 它决定了seatunnel启动后,数据输入,处理,输出的方式和逻辑。
-
-```
-spark {
- # seatunnel defined streaming batch duration in seconds
- spark.streaming.batchDuration = 5
-
- spark.app.name = "seatunnel"
- spark.ui.port = 13000
-}
-
-input {
- socketStream {}
-}
-
-filter {
- split {
- fields = ["msg", "name"]
- delimiter = ","
- }
-}
-
-output {
- stdout {}
-}
-
-```
-
-### Step 4: 启动netcat server用于发送数据
-
-```
-nc -l -p 9999
-```
-
-
-### Step 5: 启动seatunnel
-
-```
-cd seatunnel
-./bin/start-seatunnel.sh --master local[4] --deploy-mode client --config ./config/application.conf
-
-```
-
-### Step 6: 在nc端输入
-
-```
-Hello World, Gary
-```
-seatunnel日志打印出:
-
-```
-+-----------------+-----------+----+
-|raw_message |msg |name|
-+-----------------+-----------+----+
-|Hello World, Gary|Hello World|Gary|
-+-----------------+-----------+----+
-```
-
-
-### 总结
-
-seatunnel简单易用,还有更丰富的数据处理功能等待被发现。本文展示的数据处理案例,
-无需任何代码、编译、打包,比官方的[Quick Example](https://spark.apache.org/docs/latest/streaming-programming-guide.html#a-quick-example)更简单。
-
-
----
-
-如果想了解更多的seatunnel配置示例可参见:
-
-[配置示例1 : Streaming 流式计算](https://github.com/InterestingLab/seatunnel/blob/master/config/streaming.conf.template)
-
-以上配置为默认【流式处理配置模版】,可直接运行,命令如下:
-
-```
-cd seatunnel
-./bin/start-seatunnel.sh --master local[4] --deploy-mode client --config ./config/streaming.conf.template
-
-```
-
-[配置示例2 : Batch 离线批处理](https://github.com/InterestingLab/seatunnel/blob/master/config/batch.conf.template)
-
-以上配置为默认【离线批处理配置模版】,可直接运行,命令如下:
-
-```
-cd seatunnel
-./bin/start-seatunnel.sh --master local[4] --deploy-mode client --config ./config/batch.conf.template
-
-```
-
-[配置示例3 : Structured Streaming 流式处理](https://github.com/InterestingLab/seatunnel/blob/master/config/structuredstreaming.conf.template)
-
-以上配置为默认【Structured Streaming 配置模版】,需配置Kafka输入源后运行,命令如下:
-
-```
-cd seatunnel
-./bin/start-seatunnel-structured-streaming.sh --master local[4] --deploy-mode client --config ./config/batch.conf.template
-
-```
diff --git a/docs/roadmap/_category_.json b/docs/roadmap/_category_.json
deleted file mode 100644
index 4325fa8..0000000
--- a/docs/roadmap/_category_.json
+++ /dev/null
@@ -1,4 +0,0 @@
-{
- "label": "Roadmap",
- "position": 7
-}
\ No newline at end of file
diff --git a/docs/roadmap/roadmap.md b/docs/roadmap/roadmap.md
deleted file mode 100644
index d93012a..0000000
--- a/docs/roadmap/roadmap.md
+++ /dev/null
@@ -1,14 +0,0 @@
-# Roadmap
-
-* 支持离线数据处理
-
-* 支持Apache Flink / Apache Beam
-
-* 支持更丰富的插件: 如国内的ipip.net IP库解析;输出数据到HBase, MongoDB的插件。
-
-* 支持流式机器学习
-
-* 性能优化
-
-* ...
-
diff --git a/docs/usecase/1.md b/docs/usecase/1.md
deleted file mode 100644
index a65427d..0000000
--- a/docs/usecase/1.md
+++ /dev/null
@@ -1,218 +0,0 @@
----
-title: HDFS导出数据到Clickhouse
-sidebar_position: 1
----
-
-![](/doc/image_zh/hdfs2ch.jpg)
-
-ClickHouse是面向OLAP的分布式列式DBMS。我们部门目前已经把所有数据分析相关的日志数据存储至ClickHouse这个优秀的数据仓库之中,当前日数据量达到了300亿。
-
-之前介绍的有关数据处理入库的经验都是基于实时数据流,数据存储在Kafka中,我们使用Java或者Golang将数据从Kafka中读取、解析、清洗之后写入ClickHouse中,这样可以实现数据的快速接入。然而在很多同学的使用场景中,数据都不是实时的,可能需要将HDFS或者是Hive中的数据导入ClickHouse。有的同学通过编写Spark程序来实现数据的导入,那么是否有更简单、高效的方法呢。
-
-目前开源社区上有一款工具**seatunnel**,项目地址[https://github.com/InterestingLab/seatunnel](https://github.com/InterestingLab/seatunnel),可以快速地将HDFS中的数据导入ClickHouse。
-
-## HDFS to ClickHouse
-
-假设我们的日志存储在HDFS中,我们需要将日志进行解析并筛选出我们关心的字段,将对应的字段写入ClickHouse的表中。
-
-### Log Sample
-
-我们在HDFS中存储的日志格式如下, 是很常见的Nginx日志
-
-```
-10.41.1.28 github.com 114.250.140.241 0.001s "127.0.0.1:80" [26/Oct/2018:03:09:32 +0800] "GET /InterestingLab/seatunnel HTTP/1.1" 200 0 "-" - "Dalvik/2.1.0 (Linux; U; Android 7.1.1; OPPO R11 Build/NMF26X)" "196" "-" "mainpage" "443" "-" "172.16.181.129"
-```
-
-### ClickHouse Schema
-
-我们的ClickHouse建表语句如下,我们的表按日进行分区
-
-```
-CREATE TABLE cms.cms_msg
-(
- date Date,
- datetime DateTime,
- url String,
- request_time Float32,
- status String,
- hostname String,
- domain String,
- remote_addr String,
- data_size Int32,
- pool String
-) ENGINE = MergeTree PARTITION BY date ORDER BY date SETTINGS index_granularity = 16384
-```
-
-## seatunnel with ClickHouse
-
-接下来会给大家详细介绍,我们如何通过seatunnel满足上述需求,将HDFS中的数据写入ClickHouse中。
-
-### seatunnel
-
-[seatunnel](https://github.com/InterestingLab/seatunnel)是一个非常易用,高性能,能够应对海量数据的实时数据处理产品,它构建在Spark之上。seatunnel拥有着非常丰富的插件,支持从Kafka、HDFS、Kudu中读取数据,进行各种各样的数据处理,并将结果写入ClickHouse、Elasticsearch或者Kafka中。
-
-### Prerequisites
-
-首先我们需要安装seatunnel,安装十分简单,无需配置系统环境变量
-1. 准备Spark环境
-2. 安装seatunnel
-3. 配置seatunnel
-
-以下是简易步骤,具体安装可以参照[Quick Start](https://interestinglab.github.io/seatunnel-docs/#/zh-cn/v1/quick-start)
-
-```
-cd /usr/local
-wget https://archive.apache.org/dist/spark/spark-2.2.0/spark-2.2.0-bin-hadoop2.7.tgz
-tar -xvf https://archive.apache.org/dist/spark/spark-2.2.0/spark-2.2.0-bin-hadoop2.7.tgz
-wget https://github.com/InterestingLab/seatunnel/releases/download/v1.1.1/seatunnel-1.1.1.zip
-unzip seatunnel-1.1.1.zip
-cd seatunnel-1.1.1
-
-vim config/seatunnel-env.sh
-# 指定Spark安装路径
-SPARK_HOME=${SPARK_HOME:-/usr/local/spark-2.2.0-bin-hadoop2.7}
-```
-
-### seatunnel Pipeline
-
-我们仅需要编写一个seatunnel Pipeline的配置文件即可完成数据的导入。
-
-配置文件包括四个部分,分别是Spark、Input、filter和Output。
-
-#### Spark
-
-
-这一部分是Spark的相关配置,主要配置Spark执行时所需的资源大小。
-```
-spark {
- spark.app.name = "seatunnel"
- spark.executor.instances = 2
- spark.executor.cores = 1
- spark.executor.memory = "1g"
-}
-```
-
-#### Input
-
-这一部分定义数据源,如下是从HDFS文件中读取text格式数据的配置案例。
-
-```
-input {
- hdfs {
- path = "hdfs://nomanode:8020/rowlog/accesslog"
- table_name = "access_log"
- format = "text"
- }
-}
-```
-
-#### Filter
-
-在Filter部分,这里我们配置一系列的转化,包括正则解析将日志进行拆分、时间转换将HTTPDATE转化为ClickHouse支持的日期格式、对Number类型的字段进行类型转换以及通过SQL进行字段筛减等
-```
-filter {
- # 使用正则解析原始日志
- grok {
- source_field = "raw_message"
- pattern = '%{IP:ha_ip}\\s%{NOTSPACE:domain}\\s%{IP:remote_addr}\\s%{NUMBER:request_time}s\\s\"%{DATA:upstream_ip}\"\\s\\[%{HTTPDATE:timestamp}\\]\\s\"%{NOTSPACE:method}\\s%{DATA:url}\\s%{NOTSPACE:http_ver}\"\\s%{NUMBER:status}\\s%{NUMBER:body_bytes_send}\\s%{DATA:referer}\\s%{NOTSPACE:cookie_info}\\s\"%{DATA:user_agent}\"\\s%{DATA:uid}\\s%{DATA:session_id}\\s\"%{DATA:pool}\"\\s\"%{DATA:tag2}\"\\s%{DATA:tag3}\\s%{DATA:tag4}'
- }
- # 将"dd/MMM/yyyy:HH:mm:ss Z"格式的数据转换为
- # "yyyy/MM/dd HH:mm:ss"格式的数据
- date {
- source_field = "timestamp"
- target_field = "datetime"
- source_time_format = "dd/MMM/yyyy:HH:mm:ss Z"
- target_time_format = "yyyy/MM/dd HH:mm:ss"
- }
- # 使用SQL筛选关注的字段,并对字段进行处理
- # 甚至可以通过过滤条件过滤掉不关心的数据
- sql {
- table_name = "access"
- sql = "select substring(date, 1, 10) as date, datetime, hostname, url, http_code, float(request_time), int(data_size), domain from access"
- }
-}
-```
-
-#### Output
-最后我们将处理好的结构化数据写入ClickHouse
-
-```
-output {
- clickhouse {
- host = "your.clickhouse.host:8123"
- database = "seatunnel"
- table = "access_log"
- fields = ["date", "datetime", "hostname", "uri", "http_code", "request_time", "data_size", "domain"]
- username = "username"
- password = "password"
- }
-}
-```
-
-### Running seatunnel
-
-我们将上述四部分配置组合成为我们的配置文件`config/batch.conf`。
-
- vim config/batch.conf
-
-```
-spark {
- spark.app.name = "seatunnel"
- spark.executor.instances = 2
- spark.executor.cores = 1
- spark.executor.memory = "1g"
-}
-input {
- hdfs {
- path = "hdfs://nomanode:8020/rowlog/accesslog"
- table_name = "access_log"
- format = "text"
- }
-}
-filter {
- # 使用正则解析原始日志
- grok {
- source_field = "raw_message"
- pattern = '%{IP:ha_ip}\\s%{NOTSPACE:domain}\\s%{IP:remote_addr}\\s%{NUMBER:request_time}s\\s\"%{DATA:upstream_ip}\"\\s\\[%{HTTPDATE:timestamp}\\]\\s\"%{NOTSPACE:method}\\s%{DATA:url}\\s%{NOTSPACE:http_ver}\"\\s%{NUMBER:status}\\s%{NUMBER:body_bytes_send}\\s%{DATA:referer}\\s%{NOTSPACE:cookie_info}\\s\"%{DATA:user_agent}\"\\s%{DATA:uid}\\s%{DATA:session_id}\\s\"%{DATA:pool}\"\\s\"%{DATA:tag2}\"\\s%{DATA:tag3}\\s%{DATA:tag4}'
- }
- # 将"dd/MMM/yyyy:HH:mm:ss Z"格式的数据转换为
- # "yyyy/MM/dd HH:mm:ss"格式的数据
- date {
- source_field = "timestamp"
- target_field = "datetime"
- source_time_format = "dd/MMM/yyyy:HH:mm:ss Z"
- target_time_format = "yyyy/MM/dd HH:mm:ss"
- }
- # 使用SQL筛选关注的字段,并对字段进行处理
- # 甚至可以通过过滤条件过滤掉不关心的数据
- sql {
- table_name = "access"
- sql = "select substring(date, 1, 10) as date, datetime, hostname, url, http_code, float(request_time), int(data_size), domain from access"
- }
-}
-output {
- clickhouse {
- host = "your.clickhouse.host:8123"
- database = "seatunnel"
- table = "access_log"
- fields = ["date", "datetime", "hostname", "uri", "http_code", "request_time", "data_size", "domain"]
- username = "username"
- password = "password"
- }
-}
-```
-
-执行命令,指定配置文件,运行seatunnel,即可将数据写入ClickHouse。这里我们以本地模式为例。
-
- ./bin/start-seatunnel.sh --config config/batch.conf -e client -m 'local[2]'
-
-
-## Conclusion
-
-在这篇文章中,我们介绍了如何使用seatunnel将HDFS中的Nginx日志文件导入ClickHouse中。仅通过一个配置文件便可快速完成数据的导入,无需编写任何代码。除了支持HDFS数据源之外,seatunnel同样支持将数据从Kafka中实时读取处理写入ClickHouse中。我们的下一篇文章将会介绍,如何将Hive中的数据快速导入ClickHouse中。
-
-当然,seatunnel不仅仅是ClickHouse数据写入的工具,在Elasticsearch以及Kafka等数据源的写入上同样可以扮演相当重要的角色。
-
-希望了解seatunnel和ClickHouse、Elasticsearch、Kafka结合使用的更多功能和案例,可以直接进入项目主页[https://github.com/InterestingLab/seatunnel](https://github.com/InterestingLab/seatunnel)
-
--- Power by [InterestingLab](https://github.com/InterestingLab)
diff --git a/docs/usecase/2.md b/docs/usecase/2.md
deleted file mode 100644
index 45eba6d..0000000
--- a/docs/usecase/2.md
+++ /dev/null
@@ -1,186 +0,0 @@
----
-title: Hive导出数据到Clickhouse
-sidebar_position: 1
----
-
-ClickHouse是面向OLAP的分布式列式DBMS。我们部门目前已经把所有数据分析相关的日志数据存储至ClickHouse这个优秀的数据仓库之中,当前日数据量达到了300亿。
-
-在之前的文章[如何快速地把HDFS中的数据导入ClickHouse](http://www.clickhouse.com.cn/topic/5be26d48b24c202171d54fd6)中我们提到过使用seatunnel——[https://github.com/InterestingLab/seatunnel](https://github.com/InterestingLab/seatunnel)对HDFS中的数据经过很简单的操作就可以将数据写入ClickHouse。HDFS中的数据一般是非结构化的数据,那么针对存储在Hive中的结构化数据,我们应该怎么操作呢?
-
-![](/doc/image_zh/hive-logo.png)
-
-## Hive to ClickHouse
-
-假定我们的数据已经存储在Hive中,我们需要读取Hive表中的数据并筛选出我们关心的字段,或者对字段进行转换,最后将对应的字段写入ClickHouse的表中。
-
-### Hive Schema
-
-我们在Hive中存储的数据表结构如下,存储的是很常见的Nginx日志
-
-```
-CREATE TABLE `nginx_msg_detail`(
- `hostname` string,
- `domain` string,
- `remote_addr` string,
- `request_time` float,
- `datetime` string,
- `url` string,
- `status` int,
- `data_size` int,
- `referer` string,
- `cookie_info` string,
- `user_agent` string,
- `minute` string)
- PARTITIONED BY (
- `date` string,
- `hour` string)
-
-```
-
-### ClickHouse Schema
-
-我们的ClickHouse建表语句如下,我们的表按日进行分区
-
-```
-CREATE TABLE cms.cms_msg
-(
- date Date,
- datetime DateTime,
- url String,
- request_time Float32,
- status String,
- hostname String,
- domain String,
- remote_addr String,
- data_size Int32
-) ENGINE = MergeTree PARTITION BY date ORDER BY (date, hostname) SETTINGS index_granularity = 16384
-```
-
-## seatunnel with ClickHouse
-
-接下来会给大家介绍,我们如何通过seatunnel将Hive中的数据写入ClickHouse中。
-
-### seatunnel
-
-[seatunnel](https://github.com/InterestingLab/seatunnel)是一个非常易用,高性能,能够应对海量数据的实时数据处理产品,它构建在Spark之上。seatunnel拥有着非常丰富的插件,支持从Kafka、HDFS、Kudu中读取数据,进行各种各样的数据处理,并将结果写入ClickHouse、Elasticsearch或者Kafka中。
-
-seatunnel的环境准备以及安装步骤这里就不一一赘述了,具体安装步骤可以参考上一篇文章或者访问[seatunnel Docs](https://interestinglab.github.io/seatunnel-docs/#/)
-
-### seatunnel Pipeline
-
-我们仅需要编写一个seatunnel Pipeline的配置文件即可完成数据的导入。
-
-配置文件包括四个部分,分别是Spark、Input、filter和Output。
-
-#### Spark
-
-
-这一部分是Spark的相关配置,主要配置Spark执行时所需的资源大小。
-```
-spark {
- // 这个配置必需填写
- spark.sql.catalogImplementation = "hive"
- spark.app.name = "seatunnel"
- spark.executor.instances = 2
- spark.executor.cores = 1
- spark.executor.memory = "1g"
-}
-```
-
-#### Input
-
-这一部分定义数据源,如下是从Hive文件中读取text格式数据的配置案例。
-
-```
-input {
- hive {
- pre_sql = "select * from access.nginx_msg_detail"
- table_name = "access_log"
- }
-}
-```
-
-看,很简单的一个配置就可以从Hive中读取数据了。其中`pre_sql`是从Hive中读取数据SQL,`table_name`是将读取后的数据,注册成为Spark中临时表的表名,可为任意字段。
-
-需要注意的是,必须保证hive的metastore是在服务状态。
-
-在Cluster、Client、Local模式下运行时,必须把`hive-site.xml`文件置于提交任务节点的$HADOOP_CONF目录下
-
-#### Filter
-
-在Filter部分,这里我们配置一系列的转化,我们这里把不需要的minute和hour字段丢弃。当然我们也可以在读取Hive的时候通过`pre_sql`不读取这些字段
-
-```
-filter {
- remove {
- source_field = ["minute", "hour"]
- }
-}
-```
-
-#### Output
-最后我们将处理好的结构化数据写入ClickHouse
-
-```
-output {
- clickhouse {
- host = "your.clickhouse.host:8123"
- database = "seatunnel"
- table = "nginx_log"
- fields = ["date", "datetime", "hostname", "url", "http_code", "request_time", "data_size", "domain"]
- username = "username"
- password = "password"
- }
-}
-```
-
-### Running seatunnel
-
-我们将上述四部分配置组合成为我们的配置文件`config/batch.conf`。
-
- vim config/batch.conf
-
-```
-spark {
- spark.app.name = "seatunnel"
- spark.executor.instances = 2
- spark.executor.cores = 1
- spark.executor.memory = "1g"
- // 这个配置必需填写
- spark.sql.catalogImplementation = "hive"
-}
-input {
- hive {
- pre_sql = "select * from access.nginx_msg_detail"
- table_name = "access_log"
- }
-}
-filter {
- remove {
- source_field = ["minute", "hour"]
- }
-}
-output {
- clickhouse {
- host = "your.clickhouse.host:8123"
- database = "seatunnel"
- table = "access_log"
- fields = ["date", "datetime", "hostname", "uri", "http_code", "request_time", "data_size", "domain"]
- username = "username"
- password = "password"
- }
-}
-```
-
-执行命令,指定配置文件,运行seatunnel,即可将数据写入ClickHouse。这里我们以本地模式为例。
-
- ./bin/start-seatunnel.sh --config config/batch.conf -e client -m 'local[2]'
-
-
-## Conclusion
-
-在这篇文章中,我们介绍了如何使用seatunnel将Hive中的数据导入ClickHouse中。仅仅通过一个配置文件便可快速完成数据的导入,无需编写任何代码,十分简单。
-
-希望了解seatunnel与ClickHouse、Elasticsearch、Kafka、Hadoop结合使用的更多功能和案例,可以直接进入项目主页[https://github.com/InterestingLab/seatunnel](https://github.com/InterestingLab/seatunnel)
-
--- Power by [InterestingLab](https://github.com/InterestingLab)
diff --git a/docs/usecase/3.md b/docs/usecase/3.md
deleted file mode 100644
index d268bcf..0000000
--- a/docs/usecase/3.md
+++ /dev/null
@@ -1,231 +0,0 @@
----
-title: 写入数据到Elasticsearch
-sidebar_position: 3
----
-
-说到数据写入Elasticsearch,最先想到的肯定是Logstash。Logstash因为其简单上手、可扩展、可伸缩等优点被广大用户接受。但是尺有所短,寸有所长,Logstash肯定也有它无法适用的应用场景,比如:
-
- * 海量数据ETL
- * 海量数据聚合
- * 多源数据处理
-
-为了满足这些场景,很多同学都会选择Spark,借助Spark算子进行数据处理,最后将处理结果写入Elasticsearch。
-
-我们部门之前利用Spark对Nginx日志进行分析,统计我们的Web服务访问情况,将Nginx日志每分钟聚合一次最后将结果写入Elasticsearch,然后利用Kibana配置实时监控Dashboard。Elasticsearch和Kibana都很方便、实用,但是随着类似需求越来越多,如何快速通过Spark将数据写入Elasticsearch成为了我们的一大问题。
-
-今天给大家推荐一款能够实现数据快速写入的黑科技——[seatunnel](https://github.com/InterestingLab/seatunnel),一个非常易用,高性能,能够应对海量数据的实时数据处理产品,它构建在Spark之上,简单易用,灵活配置,无需开发。
-
-
-![](/doc/image_zh/wd-struct.png)
-
-
-## Kafka to Elasticsearch
-
-和Logstash一样,seatunnel同样支持多种类型的数据输入,这里我们以最常见的Kafka作为输入源为例,讲解如何使用seatunnel将数据快速写入Elasticsearch
-
-### Log Sample
-
-原始日志格式如下:
-```
-127.0.0.1 elasticsearch.cn 114.250.140.241 0.001s "127.0.0.1:80" [26/Oct/2018:21:54:32 +0800] "GET /article HTTP/1.1" 200 123 "-" - "Dalvik/2.1.0 (Linux; U; Android 7.1.1; OPPO R11 Build/NMF26X)"
-```
-
-### Elasticsearch Document
-
-我们想要统计,一分钟每个域名的访问情况,聚合完的数据有以下字段:
-```
-domain String
-hostname String
-status int
-datetime String
-count int
-```
-
-## seatunnel with Elasticsearch
-
-接下来会给大家详细介绍,我们如何通过seatunnel读取Kafka中的数据,对数据进行解析以及聚合,最后将处理结果写入Elasticsearch中。
-
-### seatunnel
-
-[seatunnel](https://github.com/InterestingLab/seatunnel)同样拥有着非常丰富的插件,支持从Kafka、HDFS、Hive中读取数据,进行各种各样的数据处理,并将结果写入Elasticsearch、Kudu或者Kafka中。
-
-### Prerequisites
-
-首先我们需要安装seatunnel,安装十分简单,无需配置系统环境变量
-1. 准备Spark环境
-2. 安装seatunnel
-3. 配置seatunnel
-
-以下是简易步骤,具体安装可以参照[Quick Start](https://interestinglab.github.io/seatunnel-docs/#/zh-cn/v1/quick-start)
-
-```yaml
-cd /usr/local
-wget https://archive.apache.org/dist/spark/spark-2.2.0/spark-2.2.0-bin-hadoop2.7.tgz
-tar -xvf https://archive.apache.org/dist/spark/spark-2.2.0/spark-2.2.0-bin-hadoop2.7.tgz
-wget https://github.com/InterestingLab/seatunnel/releases/download/v1.1.1/seatunnel-1.1.1.zip
-unzip seatunnel-1.1.1.zip
-cd seatunnel-1.1.1
-
-vim config/seatunnel-env.sh
-# 指定Spark安装路径
-SPARK_HOME=${SPARK_HOME:-/usr/local/spark-2.2.0-bin-hadoop2.7}
-```
-
-### seatunnel Pipeline
-
-与Logstash一样,我们仅需要编写一个seatunnel Pipeline的配置文件即可完成数据的导入,相信了解Logstash的朋友可以很快入手seatunnel配置。
-
-配置文件包括四个部分,分别是Spark、Input、filter和Output。
-
-#### Spark
-
-
-这一部分是Spark的相关配置,主要配置Spark执行时所需的资源大小。
-```
-spark {
- spark.app.name = "seatunnel"
- spark.executor.instances = 2
- spark.executor.cores = 1
- spark.executor.memory = "1g"
- spark.streaming.batchDuration = 5
-}
-```
-
-#### Input
-
-这一部分定义数据源,如下是从Kafka中读取数据的配置案例,
-
-```
-kafkaStream {
- topics = "seatunnel-es"
- consumer.bootstrap.servers = "localhost:9092"
- consumer.group.id = "seatunnel_es_group"
- consumer.rebalance.max.retries = 100
-}
-```
-
-#### Filter
-
-在Filter部分,这里我们配置一系列的转化,包括正则解析将日志进行拆分、时间转换将HTTPDATE转化为Elasticsearch支持的日期格式、对Number类型的字段进行类型转换以及通过SQL进行数据聚合
-```yaml
-filter {
- # 使用正则解析原始日志
- # 最开始数据都在raw_message字段中
- grok {
- source_field = "raw_message"
- pattern = '%{NOTSPACE:hostname}\\s%{NOTSPACE:domain}\\s%{IP:remote_addr}\\s%{NUMBER:request_time}s\\s\"%{DATA:upstream_ip}\"\\s\\[%{HTTPDATE:timestamp}\\]\\s\"%{NOTSPACE:method}\\s%{DATA:url}\\s%{NOTSPACE:http_ver}\"\\s%{NUMBER:status}\\s%{NUMBER:body_bytes_send}\\s%{DATA:referer}\\s%{NOTSPACE:cookie_info}\\s\"%{DATA:user_agent}'
- }
- # 将"dd/MMM/yyyy:HH:mm:ss Z"格式的数据转换为
- # Elasticsearch中支持的格式
- date {
- source_field = "timestamp"
- target_field = "datetime"
- source_time_format = "dd/MMM/yyyy:HH:mm:ss Z"
- target_time_format = "yyyy-MM-dd'T'HH:mm:ss.SSS+08:00"
- }
- ## 利用SQL对数据进行聚合
- sql {
- table_name = "access_log"
- sql = "select domain, hostname, int(status), datetime, count(*) from access_log group by domain, hostname, status, datetime"
- }
- }
-```
-
-#### Output
-最后我们将处理好的结构化数据写入Elasticsearch。
-
-```yaml
-output {
- elasticsearch {
- hosts = ["localhost:9200"]
- index = "seatunnel-${now}"
- es.batch.size.entries = 100000
- index_time_format = "yyyy.MM.dd"
- }
-}
-```
-
-### Running seatunnel
-
-我们将上述四部分配置组合成为我们的配置文件`config/batch.conf`。
-
- vim config/batch.conf
-
-```
-spark {
- spark.app.name = "seatunnel"
- spark.executor.instances = 2
- spark.executor.cores = 1
- spark.executor.memory = "1g"
- spark.streaming.batchDuration = 5
-}
-input {
- kafkaStream {
- topics = "seatunnel-es"
- consumer.bootstrap.servers = "localhost:9092"
- consumer.group.id = "seatunnel_es_group"
- consumer.rebalance.max.retries = 100
- }
-}
-filter {
- # 使用正则解析原始日志
- # 最开始数据都在raw_message字段中
- grok {
- source_field = "raw_message"
- pattern = '%{IP:hostname}\\s%{NOTSPACE:domain}\\s%{IP:remote_addr}\\s%{NUMBER:request_time}s\\s\"%{DATA:upstream_ip}\"\\s\\[%{HTTPDATE:timestamp}\\]\\s\"%{NOTSPACE:method}\\s%{DATA:url}\\s%{NOTSPACE:http_ver}\"\\s%{NUMBER:status}\\s%{NUMBER:body_bytes_send}\\s%{DATA:referer}\\s%{NOTSPACE:cookie_info}\\s\"%{DATA:user_agent}'
- }
- # 将"dd/MMM/yyyy:HH:mm:ss Z"格式的数据转换为
- # Elasticsearch中支持的格式
- date {
- source_field = "timestamp"
- target_field = "datetime"
- source_time_format = "dd/MMM/yyyy:HH:mm:ss Z"
- target_time_format = "yyyy-MM-dd'T'HH:mm:00.SSS+08:00"
- }
- ## 利用SQL对数据进行聚合
- sql {
- table_name = "access_log"
- sql = "select domain, hostname, status, datetime, count(*) from access_log group by domain, hostname, status, datetime"
- }
- }
-output {
- elasticsearch {
- hosts = ["localhost:9200"]
- index = "seatunnel-${now}"
- es.batch.size.entries = 100000
- index_time_format = "yyyy.MM.dd"
- }
-}
-```
-
-执行命令,指定配置文件,运行seatunnel,即可将数据写入Elasticsearch。这里我们以本地模式为例。
-
- ./bin/start-seatunnel.sh --config config/batch.conf -e client -m 'local[2]'
-
-最后,写入Elasticsearch中的数据如下,再配上Kibana就可以实现Web服务的实时监控了^_^.
-
-```
-"_source": {
- "domain": "elasticsearch.cn",
- "hostname": "localhost",
- "status": "200",
- "datetime": "2018-11-26T21:54:00.000+08:00",
- "count": 26
- }
-```
-
-## Conclusion
-
-在这篇文章中,我们介绍了如何通过seatunnel将Kafka中的数据写入Elasticsearch中。仅仅通过一个配置文件便可快速运行一个Spark Application,完成数据的处理、写入,无需编写任何代码,十分简单。
-
-当数据处理过程中有遇到Logstash无法支持的场景或者Logstash性能无法达到预期的情况下,都可以尝试使用seatunnel解决问题。
-
-希望了解seatunnel与Elasticsearch、Kafka、Hadoop结合使用的更多功能和案例,可以直接进入项目主页[https://github.com/InterestingLab/seatunnel](https://github.com/InterestingLab/seatunnel)
-
-
-**我们近期会再发布一篇《如何用Spark和Elasticsearch做交互式数据分析》,敬请期待.**
-
-## Contact us
-* 邮件列表 : **dev@seatunnel.apache.org**. 发送任意内容至 `dev-subscribe@seatunnel.apache.org`, 按照回复订阅邮件列表。
-* Slack: 发送 `Request to join SeaTunnel slack` 邮件到邮件列表 (`dev@seatunnel.apache.org`), 我们会邀请你加入(在此之前请确认已经注册Slack).
-* [bilibili B站 视频](https://space.bilibili.com/1542095008)
diff --git a/docs/usecase/4.md b/docs/usecase/4.md
deleted file mode 100644
index fd53898..0000000
--- a/docs/usecase/4.md
+++ /dev/null
@@ -1,259 +0,0 @@
-# 怎么用Spark在TiDB上做OLAP分析
-
-![](https://download.pingcap.com/images/tidb-planet.jpg)
-
-[TiDB](https://github.com/pingcap/tidb) 是一款定位于在线事务处理/在线分析处理的融合型数据库产品,实现了一键水平伸缩,强一致性的多副本数据安全,分布式事务,实时 OLAP 等重要特性。
-
-TiSpark 是 PingCAP 为解决用户复杂 OLAP 需求而推出的产品。它借助 Spark 平台,同时融合 TiKV 分布式集群的优势。
-
-直接使用 TiSpark 完成 OLAP 操作需要了解 Spark,还需要一些开发工作。那么,有没有一些开箱即用的工具能帮我们更快速地使用 TiSpark 在 TiDB 上完成 OLAP 分析呢?
-
-目前开源社区上有一款工具 **seatunnel**,项目地址 [https://github.com/InterestingLab/seatunnel](https://github.com/InterestingLab/seatunnel) ,可以基于Spark,在 TiSpark 的基础上快速实现 TiDB 数据读取和 OLAP 分析。
-
-
-## 使用seatunnel操作TiDB
-
-在我们线上有这么一个需求,从 TiDB 中读取某一天的网站访问数据,统计每个域名以及服务返回状态码的访问次数,最后将统计结果写入 TiDB 另外一个表中。 我们来看看seatunnel是如何实现这么一个功能的。
-
-### seatunnel
-
-[seatunnel](https://github.com/InterestingLab/seatunnel) 是一个非常易用,高性能,能够应对海量数据的实时数据处理产品,它构建在 Spark 之上。seatunnel 拥有着非常丰富的插件,支持从 TiDB、Kafka、HDFS、Kudu 中读取数据,进行各种各样的数据处理,然后将结果写入 TiDB、ClickHouse、Elasticsearch 或者 Kafka 中。
-
-
-#### 准备工作
-
-##### 1. TiDB 表结构介绍
-
-**Input**(存储访问日志的表)
-
-```
-CREATE TABLE access_log (
- domain VARCHAR(255),
- datetime VARCHAR(63),
- remote_addr VARCHAR(63),
- http_ver VARCHAR(15),
- body_bytes_send INT,
- status INT,
- request_time FLOAT,
- url TEXT
-)
-```
-
-```
-+-----------------+--------------+------+------+---------+-------+
-| Field | Type | Null | Key | Default | Extra |
-+-----------------+--------------+------+------+---------+-------+
-| domain | varchar(255) | YES | | NULL | |
-| datetime | varchar(63) | YES | | NULL | |
-| remote_addr | varchar(63) | YES | | NULL | |
-| http_ver | varchar(15) | YES | | NULL | |
-| body_bytes_send | int(11) | YES | | NULL | |
-| status | int(11) | YES | | NULL | |
-| request_time | float | YES | | NULL | |
-| url | text | YES | | NULL | |
-+-----------------+--------------+------+------+---------+-------+
-```
-
-**Output**(存储结果数据的表)
-
-```
-CREATE TABLE access_collect (
- date VARCHAR(23),
- domain VARCHAR(63),
- status INT,
- hit INT
-)
-```
-
-```
-+--------+-------------+------+------+---------+-------+
-| Field | Type | Null | Key | Default | Extra |
-+--------+-------------+------+------+---------+-------+
-| date | varchar(23) | YES | | NULL | |
-| domain | varchar(63) | YES | | NULL | |
-| status | int(11) | YES | | NULL | |
-| hit | int(11) | YES | | NULL | |
-+--------+-------------+------+------+---------+-------+
-```
-
-##### 2. 安装 seatunnel
-
-有了 TiDB 输入和输出表之后, 我们需要安装 seatunnel,安装十分简单,无需配置系统环境变量
-1. 准备 Spark环境
-2. 安装 seatunnel
-3. 配置 seatunnel
-
-以下是简易步骤,具体安装可以参照[Quick Start](https://interestinglab.github.io/seatunnel-docs/#/zh-cn/v1/quick-start)
-
-```
-# 下载安装Spark
-cd /usr/local
-wget https://archive.apache.org/dist/spark/spark-2.1.0/spark-2.1.0-bin-hadoop2.7.tgz
-tar -xvf spark-2.1.0-bin-hadoop2.7.tgz
-
-# 下载安装seatunnel
-wget https://github.com/InterestingLab/seatunnel/releases/download/v1.2.0/seatunnel-1.2.0.zip
-unzip seatunnel-1.2.0.zip
-cd seatunnel-1.2.0
-
-vim config/seatunnel-env.sh
-# 指定Spark安装路径
-SPARK_HOME=${SPARK_HOME:-/usr/local/spark-2.1.0-bin-hadoop2.7}
-```
-
-
-### 实现 seatunnel 处理流程
-
-我们仅需要编写一个 seatunnel 配置文件即可完成数据的读取、处理、写入。
-
-seatunnel 配置文件由四个部分组成,分别是 `Spark`、`Input`、`Filter` 和 `Output`。`Input` 部分用于指定数据的输入源,`Filter` 部分用于定义各种各样的数据处理、聚合,`Output` 部分负责将处理之后的数据写入指定的数据库或者消息队列。
-
-整个处理流程为 `Input` -> `Filter` -> `Output`,整个流程组成了 seatunnel 的 处理流程(Pipeline)。
-
-> 以下是一个具体配置,此配置来源于线上实际应用,但是为了演示有所简化。
-
-
-##### Input (TiDB)
-
-这部分配置定义输入源,如下是从 TiDB 一张表中读取数据。
-
- input {
- tidb {
- database = "nginx"
- pre_sql = "select * from nginx.access_log"
- table_name = "spark_nginx_input"
- }
- }
-
-##### Filter
-
-在Filter部分,这里我们配置一系列的转化, 大部分数据分析的需求,都是在Filter完成的。seatunnel 提供了丰富的插件,足以满足各种数据分析需求。这里我们通过 SQL 插件完成数据的聚合操作。
-
- filter {
- sql {
- table_name = "spark_nginx_log"
- sql = "select count(*) as hit, domain, status, substring(datetime, 1, 10) as date from spark_nginx_log where substring(datetime, 1, 10)='2019-01-20' group by domain, status, substring(datetime, 1, 10)"
- }
- }
-
-
-##### Output (TiDB)
-
-最后, 我们将处理后的结果写入TiDB另外一张表中。TiDB Output是通过JDBC实现的
-
- output {
- tidb {
- url = "jdbc:mysql://127.0.0.1:4000/nginx?useUnicode=true&characterEncoding=utf8"
- table = "access_collect"
- user = "username"
- password = "password"
- save_mode = "append"
- }
- }
-
-##### Spark
-
-这一部分是 Spark 的相关配置,主要配置 Spark 执行时所需的资源大小以及其他 Spark 配置。
-
-我们的 TiDB Input 插件是基于 TiSpark 实现的,而 TiSpark 依赖于 TiKV 集群和 Placement Driver (PD)。因此我们需要指定 PD 节点信息以及 TiSpark 相关配置`spark.tispark.pd.addresses`和`spark.sql.extensions`。
-
-
- spark {
- spark.app.name = "seatunnel-tidb"
- spark.executor.instances = 2
- spark.executor.cores = 1
- spark.executor.memory = "1g"
- # Set for TiSpark
- spark.tispark.pd.addresses = "localhost:2379"
- spark.sql.extensions = "org.apache.spark.sql.TiExtensions"
- }
-
-
-#### 运行 seatunnel
-
-我们将上述四部分配置组合成我们最终的配置文件`config/tidb.conf`
-
-```
-spark {
- spark.app.name = "seatunnel-tidb"
- spark.executor.instances = 2
- spark.executor.cores = 1
- spark.executor.memory = "1g"
- # Set for TiSpark
- spark.tispark.pd.addresses = "localhost:2379"
- spark.sql.extensions = "org.apache.spark.sql.TiExtensions"
-}
-input {
- tidb {
- database = "nginx"
- pre_sql = "select * from nginx.access_log"
- table_name = "spark_table"
- }
-}
-filter {
- sql {
- table_name = "spark_nginx_log"
- sql = "select count(*) as hit, domain, status, substring(datetime, 1, 10) as date from spark_nginx_log where substring(datetime, 1, 10)='2019-01-20' group by domain, status, substring(datetime, 1, 10)"
- }
-}
-output {
- tidb {
- url = "jdbc:mysql://127.0.0.1:4000/nginx?useUnicode=true&characterEncoding=utf8"
- table = "access_collect"
- user = "username"
- password = "password"
- save_mode = "append"
- }
-}
-```
-
-执行命令,指定配置文件,运行 seatunnel ,即可实现我们的数据处理逻辑。
-
-* Local
-
-> ./bin/start-seatunnel.sh --config config/tidb.conf --deploy-mode client --master 'local[2]'
-
-* yarn-client
-
-> ./bin/start-seatunnel.sh --config config/tidb.conf --deploy-mode client --master yarn
-
-* yarn-cluster
-
-> ./bin/start-seatunnel.sh --config config/tidb.conf --deploy-mode cluster --master yarn
-
-如果是本机测试验证逻辑,用本地模式(Local)就可以了,一般生产环境下,都是使用`yarn-client`或者`yarn-cluster`模式。
-
-#### 检查结果
-
-```
-mysql> select * from access_collect;
-+------------+--------+--------+------+
-| date | domain | status | hit |
-+------------+--------+--------+------+
-| 2019-01-20 | b.com | 200 | 63 |
-| 2019-01-20 | a.com | 200 | 85 |
-+------------+--------+--------+------+
-2 rows in set (0.21 sec)
-```
-
-
-
-## 总结
-
-在这篇文章中,我们介绍了如何使用 seatunnel 从 TiDB 中读取数据,做简单的数据处理之后写入 TiDB 另外一个表中。仅通过一个配置文件便可快速完成数据的导入,无需编写任何代码。
-
-除了支持 TiDB 数据源之外,seatunnel 同样支持Elasticsearch, Kafka, Kudu, ClickHouse等数据源。
-
-**与此同时,我们正在研发一个重要功能,就是在 seatunnel 中,利用 TiDB 的事务特性,实现从 Kafka 到 TiDB 流式数据处理,并且支持端(Kafka)到端(TiDB)的 Exactly-Once 数据一致性。**
-
-希望了解 seatunnel 和 TiDB,ClickHouse、Elasticsearch、Kafka结合使用的更多功能和案例,可以直接进入项目主页 [https://github.com/InterestingLab/seatunnel](https://github.com/InterestingLab/seatunnel)或者联系项目负责人:
-
-## 联系我们
-* 邮件列表 : **dev@seatunnel.apache.org**. 发送任意内容至 `dev-subscribe@seatunnel.apache.org`, 按照回复订阅邮件列表。
-* Slack: 发送 `Request to join SeaTunnel slack` 邮件到邮件列表 (`dev@seatunnel.apache.org`), 我们会邀请你加入(在此之前请确认已经注册Slack).
-* [bilibili B站 视频](https://space.bilibili.com/1542095008)
-
-
-
--- Powered by [InterestingLab](https://github.com/InterestingLab)
-
diff --git a/docs/usecase/5.md b/docs/usecase/5.md
deleted file mode 100644
index ce765c3..0000000
--- a/docs/usecase/5.md
+++ /dev/null
@@ -1,280 +0,0 @@
-# seatunnel最近支持的StructuredStreaming怎么用?
-
-### 前言
-
-StructuredStreaming是Spark 2.0以后新开放的一个模块,相比SparkStreaming,它有一些比较突出的优点:<br/>   一、它能做到更低的延迟;<br/>
-  二、可以做实时的聚合,例如实时计算每天每个商品的销售总额;<br/>
-  三、可以做流与流之间的关联,例如计算广告的点击率,需要将广告的曝光记录和点击记录关联。<br/>
-以上几点如果使用SparkStreaming来实现可能会比较麻烦或者说是很难实现,但是使用StructuredStreaming实现起来会比较轻松。
-### 如何使用StructuredStreaming
-可能你没有详细研究过StructuredStreaming,但是发现StructuredStreaming能很好的解决你的需求,如何快速利用StructuredStreaming来解决你的需求?目前社区有一款工具**seatunnel**,项目地址:https://github.com/InterestingLab/seatunnel ,
-可以高效低成本的帮助你利用StructuredStreaming来完成你的需求。
-
-### seatunnel
-
-seatunnel是一个非常易用,高性能,能够应对海量数据的实时数据处理产品,它构建在Spark之上。seatunnel拥有着非常丰富的插件,支持从Kafka、HDFS、Kudu中读取数据,进行各种各样的数据处理,并将结果写入ClickHouse、Elasticsearch或者Kafka中
-
-### 准备工作
-
-首先我们需要安装seatunnel,安装十分简单,无需配置系统环境变量
-
-1. 准备Spark环境
-2. 安装seatunnel
-3. 配置seatunnel
-以下是简易步骤,具体安装可以参照[Quick Start](https://interestinglab.github.io/seatunnel-docs/#/zh-cn/v1/quick-start)
-
-```
-cd /usr/local
-wget https://archive.apache.org/dist/spark/spark-2.2.0/spark-2.2.0-bin-hadoop2.7.tgz
-tar -xvf spark-2.2.0-bin-hadoop2.7.tgz
-wget https://github.com/InterestingLab/seatunnel/releases/download/v1.3.0/seatunnel-1.3.0.zip
-unzip seatunnel-1.3.0.zip
-cd seatunnel-1.3.0
-
-vim config/seatunnel-env.sh
-# 指定Spark安装路径
-SPARK_HOME=${SPARK_HOME:-/usr/local/spark-2.2.0-bin-hadoop2.7}
-```
-
-### seatunnel Pipeline
-
-我们仅需要编写一个seatunnel Pipeline的配置文件即可完成数据的导入。
-
-配置文件包括四个部分,分别是Spark、Input、filter和Output。
-
-#### Spark
-
-这一部分是Spark的相关配置,主要配置Spark执行时所需的资源大小。
-
-```
-spark {
- spark.app.name = "seatunnel"
- spark.executor.instances = 2
- spark.executor.cores = 1
- spark.executor.memory = "1g"
-}
-```
-
-#### Input
-
-下面是一个从kafka读取数据的例子
-
-```
-kafkaStream {
- topics = "seatunnel"
- consumer.bootstrap.servers = "localhost:9092"
- schema = "{\"name\":\"string\",\"age\":\"integer\",\"addrs\":{\"country\":\"string\",\"city\":\"string\"}}"
-}
-```
-
-通过上面的配置就可以读取kafka里的数据了 ,topics是要订阅的kafka的topic,同时订阅多个topic可以以逗号隔开,consumer.bootstrap.servers就是Kafka的服务器列表,schema是可选项,因为StructuredStreaming从kafka读取到的值(官方固定字段value)是binary类型的,详见http://spark.apache.org/docs/latest/structured-streaming-kafka-integration.html
-但是如果你确定你kafka里的数据是json字符串的话,你可以指定schema,input插件将按照你指定的schema解析
-
-#### Filter
-
-下面是一个简单的filter例子
-
-```
-filter{
- sql{
- table_name = "student"
- sql = "select name,age from student"
- }
-}
-```
-`table_name`是注册成的临时表名,以便于在下面的sql使用
-
-#### Output
-
-处理好的数据往外输出,假设我们的输出也是kafka
-
-```
-output{
- kafka {
- topic = "seatunnel"
- producer.bootstrap.servers = "localhost:9092"
- streaming_output_mode = "update"
- checkpointLocation = "/your/path"
- }
-}
-```
-
-`topic` 是你要输出的topic,` producer.bootstrap.servers`是kafka集群列表,`streaming_output_mode`是StructuredStreaming的一个输出模式参数,有三种类型`append|update|complete`,具体使用参见文档http://spark.apache.org/docs/latest/structured-streaming-programming-guide.html#output-modes
-
-`checkpointLocation`是StructuredStreaming的checkpoint路径,如果配置了的话,这个目录会存储程序的运行信息,比如程序退出再启动的话会接着上次的offset进行消费。
-
-### 场景分析
-
-以上就是一个简单的例子,接下来我们就来介绍稍微复杂一些的业务场景
-
-#### 场景一:实时聚合场景
-
-假设现在有一个商城,上面有10种商品,现在需要实时求每天每种商品的销售额,甚至是求每种商品的购买人数(不要求十分精确)。
-这么做的巨大的优势就是海量数据可以在实时处理的时候,完成聚合,再也不需要先将数据写入数据仓库,再跑离线的定时任务进行聚合,
-操作起来还是很方便的。
-
-kafka的数据如下
-
-```
-{"good_id":"abc","price":300,"user_id":123456,"time":1553216320}
-```
-
-那我们该怎么利用seatunnel来完成这个需求呢,当然还是只需要配置就好了。
-
-```
-#spark里的配置根据业务需求配置
-spark {
- spark.app.name = "seatunnel"
- spark.executor.instances = 2
- spark.executor.cores = 1
- spark.executor.memory = "1g"
-}
-
-#配置input
-input {
- kafkaStream {
- topics = "good_topic"
- consumer.bootstrap.servers = "localhost:9092"
- schema = "{\"good_id\":\"string\",\"price\":\"integer\",\"user_id\":\"Long\",\"time\":\"Long\"}"
- }
-}
-
-#配置filter
-filter {
-
- #在程序做聚合的时候,内部会去存储程序从启动开始的聚合状态,久而久之会导致OOM,如果设置了watermark,程序自动的会去清理watermark之外的状态
- #这里表示使用ts字段设置watermark,界限为1天
-
- Watermark {
- time_field = "time"
- time_type = "UNIX" #UNIX表示时间字段为10位的时间戳,还有其他的类型详细可以查看插件文档
- time_pattern = "yyyy-MM-dd" #这里之所以要把ts对齐到天是因为求每天的销售额,如果是求每小时的销售额可以对齐到小时`yyyy-MM-dd HH`
- delay_threshold = "1 day"
- watermark_field = "ts" #设置watermark之后会新增一个字段,`ts`就是这个字段的名字
- }
-
- #之所以要group by ts是要让watermark生效,approx_count_distinct是一个估值,并不是精确的count_distinct
- sql {
- table_name = "good_table_2"
- sql = "select good_id,sum(price) total, approx_count_distinct(user_id) person from good_table_2 group by ts,good_id"
- }
-}
-
-#接下来我们选择将结果实时输出到Kafka
-output{
- kafka {
- topic = "seatunnel"
- producer.bootstrap.servers = "localhost:9092"
- streaming_output_mode = "update"
- checkpointLocation = "/your/path"
- }
-}
-
-```
-如上配置完成,启动seatunnel,就可以获取你想要的结果了。
-
-#### 场景二:多个流关联场景
-
-假设你在某个平台投放了广告,现在要实时计算出每个广告的CTR(点击率),数据分别来自两个topic,一个是广告曝光日志,一个是广告点击日志,
-此时我们就需要把两个流数据关联到一起做计算,而seatunnel 最近也支持了此功能,让我们一起看一下该怎么做:
-
-
-点击topic数据格式
-
-```
-{"ad_id":"abc","click_time":1553216320,"user_id":12345}
-
-```
-
-曝光topic数据格式
-
-```
-{"ad_id":"abc","show_time":1553216220,"user_id":12345}
-
-```
-
-```
-#spark里的配置根据业务需求配置
-spark {
- spark.app.name = "seatunnel"
- spark.executor.instances = 2
- spark.executor.cores = 1
- spark.executor.memory = "1g"
-}
-
-#配置input
-input {
-
- kafkaStream {
- topics = "click_topic"
- consumer.bootstrap.servers = "localhost:9092"
- schema = "{\"ad_id\":\"string\",\"user_id\":\"Long\",\"click_time\":\"Long\"}"
- table_name = "click_table"
- }
-
- kafkaStream {
- topics = "show_topic"
- consumer.bootstrap.servers = "localhost:9092"
- schema = "{\"ad_id\":\"string\",\"user_id\":\"Long\",\"show_time\":\"Long\"}"
- table_name = "show_table"
- }
-}
-
-filter {
-
- #左关联右表必须设置watermark
- #右关联左表必须设置watermark
- #http://spark.apache.org/docs/latest/structured-streaming-programming-guide.html#inner-joins-with-optional-watermarking
- Watermark {
- source_table_name = "click_table" #这里可以指定为某个临时表添加watermark,不指定的话就是为input中的第一个
- time_field = "time"
- time_type = "UNIX"
- delay_threshold = "3 hours"
- watermark_field = "ts"
- result_table_name = "click_table_watermark" #添加完watermark之后可以注册成临时表,方便后续在sql中使用
- }
-
- Watermark {
- source_table_name = "show_table"
- time_field = "time"
- time_type = "UNIX"
- delay_threshold = "2 hours"
- watermark_field = "ts"
- result_table_name = "show_table_watermark"
- }
-
-
- sql {
- table_name = "show_table_watermark"
- sql = "select a.ad_id,count(b.user_id)/count(a.user_id) ctr from show_table_watermark as a left join click_table_watermark as b on a.ad_id = b.ad_id and a.user_id = b.user_id "
- }
-
-}
-
-#接下来我们选择将结果实时输出到Kafka
-output {
- kafka {
- topic = "seatunnel"
- producer.bootstrap.servers = "localhost:9092"
- streaming_output_mode = "append" #流关联只支持append模式
- checkpointLocation = "/your/path"
- }
-}
-```
-
-通过配置,到这里流关联的案例也完成了。
-
-### 结语
-通过配置能很快的利用StructuredStreaming做实时数据处理,但是还是需要对StructuredStreaming的一些概念了解,比如其中的watermark机制,还有程序的输出模式。
-
-最后,seatunnel当然还支持spark streaming和spark 批处理。
-如果你对这两个也感兴趣的话,可以阅读我们以前发布的文章《[如何快速地将Hive中的数据导入ClickHouse](https://interestinglab.github.io/seatunnel-docs/#/zh-cn/v1/case_study/2)》、
-《[优秀的数据工程师,怎么用Spark在TiDB上做OLAP分析](https://interestinglab.github.io/seatunnel-docs/#/zh-cn/v1/case_study/4)》、
-《[如何使用Spark快速将数据写入Elasticsearch](https://interestinglab.github.io/seatunnel-docs/#/zh-cn/v1/case_study/3)》
-
-希望了解 seatunnel 和 HBase, ClickHouse、Elasticsearch、Kafka、MySQL 等数据源结合使用的更多功能和案例,可以直接进入项目主页 [https://github.com/InterestingLab/seatunnel](https://github.com/InterestingLab/seatunnel)或者联系项目负责人:
-
-## 联系我们
-* 邮件列表 : **dev@seatunnel.apache.org**. 发送任意内容至 `dev-subscribe@seatunnel.apache.org`, 按照回复订阅邮件列表。
-* Slack: 发送 `Request to join SeaTunnel slack` 邮件到邮件列表 (`dev@seatunnel.apache.org`), 我们会邀请你加入(在此之前请确认已经注册Slack).
-* [bilibili B站 视频](https://space.bilibili.com/1542095008)
diff --git a/docs/usecase/README.md b/docs/usecase/README.md
deleted file mode 100644
index a07d839..0000000
--- a/docs/usecase/README.md
+++ /dev/null
@@ -1,24 +0,0 @@
-### 行业应用案例
-
-* [如何快速地把HDFS中的数据导入Clickhouse](usecase/1.md)
-* [如何快速地将Hive中的数据导入ClickHouse](usecase/2.md)
-* [如何使用Spark快速将数据写入Elasticsearch](usecase/3.md)
-* [优秀的数据工程师,怎么用Spark在TiDB上做OLAP分析](usecase/4.md)
-* [seatunnel中StructuredStreaming怎么用](usecase/5.md)
-
-### 使用seatunnel的公司
-
-* [微博](https://weibo.com), 增值业务部数据平台
-
-![微博Logo](https://img.t.sinajs.cn/t5/style/images/staticlogo/groups3.png?version=f362a1c5be520a15)
-
-* [新浪](http://www.sina.com.cn/), 大数据运维分析平台
-
-![新浪Logo](http://n.sinaimg.cn/tech/ir/imges/logo.png)
-
-* [一下科技](https://www.yixia.com/), 一直播数据平台
-
-![一下科技Logo](https://imgaliyuncdn.miaopai.com/static20131031/miaopai20140729/new_yixia/static/imgs/logo.png)
-
-* 其他公司 ... 期待您的加入
-
diff --git a/docs/usecase/_category_.json b/docs/usecase/_category_.json
deleted file mode 100644
index 3bc79db..0000000
--- a/docs/usecase/_category_.json
+++ /dev/null
@@ -1,4 +0,0 @@
-{
- "label": "应用案例",
- "position": 6
-}
\ No newline at end of file
diff --git a/docusaurus.config.js b/docusaurus.config.js
index b2e5b40..c770b2f 100644
--- a/docusaurus.config.js
+++ b/docusaurus.config.js
@@ -76,7 +76,7 @@ const config = {
items: [
{
label: "Next-2.x (WIP)",
- to: "https://interestinglab.github.io/seatunnel-docs/#/zh-cn/v2/",
+ to: "/docs/introduction",
},
{
label: "1.x(Not apache release)",
diff --git a/tools/build-docs.sh b/tools/build-docs.sh
new file mode 100644
index 0000000..60bf502
--- /dev/null
+++ b/tools/build-docs.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+WEBSITE_REPO="https://github.com/apache/incubator-seatunnel-website.git"
+MAIN_REPO="https://github.com/apache/incubator-seatunnel.git"
+
+WEBSITE_NAME="seatunnel-website"
+MAIN_NAME="seatunnel"
+
+WORK_PATH="./seatunnel"
+
+if [ ! -d ${WORK_PATH} ]; then
+ mkdir -p ${WORK_PATH}
+else
+ rm -rf ${WORK_PATH}
+ mkdir -p ${WORK_PATH}
+fi
+
+echo "===>>>: Start documents sync"
+
+cd ${WORK_PATH}
+echo "===>>>: current path: ${WORK_PATH}"
+
+echo "===>>>: Clone git repositories"
+
+echo "===>>>: Clone ${MAIN_NAME} repositories"
+git clone --depth 1 ${WEBSITE_REPO} ${WORK_PATH}/${WEBSITE_NAME}
+
+echo "===>>>: Clone ${WEBSITE_NAME} repositories"
+git clone --depth 1 ${MAIN_REPO} ${WORK_PATH}/${MAIN_NAME}
+
+echo "===>>>: Replace elements inside MD files"
+cp -rf ${WORK_PATH}/${MAIN_NAME}/docs/en/ ${WORK_PATH}/${WEBSITE_NAME}/docs