You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@seatunnel.apache.org by ki...@apache.org on 2022/03/25 10:22:07 UTC

[incubator-seatunnel-website] branch main updated: [refactor] Simplify site build script and make it work locally (#94)

This is an automated email from the ASF dual-hosted git repository.

kirs pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-seatunnel-website.git


The following commit(s) were added to refs/heads/main by this push:
     new dc3ad89  [refactor] Simplify site build script and make it work locally (#94)
dc3ad89 is described below

commit dc3ad89a32fa19e073b72a23bea714e39727479d
Author: Jiajie Zhong <zh...@hotmail.com>
AuthorDate: Fri Mar 25 18:22:03 2022 +0800

    [refactor] Simplify site build script and make it work locally (#94)
    
    * [refactor] Simplify site build script and make it work locally
    
    This patch simplifies our site build script: the previous version needed
    to clone two repositories, while now only our main repo is required.
    
    This patch also makes the build work locally, which makes it
    easier for developers to test or run the website.
    
    also close: apache/incubator-seatunnel#1550
    
    * Change readme files
    
    * recover docs dir
    
    * test
    
    * delete file the whole dirs
    
    * recover the directory docs
---
 .github/workflows/deploy.yml |   1 -
 .gitignore                   |  20 +++--
 README.md                    |  10 +--
 README_ZH.md                 |  10 +--
 docs/introduction.md         | 169 -------------------------------------------
 tools/build-docs.sh          | 163 ++++++++++++++++++++++++++++-------------
 6 files changed, 137 insertions(+), 236 deletions(-)

diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index 261c4bc..4cbb838 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -25,7 +25,6 @@ jobs:
           node-version: 14
       - name: Documents Sync
         run: |
-          chmod -R 775 ./tools/build-docs.sh
           bash ./tools/build-docs.sh
         shell: bash
       - name: install
diff --git a/.gitignore b/.gitignore
index 6e805c6..febfa97 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,23 +1,33 @@
+# System Temp
+.DS_Store
+
 # Intellij
 .idea/
 *.iml
 *.iws
 
+# vscode
+.vscode
+
 # Dependencies
-/node_modules
+node_modules/
+package-lock.json
 
 # Production
-/build
+build/
 
 # Generated files
 .docusaurus
 .cache-loader
 
-.DS_Store
+# Temp directories for preparing docs from the main repository;
+# these directories will be created when you run the script `./tools/build-docs.sh`
+swap/
+docs/
+static/image_en/
+
 dist
 dist-ssr
 *.local
 *.idea
 *.iml.
-.vscode
-/package-lock.json
diff --git a/README.md b/README.md
index aa7bf1c..ca3f534 100644
--- a/README.md
+++ b/README.md
@@ -23,10 +23,11 @@ This website is compiled using node, using Docusaurus framework components
 
 1. Download and install nodejs (version>12.5.0)
 2. Clone the code to the local `git clone git@github.com:apache/incubator-seatunnel-website.git`
-2. Run `npm install` to install the required dependent libraries.
-3. Run `npm run start` in the root directory, you can visit http://localhost:3000 to view the English mode preview of the site
-4. Run `npm run start-zh` in the root directory, you can visit http://localhost:3000 to view the Chinese mode preview of the site
-5. To generate static website resource files, run `npm run build`. The static resources of the build are in the build directory.
+3. Run `./tools/build-docs.sh` to fetch and prepare docs from **apache/incubator-seatunnel**
+4. Run `npm install` to install the required dependent libraries.
+5. Run `npm run start` in the root directory, you can visit http://localhost:3000 to view the English mode preview of the site
+6. Run `npm run start-zh` in the root directory, you can visit http://localhost:3000 to view the Chinese mode preview of the site
+7. To generate static website resource files, run `npm run build`. The static resources of the build are in the build directory.
 
 ## 2. Directory structure
 
@@ -38,7 +39,6 @@ This website is compiled using node, using Docusaurus framework components
 ├── babel.config.js
 ├── blog
 ├── community  // Community
-├── docs  // Documentation
 ├── docusaurus.config.js
 |-- download  // Download
 ├── faq  // Q&A
diff --git a/README_ZH.md b/README_ZH.md
index f76bfda..399cb28 100644
--- a/README_ZH.md
+++ b/README_ZH.md
@@ -23,10 +23,11 @@ asf-staging 官网测试环境  通过https://seatunnel.staged.apache.org 访问
 
 1. 下载并安装 nodejs(version>12.5.0)
 2. 克隆代码到本地 `git clone  git@github.com:apache/incubator-seatunnel-website.git`
-2. 运行 `npm install` 来安装所需的依赖库。
-3. 在根目录运行`npm run start`,可以访问http://localhost:3000查看站点英文模式预览
-4. 在根目录运行`npm run start-zh`,可以访问http://localhost:3000查看站点的中文模式预览
-5. 要生成静态网站资源文件,运行 `npm run build`。构建的静态资源在build目录中。
+3. 运行 `./tools/build-docs.sh` 从 **apache/incubator-seatunnel** 中拉取、准备文档
+4. 运行 `npm install` 来安装所需的依赖库。
+5. 在根目录运行`npm run start`,可以访问http://localhost:3000查看站点英文模式预览
+6. 在根目录运行`npm run start-zh`,可以访问http://localhost:3000查看站点的中文模式预览
+7. 要生成静态网站资源文件,运行 `npm run build`。构建的静态资源在build目录中。
 
 ## 2.目录结构
 
@@ -38,7 +39,6 @@ asf-staging 官网测试环境  通过https://seatunnel.staged.apache.org 访问
 ├── babel.config.js
 ├── blog
 ├── community  // 社区
-├── docs  // 文档
 ├── docusaurus.config.js
 |-- download  // 下载
 ├── faq  // Q&A
diff --git a/docs/introduction.md b/docs/introduction.md
deleted file mode 100644
index 0cd8722..0000000
--- a/docs/introduction.md
+++ /dev/null
@@ -1,169 +0,0 @@
----
-title: Introduction
-sidebar_position: 1
----
-
-# SeaTunnel
-
-<img src="https://seatunnel.apache.org/image/logo.png" alt="seatunnel logo" width="200px" height="200px" align="right" />
-
-[![Slack](https://img.shields.io/badge/slack-%23seatunnel-4f8eba?logo=slack)](https://join.slack.com/t/apacheseatunnel/shared_invite/zt-123jmewxe-RjB_DW3M3gV~xL91pZ0oVQ)
-[![Twitter Follow](https://img.shields.io/twitter/follow/ASFSeaTunnel.svg?label=Follow&logo=twitter)](https://twitter.com/ASFSeaTunnel)
-
----
-
-SeaTunnel was formerly named Waterdrop , and renamed SeaTunnel since October 12, 2021.
-
----
-
-SeaTunnel is a very easy-to-use ultra-high-performance distributed data integration platform that supports real-time
-synchronization of massive data. It can synchronize tens of billions of data stably and efficiently every day, and has
-been used in the production of nearly 100 companies.
-
-## Why do we need SeaTunnel
-
-SeaTunnel will do its best to solve the problems that may be encountered in the synchronization of massive data:
-
-- Data loss and duplication
-- Task accumulation and delay
-- Low throughput
-- Long cycle to be applied in the production environment
-- Lack of application running status monitoring
-
-## SeaTunnel use scenarios
-
-- Mass data synchronization
-- Mass data integration
-- ETL with massive data
-- Mass data aggregation
-- Multi-source data processing
-
-## Features of SeaTunnel
-
-- Easy to use, flexible configuration, low code development
-- Real-time streaming
-- Offline multi-source data analysis
-- High-performance, massive data processing capabilities
-- Modular and plug-in mechanism, easy to extend
-- Support data processing and aggregation by SQL
-- Support Spark structured streaming
-- Support Spark 2.x
-
-## Workflow of SeaTunnel
-
-![seatunnel-workflow.svg](../static/image/seatunnel-workflow.svg)
-
-```
-Source[Data Source Input] -> Transform[Data Processing] -> Sink[Result Output]
-```
-
-The data processing pipeline is constituted by multiple filters to meet a variety of data processing needs. If you are
-accustomed to SQL, you can also directly construct a data processing pipeline by SQL, which is simple and efficient.
-Currently, the filter list supported by SeaTunnel is still being expanded. Furthermore, you can develop your own data
-processing plug-in, because the whole system is easy to expand.
-
-## Plugins supported by SeaTunnel
-
-- Input plugin Fake, File, Hdfs, Kafka, Druid, InfluxDB, S3, Socket, self-developed Input plugin
-
-- Filter plugin Add, Checksum, Convert, Date, Drop, Grok, Json, Kv, Lowercase, Remove, Rename, Repartition, Replace,
-  Sample, Split, Sql, Table, Truncate, Uppercase, Uuid, Self-developed Filter plugin
-
-- Output plugin Elasticsearch, File, Hdfs, Jdbc, Kafka, Druid, InfluxDB, Mysql, S3, Stdout, self-developed Output plugin
-
-## Environmental dependency
-
-1. java runtime environment, java >= 8
-
-2. If you want to run SeaTunnel in a cluster environment, any of the following Spark cluster environments is usable:
-
-- Spark on Yarn
-- Spark Standalone
-
-If the data volume is small, or the goal is merely for functional verification, you can also start in local mode without
-a cluster environment, because SeaTunnel supports standalone operation. Note: SeaTunnel 2.0 supports running on Spark
-and Flink.
-
-## Downloads
-
-Download address for run-directly software package :https://github.com/apache/incubator-seatunnel/releases
-
-## Quick start
-
-**Spark**
-https://seatunnel.apache.org/docs/spark/quick-start
-
-**Flink**
-https://seatunnel.apache.org/docs/flink/quick-start
-
-Detailed documentation on SeaTunnel
-https://seatunnel.apache.org/docs/introduction
-
-## Application practice cases
-
-- Weibo, Value-added Business Department Data Platform
-
-Weibo business uses an internal customized version of SeaTunnel and its sub-project Guardian for SeaTunnel On Yarn task
-monitoring for hundreds of real-time streaming computing tasks.
-
-- Sina, Big Data Operation Analysis Platform
-
-Sina Data Operation Analysis Platform uses SeaTunnel to perform real-time and offline analysis of data operation and
-maintenance for Sina News, CDN and other services, and write it into Clickhouse.
-
-- Sogou, Sogou Qiqian System
-
-Sogou Qiqian System takes SeaTunnel as an ETL tool to help establish a real-time data warehouse system.
-
-- Qutoutiao, Qutoutiao Data Center
-
-Qutoutiao Data Center uses SeaTunnel to support mysql to hive offline ETL tasks, real-time hive to clickhouse backfill
-technical support, and well covers most offline and real-time tasks needs.
-
-- Yixia Technology, Yizhibo Data Platform
-
-- Yonghui Superstores Founders' Alliance-Yonghui Yunchuang Technology, Member E-commerce Data Analysis Platform
-
-SeaTunnel provides real-time streaming and offline SQL computing of e-commerce user behavior data for Yonghui Life, a
-new retail brand of Yonghui Yunchuang Technology.
-
-- Shuidichou, Data Platform
-
-Shuidichou adopts SeaTunnel to do real-time streaming and regular offline batch processing on Yarn, processing 3~4T data
-volume average daily, and later writing the data to Clickhouse.
-
-- Tencent Cloud
-
-Collecting various logs from business services into Apache Kafka, some of the data in Apache Kafka is consumed and extracted through Seatunnel, and then store into Clickhouse.
-
-For more use cases, please refer to: https://seatunnel.apache.org/blog
-
-## Code of conduct
-
-This project adheres to the Contributor Covenant [code of conduct](https://www.apache.org/foundation/policies/conduct).
-By participating, you are expected to uphold this code. Please follow
-the [REPORTING GUIDELINES](https://www.apache.org/foundation/policies/conduct#reporting-guidelines) to report
-unacceptable behavior.
-
-## Developer
-
-Thanks to all developers!
-
-[![](https://opencollective.com/seatunnel/contributors.svg?width=666)](https://github.com/apache/incubator-seatunnel/graphs/contributors)
-
-## Contact Us
-
-* Mail list: **dev@seatunnel.apache.org**. Mail to `dev-subscribe@seatunnel.apache.org`, follow the reply to subscribe
-  the mail list.
-* Slack: https://join.slack.com/t/apacheseatunnel/shared_invite/zt-123jmewxe-RjB_DW3M3gV~xL91pZ0oVQ
-* Twitter: https://twitter.com/ASFSeaTunnel
-* [Bilibili](https://space.bilibili.com/1542095008) (for Chinese users)
-
-## Landscapes
-
-<p align="center">
-<br/><br/>
-<img src="https://landscape.cncf.io/images/left-logo.svg" width="150" alt=""/>&nbsp;&nbsp;<img src="https://landscape.cncf.io/images/right-logo.svg" width="200" alt=""/>
-<br/><br/>
-SeaTunnel enriches the <a href="https://landscape.cncf.io/landscape=observability-and-analysis&license=apache-license-2-0">CNCF CLOUD NATIVE Landscape.</a >
-</p >
diff --git a/tools/build-docs.sh b/tools/build-docs.sh
old mode 100644
new mode 100755
index a662c47..49a5570
--- a/tools/build-docs.sh
+++ b/tools/build-docs.sh
@@ -1,76 +1,137 @@
 #!/usr/bin/env bash
 
-set -exv
+set -euo pipefail
 
-WEBSITE_REPO="https://github.com/apache/incubator-seatunnel-website.git"
-MAIN_REPO="https://github.com/apache/incubator-seatunnel.git"
+SOURCE_PATH="$(cd "$(dirname "$(dirname "${BASH_SOURCE[0]}")" )" && pwd)"
 
-WEBSITE_REPO_NAME="incubator-seatunnel-website"
-WEBSITE_NAME="website"
-MAIN_NAME="seatunnel"
+PROJECT_NAME="seatunnel"
+PROJECT_BRANCH_NAME="dev"
+# PROJECT_WEBSITE_NAME="${PROJECT_NAME}-website"
 
-WORK_PATH=~/work/${WEBSITE_REPO_NAME}
+SWAP_DIR="${SOURCE_PATH}/swap"
+PROJECT_SITE_IMG_DIR="${SOURCE_PATH}/static/image_en"
+PROJECT_SITE_DOC_DIR="${SOURCE_PATH}/docs"
 
-MAIN_PATH=${WORK_PATH}/${MAIN_NAME}
-WEBSITE_PATH=${WORK_PATH}/${WEBSITE_NAME}
-WEBSITE_REPO_PATH=${WORK_PATH}/${WEBSITE_REPO_NAME}
-DOCS_EN=${MAIN_PATH}/docs/en
+PROJECT_DIR="${SWAP_DIR}/${PROJECT_NAME}"
+PROJECT_IMG_DIR="${PROJECT_DIR}/docs/en/images"
+PROJECT_DOC_DIR="${PROJECT_DIR}/docs/en"
 
-if [ ! -d ${WORK_PATH} ]; then
-  mkdir -p ${WORK_PATH}
+
+# Choose the protocol for git communication with the server; the default is HTTPS because it does not request a
+# password or secret key. Run command `export PROTOCOL_MODE=ssh` in a terminal to change the protocol to SSH, which is
+# faster and more stable in many cases, such as local development where we already have an RSA public key.
+if [ "${PROTOCOL_MODE:-HTTP}" == "ssh" ]; then
+    PROJECT_REPO="git@github.com:apache/${PROJECT_NAME}.git"
 else
-  rm -rf ${WORK_PATH}
-  mkdir -p ${WORK_PATH}
+    PROJECT_REPO="https://github.com/apache/${PROJECT_NAME}.git"
 fi
 
-echo "===>>>: Start documents sync"
-
-cd ${WORK_PATH}
-echo "===>>>: current work path: ${WORK_PATH}"
-
-echo "===>>>: Clone git repositories"
-
-echo "===>>>: Clone ${WEBSITE_REPO} repositories to ${WEBSITE_NAME}"
-git clone --depth 1 ${WEBSITE_REPO} ${WEBSITE_PATH}
-
-echo "===>>>: Clone ${MAIN_REPO} repositories to ${MAIN_NAME}"
-git clone --depth 1 ${MAIN_REPO} ${MAIN_PATH}
-
-echo "===>>>: Copy images to ${WEBSITE_PATH}/static/doc/image_en/"
-cp -rf ${DOCS_EN}/images/* ${WEBSITE_PATH}/static/doc/image_en/
+##############################################################
+#
+# Rebuild the given directories: if a directory exists, remove
+# it before recreating it; otherwise create it directly.
+#
+# Arguments:
+#
+#   path: One or more directories to rebuild
+#
+##############################################################
+function rebuild_dirs() {
+    for dir in "$@"; do
+        echo "  ---> Rebuild directory ${dir}"
+        if [ -d "${dir}" ]; then
+          rm -rf "${dir}"
+        fi
+        mkdir -p "${dir}"
+    done
+}
 
-if [ -d ${DOCS_EN}/images ]; then
-  rm -rf ${DOCS_EN}/images
-fi
+##############################################################
+#
+# Clone a repository into the target directory, always with a
+# depth of one. Supports two or three parameters; to clone a
+# specific branch, provide the branch name as the second
+# parameter.
+#
+# Arguments:
+#
+#   repo: The link of the repository you want to clone
+#   branch: Optional parameter, the branch to clone; when
+#           omitted, the repository's default branch is used
+#   path: The directory to keep the cloned content
+#
+##############################################################
+function clone_repo() {
+    if [ "$#" -eq 2 ]; then
+        local repo="${1}"
+        local path="${2}"
+
+        echo "  ---> Start clone repository ${repo} to directory ${path}"
+        git clone --depth 1 "${repo}" "${path}"
+    elif [ "$#" -eq 3 ]; then
+        local repo="${1}"
+        local branch="${2}"
+        local path="${3}"
+
+        echo "  ---> Start clone repository ${repo} branch ${branch} to directory ${path}"
+        git clone --depth 1 --branch "${branch}" "${repo}" "${path}"
+    else
+        echo "Illegal number of parameters. Only support parameters number of 2 or 3 but get $#."
+        exit 1
+    fi
+}
 
-echo "===>>>: Replace images path to /doc/image_en"
-function replaceImagesPath(){
-  CURRENT_DIR=$1
-  echo "===>>>: Current directory: ${CURRENT_DIR}"
-  for filePath in ${CURRENT_DIR}/*; do
-    if test -f ${filePath}; then
-      if [ "${filePath##*.}"x = "md"x ]; then
-        echo "===>>: Replace images path to /doc/image_en in ${filePath}"
+##############################################################
+#
+# Replace image paths in markdown documents: the source path
+# in repo `apache/incubator-seatunnel` is like `images/<name>.png`
+# and we replace it with `/image_en/<name>.png`
+#
+# Arguments:
+#
+#   replace_dir: The directory to replace the img path
+#
+##############################################################
+function replace_images_path(){
+  replace_dir=$1
+  for file_path in "${replace_dir}"/*; do
+    if test -f "${file_path}"; then
+      if [ "${file_path##*.}"x = "md"x ]; then
+        echo "  ---> Replace images path to /image_en in ${file_path}"
         if [[ "$OSTYPE" == "darwin"* ]]; then
-          sed -E -i '' "s/(\.\.\/)*images/\/doc\/image_en/g" ${filePath}
+          sed -E -i '' "s/(\.\.\/)*images/\/image_en/g" "${file_path}"
         else
-          sed -E -i "s/(\.\.\/)*images/\/doc\/image_en/g" ${filePath}
+          sed -E -i "s/(\.\.\/)*images/\/image_en/g" "${file_path}"
         fi
       fi
     else
-      replaceImagesPath ${filePath}
+      replace_images_path "${file_path}"
     fi
   done
 }
 
-replaceImagesPath ${DOCS_EN}
+##############################################################
+# Main project to do prepare job to debug and build our web
+##############################################################
+function prepare_docs() {
+    echo "===>>>: Start documents sync."
 
-echo "===>>>: Replace elements inside md filePath"
-cp -rf ${DOCS_EN}/* ${WEBSITE_PATH}/docs/
+    echo "===>>>: Rebuild directory swap, docs, static/image_en."
+    rebuild_dirs "${SWAP_DIR}" "${PROJECT_SITE_DOC_DIR}" "${PROJECT_SITE_IMG_DIR}"
 
-echo "===>>>: Replace docs Done"
+    echo "===>>>: Clone project main codebase repositories."
+    clone_repo "${PROJECT_REPO}" "${PROJECT_BRANCH_NAME}" "${PROJECT_DIR}"
 
-echo "===>>>: Copy ${WEBSITE_PATH} to ${WEBSITE_REPO_PATH} directory"
-cp -rf ${WEBSITE_PATH} ${WEBSITE_REPO_PATH}
+    echo "===>>>: Rsync images to ${PROJECT_SITE_IMG_DIR}"
+    rsync -av "${PROJECT_IMG_DIR}"/ "${PROJECT_SITE_IMG_DIR}"
+
+    echo "===>>>: Rsync documents exclude images to ${PROJECT_SITE_DOC_DIR}"
+    rsync -av --exclude images "${PROJECT_DOC_DIR}"/ "${PROJECT_SITE_DOC_DIR}"
+
+    echo "===>>>: Replace images path in ${PROJECT_SITE_DOC_DIR}"
+    replace_images_path "${PROJECT_SITE_DOC_DIR}"
+
+    echo "===>>>: End documents sync"
+}
 
-echo "===>>>: Replace ${WEBSITE_REPO_PATH} Done"
+prepare_docs