You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hugegraph.apache.org by zh...@apache.org on 2022/08/29 14:57:00 UTC

[incubator-hugegraph-toolchain] branch master updated: feat: support yarn-cluster mode for spark-loader (#318)

This is an automated email from the ASF dual-hosted git repository.

zhaocong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hugegraph-toolchain.git


The following commit(s) were added to refs/heads/master by this push:
     new bea0f0d  feat: support yarn-cluster mode for spark-loader (#318)
bea0f0d is described below

commit bea0f0da84391ecb13bd854d504ea3148c3b312e
Author: Simon Cheung <mi...@apache.org>
AuthorDate: Mon Aug 29 22:56:56 2022 +0800

    feat: support yarn-cluster mode for spark-loader (#318)
---
 hugegraph-loader/assembly/static/bin/get-params.sh | 37 +++++++++++++++-------
 .../static/bin/hugegraph-flinkcdc-loader.sh        | 12 +++----
 .../assembly/static/bin/hugegraph-spark-loader.sh  | 13 ++++----
 3 files changed, 39 insertions(+), 23 deletions(-)

diff --git a/hugegraph-loader/assembly/static/bin/get-params.sh b/hugegraph-loader/assembly/static/bin/get-params.sh
index f6a2cc7..c9a8132 100644
--- a/hugegraph-loader/assembly/static/bin/get-params.sh
+++ b/hugegraph-loader/assembly/static/bin/get-params.sh
@@ -1,24 +1,39 @@
 #!/bin/bash
 function get_params() {
   echo "params: $*"
-  engine_params=""
-  hugegraph_params=""
+  ENGINE_PARAMS=""
+  HUGEGRAPH_PARAMS=""
   while (("$#")); do
     case "$1" in
-      -–file | --graph | --schema | --host | --port | --username | --token | --protocol | \
+      --graph | --schema | --host | --port | --username | --token | --protocol | \
       --trust-store-file | --trust-store-password | --clear-all-data | --clear-timeout | \
       --incremental-mode | --failure-mode | --batch-insert-threads | --single-insert-threads | \
       --max-conn | --max-conn-per-route | --batch-size | --max-parse-errors | --max-insert-errors | \
       --timeout | --shutdown-timeout | --retry-times | --retry-interval | --check-vertex | \
       --print-progress | --dry-run | --help)
-      hugegraph_params="$hugegraph_params $1 $2"
-      shift 2
-      ;;
-
-    *) # preserve positional arguments
-      engine_params="$engine_params $1"
-      shift
-      ;;
+        HUGEGRAPH_PARAMS="$HUGEGRAPH_PARAMS $1 $2"
+        shift 2
+        ;;
+      --file)
+        file=$2
+        shift 2
+        ;;
+      --deploy-mode)
+        mode=$2
+        ENGINE_PARAMS="$ENGINE_PARAMS $1 $2"
+        shift 2
+        ;;
+      *) # preserve positional arguments
+        ENGINE_PARAMS="$ENGINE_PARAMS $1"
+        shift
+        ;;
     esac
   done
+
+  if [ $mode = 'cluster' ];then
+    HUGEGRAPH_PARAMS="$HUGEGRAPH_PARAMS --file ${file##*/}"
+    ENGINE_PARAMS="$ENGINE_PARAMS --files ${file}"
+  else
+    HUGEGRAPH_PARAMS="$HUGEGRAPH_PARAMS --file ${file}"
+  fi
 }
diff --git a/hugegraph-loader/assembly/static/bin/hugegraph-flinkcdc-loader.sh b/hugegraph-loader/assembly/static/bin/hugegraph-flinkcdc-loader.sh
index f8b52d6..9e6b08c 100755
--- a/hugegraph-loader/assembly/static/bin/hugegraph-flinkcdc-loader.sh
+++ b/hugegraph-loader/assembly/static/bin/hugegraph-flinkcdc-loader.sh
@@ -3,18 +3,18 @@
 BIN_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 APP_DIR=$(dirname ${BIN_DIR})
 LIB_DIR=${APP_DIR}/lib
-assemblyJarName=$(find ${LIB_DIR} -name hugegraph-loader*.jar)
+ASSEMBLY_JAR_NAME=$(find ${LIB_DIR} -name hugegraph-loader*.jar)
 
 # get hugegraph_params and engine_params
-source "$BIN_DIR"/get_params.sh
+source "$BIN_DIR"/get-params.sh
 get_params $*
-echo "engine_params: $engine_params"
-echo "hugegraph_params: $hugegraph_params"
+echo "engine_params: $ENGINE_PARAMS"
+echo "hugegraph_params: $HUGEGRAPH_PARAMS"
 
 CMD=${FLINK_HOME}/bin/flink run \
-  ${engine_params} \
+  ${ENGINE_PARAMS} \
   -c com.baidu.hugegraph.loader.flink.HugeGraphFlinkCDCLoader \
-  ${assemblyJarName} ${hugegraph_params}
+  ${ASSEMBLY_JAR_NAME} ${HUGEGRAPH_PARAMS}
 
 echo ${CMD}
 exec ${CMD}
diff --git a/hugegraph-loader/assembly/static/bin/hugegraph-spark-loader.sh b/hugegraph-loader/assembly/static/bin/hugegraph-spark-loader.sh
index 70a8412..c2004ea 100755
--- a/hugegraph-loader/assembly/static/bin/hugegraph-spark-loader.sh
+++ b/hugegraph-loader/assembly/static/bin/hugegraph-spark-loader.sh
@@ -5,20 +5,21 @@ APP_DIR=$(dirname ${BIN_DIR})
 LIB_DIR=${APP_DIR}/lib
 
 # get hugegraph_params and engine_params
-source "$BIN_DIR"/get_params.sh
+source "$BIN_DIR"/get-params.sh
 get_params $*
-echo "engine_params: $engine_params"
-echo "hugegraph_params: $hugegraph_params"
+echo "engine_params: $ENGINE_PARAMS"
+echo "hugegraph_params: $HUGEGRAPH_PARAMS"
+
+ASSEMBLY_JAR_NAME=$(find ${LIB_DIR} -name hugegraph-loader*.jar)
 
-assemblyJarName=$(find ${LIB_DIR} -name hugegraph-loader*.jar)
 
 DEFAULT_APP_NAME="hugegraph-spark-loader"
 APP_NAME=${APP_NAME:-$DEFAULT_APP_NAME}
 
 CMD="${SPARK_HOME}/bin/spark-submit
     --class com.baidu.hugegraph.loader.spark.HugeGraphSparkLoader \
-    ${engine_params}
-    --jars $(echo ${LIB_DIR}/*.jar | tr ' ' ',') ${assemblyJarName} ${hugegraph_params}"
+    ${ENGINE_PARAMS}
+    --jars $(echo ${LIB_DIR}/*.jar | tr ' ' ',') ${ASSEMBLY_JAR_NAME} ${HUGEGRAPH_PARAMS}"
 
 echo ${CMD}
 exec ${CMD}