You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by si...@apache.org on 2022/10/07 21:02:17 UTC

[hudi] branch master updated: [HUDI-2786] Docker demo on mac aarch64 (#6859)

This is an automated email from the ASF dual-hosted git repository.

sivabalan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 06d924137b [HUDI-2786] Docker demo on mac aarch64 (#6859)
06d924137b is described below

commit 06d924137bbf216864ee4fa09018b325c8b0a636
Author: Jon Vexler <jo...@onehouse.ai>
AuthorDate: Fri Oct 7 17:02:09 2022 -0400

    [HUDI-2786] Docker demo on mac aarch64 (#6859)
---
 ...pose_hadoop284_hive233_spark244_mac_aarch64.yml | 259 +++++++++++++++++++++
 docker/setup_demo.sh                               |  10 +-
 docker/stop_demo.sh                                |   7 +-
 3 files changed, 272 insertions(+), 4 deletions(-)

diff --git a/docker/compose/docker-compose_hadoop284_hive233_spark244_mac_aarch64.yml b/docker/compose/docker-compose_hadoop284_hive233_spark244_mac_aarch64.yml
new file mode 100644
index 0000000000..857180cfbe
--- /dev/null
+++ b/docker/compose/docker-compose_hadoop284_hive233_spark244_mac_aarch64.yml
@@ -0,0 +1,259 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+version: "3.3"
+
+services:
+
+  namenode:
+    image: apachehudi/hudi-hadoop_2.8.4-namenode:linux-arm64-0.10.1
+    platform: linux/arm64
+    hostname: namenode
+    container_name: namenode
+    environment:
+      - CLUSTER_NAME=hudi_hadoop284_hive232_spark244  # NOTE(review): name says hive232 but the stack is Hive 2.3.3 — confirm intended
+    ports:
+      - "50070:50070"
+      - "8020:8020"
+      # JVM debugging port (will be mapped to a random port on host)
+      - "5005"
+    env_file:
+      - ./hadoop.env
+    healthcheck:
+      test: [ "CMD", "curl", "-f", "http://namenode:50070" ]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+
+  datanode1:
+    image: apachehudi/hudi-hadoop_2.8.4-datanode:linux-arm64-0.10.1
+    platform: linux/arm64
+    container_name: datanode1
+    hostname: datanode1
+    environment:
+      - CLUSTER_NAME=hudi_hadoop284_hive232_spark244
+    env_file:
+      - ./hadoop.env
+    ports:
+      - "50075:50075"
+      - "50010:50010"
+      # JVM debugging port (will be mapped to a random port on host)
+      - "5005"
+    links:
+      - "namenode"
+      - "historyserver"
+    healthcheck:
+      test: [ "CMD", "curl", "-f", "http://datanode1:50075" ]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+    depends_on:
+      - namenode
+
+  historyserver:
+    image: apachehudi/hudi-hadoop_2.8.4-history:latest  # NOTE(review): uses ':latest' and lacks 'platform: linux/arm64', unlike sibling services — confirm an arm64-pinned tag exists
+    hostname: historyserver
+    container_name: historyserver
+    environment:
+      - CLUSTER_NAME=hudi_hadoop284_hive232_spark244
+    depends_on:
+      - "namenode"
+    links:
+      - "namenode"
+    ports:
+      - "58188:8188"
+    healthcheck:
+      test: [ "CMD", "curl", "-f", "http://historyserver:8188" ]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+    env_file:
+      - ./hadoop.env
+    volumes:
+      - historyserver:/hadoop/yarn/timeline
+
+  hive-metastore-postgresql:
+    image: menorah84/hive-metastore-postgresql:2.3.0
+    platform: linux/arm64
+    environment:
+      - POSTGRES_HOST_AUTH_METHOD=trust
+    volumes:
+      - hive-metastore-postgresql:/var/lib/postgresql
+    hostname: hive-metastore-postgresql
+    container_name: hive-metastore-postgresql
+
+  hivemetastore:
+    image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3:linux-arm64-0.10.1
+    platform: linux/arm64
+    hostname: hivemetastore
+    container_name: hivemetastore
+    links:
+      - "hive-metastore-postgresql"
+      - "namenode"
+    env_file:
+      - ./hadoop.env
+    command: /opt/hive/bin/hive --service metastore
+    environment:
+      SERVICE_PRECONDITION: "namenode:50070 hive-metastore-postgresql:5432"
+    ports:
+      - "9083:9083"
+      # JVM debugging port (will be mapped to a random port on host)
+      - "5005"
+    healthcheck:
+      test: [ "CMD", "nc", "-z", "hivemetastore", "9083" ]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+    depends_on:
+      - "hive-metastore-postgresql"
+      - "namenode"
+
+  hiveserver:
+    image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3:linux-arm64-0.10.1
+    platform: linux/arm64
+    hostname: hiveserver
+    container_name: hiveserver
+    env_file:
+      - ./hadoop.env
+    environment:
+      SERVICE_PRECONDITION: "hivemetastore:9083"
+    ports:
+      - "10000:10000"
+      # JVM debugging port (will be mapped to a random port on host)
+      - "5005"
+    depends_on:
+      - "hivemetastore"
+    links:
+      - "hivemetastore"
+      - "hive-metastore-postgresql"
+      - "namenode"
+    volumes:
+      - ${HUDI_WS}:/var/hoodie/ws
+
+  sparkmaster:
+    image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkmaster_2.4.4:linux-arm64-0.10.1
+    platform: linux/arm64
+    hostname: sparkmaster
+    container_name: sparkmaster
+    env_file:
+      - ./hadoop.env
+    ports:
+      - "8080:8080"
+      - "7077:7077"
+      # JVM debugging port (will be mapped to a random port on host)
+      - "5005"
+    environment:
+      - INIT_DAEMON_STEP=setup_spark
+    links:
+      - "hivemetastore"
+      - "hiveserver"
+      - "hive-metastore-postgresql"
+      - "namenode"
+
+  spark-worker-1:
+    image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkworker_2.4.4:linux-arm64-0.10.1
+    platform: linux/arm64
+    hostname: spark-worker-1
+    container_name: spark-worker-1
+    env_file:
+      - ./hadoop.env
+    depends_on:
+      - sparkmaster
+    ports:
+      - "8081:8081"
+      # JVM debugging port (will be mapped to a random port on host)
+      - "5005"
+    environment:
+      - "SPARK_MASTER=spark://sparkmaster:7077"
+    links:
+      - "hivemetastore"
+      - "hiveserver"
+      - "hive-metastore-postgresql"
+      - "namenode"
+
+  zookeeper:
+    image: 'arm64v8/zookeeper:3.4.12'
+    platform: linux/arm64
+    hostname: zookeeper
+    container_name: zookeeper
+    ports:
+      - "2181:2181"
+    environment:
+      - ALLOW_ANONYMOUS_LOGIN=yes
+
+  kafka:
+    image: 'wurstmeister/kafka:2.12-2.0.1'
+    platform: linux/arm64
+    hostname: kafkabroker
+    container_name: kafkabroker
+    ports:
+      - "9092:9092"
+    environment:
+      - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
+      - ALLOW_PLAINTEXT_LISTENER=yes
+      - KAFKA_ADVERTISED_HOST_NAME=kafkabroker
+
+  adhoc-1:
+    image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkadhoc_2.4.4:linux-arm64-0.10.1
+    platform: linux/arm64
+    hostname: adhoc-1
+    container_name: adhoc-1
+    env_file:
+      - ./hadoop.env
+    depends_on:
+      - sparkmaster
+    ports:
+      - '4040:4040'
+      # JVM debugging port (mapped to 5006 on the host)
+      - "5006:5005"
+    environment:
+      - "SPARK_MASTER=spark://sparkmaster:7077"
+    links:
+      - "hivemetastore"
+      - "hiveserver"
+      - "hive-metastore-postgresql"
+      - "namenode"
+    volumes:
+      - ${HUDI_WS}:/var/hoodie/ws
+
+  adhoc-2:
+    image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkadhoc_2.4.4:linux-arm64-0.10.1
+    platform: linux/arm64
+    hostname: adhoc-2
+    container_name: adhoc-2
+    env_file:
+      - ./hadoop.env
+    ports:
+      # JVM debugging port (mapped to 5005 on the host)
+      - "5005:5005"
+    depends_on:
+      - sparkmaster
+    environment:
+      - "SPARK_MASTER=spark://sparkmaster:7077"
+    links:
+      - "hivemetastore"
+      - "hiveserver"
+      - "hive-metastore-postgresql"
+      - "namenode"
+    volumes:
+      - ${HUDI_WS}:/var/hoodie/ws
+
+volumes:
+  namenode:
+  historyserver:
+  hive-metastore-postgresql:
+
+networks:
+  default:
diff --git a/docker/setup_demo.sh b/docker/setup_demo.sh
index 9f0a100da6..81270bba75 100755
--- a/docker/setup_demo.sh
+++ b/docker/setup_demo.sh
@@ -19,14 +19,18 @@
 SCRIPT_PATH=$(cd `dirname $0`; pwd)
 HUDI_DEMO_ENV=$1
 WS_ROOT=`dirname $SCRIPT_PATH`
+COMPOSE_FILE_NAME="docker-compose_hadoop284_hive233_spark244.yml"
+if [ "$HUDI_DEMO_ENV" = "--mac-aarch64" ]; then
+  COMPOSE_FILE_NAME="docker-compose_hadoop284_hive233_spark244_mac_aarch64.yml"
+fi
 # restart cluster
-HUDI_WS=${WS_ROOT} docker-compose -f ${SCRIPT_PATH}/compose/docker-compose_hadoop284_hive233_spark244.yml down
+HUDI_WS=${WS_ROOT} docker-compose -f ${SCRIPT_PATH}/compose/${COMPOSE_FILE_NAME} down
 if [ "$HUDI_DEMO_ENV" != "dev" ]; then
   echo "Pulling docker demo images ..."
-  HUDI_WS=${WS_ROOT} docker-compose -f ${SCRIPT_PATH}/compose/docker-compose_hadoop284_hive233_spark244.yml pull
+  HUDI_WS=${WS_ROOT} docker-compose -f ${SCRIPT_PATH}/compose/${COMPOSE_FILE_NAME} pull
 fi
 sleep 5
-HUDI_WS=${WS_ROOT} docker-compose -f ${SCRIPT_PATH}/compose/docker-compose_hadoop284_hive233_spark244.yml up -d
+HUDI_WS=${WS_ROOT} docker-compose -f ${SCRIPT_PATH}/compose/${COMPOSE_FILE_NAME} up -d
 sleep 15
 
 docker exec -it adhoc-1 /bin/bash /var/hoodie/ws/docker/demo/setup_demo_container.sh
diff --git a/docker/stop_demo.sh b/docker/stop_demo.sh
index 83b8a2c1ef..32a0e70c37 100755
--- a/docker/stop_demo.sh
+++ b/docker/stop_demo.sh
@@ -17,10 +17,15 @@
 # limitations under the License.
 
 SCRIPT_PATH=$(cd `dirname $0`; pwd)
+HUDI_DEMO_ENV=$1
 # set up root directory
 WS_ROOT=`dirname $SCRIPT_PATH`
+COMPOSE_FILE_NAME="docker-compose_hadoop284_hive233_spark244.yml"
+if [ "$HUDI_DEMO_ENV" = "--mac-aarch64" ]; then
+  COMPOSE_FILE_NAME="docker-compose_hadoop284_hive233_spark244_mac_aarch64.yml"
+fi
 # shut down cluster
-HUDI_WS=${WS_ROOT} docker-compose -f ${SCRIPT_PATH}/compose/docker-compose_hadoop284_hive233_spark244.yml down
+HUDI_WS=${WS_ROOT} docker-compose -f ${SCRIPT_PATH}/compose/${COMPOSE_FILE_NAME} down
 
 # remove host mount directory
 rm -rf /tmp/hadoop_data