You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2021/06/02 14:13:20 UTC

[arrow-datafusion] branch master updated: Update k8s user guide to use deployments (#474)

This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 01b57f7  Update k8s user guide to use deployments (#474)
01b57f7 is described below

commit 01b57f70241e158d471a1396c0b6461eccbd6e82
Author: Ximo Guanter <xi...@gmail.com>
AuthorDate: Wed Jun 2 16:13:13 2021 +0200

    Update k8s user guide to use deployments (#474)
---
 ballista/rust/executor/executor_config_spec.toml   |  2 +-
 ballista/rust/executor/src/main.rs                 |  2 +-
 ballista/rust/scheduler/scheduler_config_spec.toml |  2 +-
 ballista/rust/scheduler/src/main.rs                |  2 +-
 benchmarks/README.md                               |  6 +-
 benchmarks/docker-compose.yaml                     |  4 +-
 docs/user-guide/src/distributed/docker-compose.md  |  2 +-
 docs/user-guide/src/distributed/kubernetes.md      | 70 ++++++++++------------
 docs/user-guide/src/distributed/raspberrypi.md     |  2 +-
 docs/user-guide/src/distributed/standalone.md      | 12 ++--
 10 files changed, 50 insertions(+), 54 deletions(-)

diff --git a/ballista/rust/executor/executor_config_spec.toml b/ballista/rust/executor/executor_config_spec.toml
index 8d817fe..3cb168e 100644
--- a/ballista/rust/executor/executor_config_spec.toml
+++ b/ballista/rust/executor/executor_config_spec.toml
@@ -53,7 +53,7 @@ doc = "Host name or IP address to register with scheduler so that other executor
 
 [[param]]
 abbr = "p"
-name = "port"
+name = "bind_port"
 type = "u16"
 default = "50051"
 doc = "bind port"
diff --git a/ballista/rust/executor/src/main.rs b/ballista/rust/executor/src/main.rs
index aad53d7..4c63ba8 100644
--- a/ballista/rust/executor/src/main.rs
+++ b/ballista/rust/executor/src/main.rs
@@ -75,7 +75,7 @@ async fn main() -> Result<()> {
 
     let external_host = opt.external_host;
     let bind_host = opt.bind_host;
-    let port = opt.port;
+    let port = opt.bind_port;
 
     let addr = format!("{}:{}", bind_host, port);
     let addr = addr
diff --git a/ballista/rust/scheduler/scheduler_config_spec.toml b/ballista/rust/scheduler/scheduler_config_spec.toml
index 560e9a2..81e77d3 100644
--- a/ballista/rust/scheduler/scheduler_config_spec.toml
+++ b/ballista/rust/scheduler/scheduler_config_spec.toml
@@ -54,7 +54,7 @@ doc = "Local host name or IP address to bind to. Default: 0.0.0.0"
 
 [[param]]
 abbr = "p"
-name = "port"
+name = "bind_port"
 type = "u16"
 default = "50050"
 doc = "bind port. Default: 50050"
\ No newline at end of file
diff --git a/ballista/rust/scheduler/src/main.rs b/ballista/rust/scheduler/src/main.rs
index 713103f..34386ca 100644
--- a/ballista/rust/scheduler/src/main.rs
+++ b/ballista/rust/scheduler/src/main.rs
@@ -116,7 +116,7 @@ async fn main() -> Result<()> {
 
     let namespace = opt.namespace;
     let bind_host = opt.bind_host;
-    let port = opt.port;
+    let port = opt.bind_port;
 
     let addr = format!("{}:{}", bind_host, port);
     let addr = addr.parse()?;
diff --git a/benchmarks/README.md b/benchmarks/README.md
index e347130..0b5ccfc 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -122,7 +122,7 @@ RUST_LOG=info RUSTFLAGS='-C target-cpu=native -C lto -C codegen-units=1 -C embed
 To run the benchmarks:
 
 ```bash
-cd $ARROW_HOME/ballista/rust/benchmarks/tpch
+cd $ARROW_HOME/benchmarks
 cargo run --release benchmark ballista --host localhost --port 50050 --query 1 --path $(pwd)/data --format tbl
 ```
 
@@ -131,9 +131,9 @@ cargo run --release benchmark ballista --host localhost --port 50050 --query 1 -
 To start a Rust scheduler and executor using Docker Compose:
 
 ```bash
-cd $BALLISTA_HOME
+cd $ARROW_HOME
 ./dev/build-rust.sh
-cd $BALLISTA_HOME/rust/benchmarks/tpch
+cd $ARROW_HOME/benchmarks
 docker-compose up
 ```
 
diff --git a/benchmarks/docker-compose.yaml b/benchmarks/docker-compose.yaml
index c13e9eb..74c6703 100644
--- a/benchmarks/docker-compose.yaml
+++ b/benchmarks/docker-compose.yaml
@@ -21,7 +21,7 @@ services:
     command: "etcd -advertise-client-urls http://etcd:2379 -listen-client-urls http://0.0.0.0:2379"
   ballista-scheduler:
     image: ballista:0.5.0-SNAPSHOT
-    command: "/scheduler --config-backend etcd --etcd-urls etcd:2379 --bind-host 0.0.0.0 --port 50050"
+    command: "/scheduler --config-backend etcd --etcd-urls etcd:2379 --bind-host 0.0.0.0 --bind-port 50050"
     environment:
       - RUST_LOG=ballista=debug
     volumes:
@@ -30,7 +30,7 @@ services:
       - etcd
   ballista-executor:
     image: ballista:0.5.0-SNAPSHOT
-    command: "/executor --bind-host 0.0.0.0 --port 50051 --scheduler-host ballista-scheduler"
+    command: "/executor --bind-host 0.0.0.0 --bind-port 50051 --scheduler-host ballista-scheduler"
     scale: 2
     environment:
       - RUST_LOG=info
diff --git a/docs/user-guide/src/distributed/docker-compose.md b/docs/user-guide/src/distributed/docker-compose.md
index de27364..5ea86b5 100644
--- a/docs/user-guide/src/distributed/docker-compose.md
+++ b/docs/user-guide/src/distributed/docker-compose.md
@@ -33,7 +33,7 @@ services:
       - "2379:2379"
   ballista-executor:
     image: ballistacompute/ballista-rust:0.4.2-SNAPSHOT
-    command: "/executor --bind-host 0.0.0.0 --port 50051 --local"
+    command: "/executor --bind-host 0.0.0.0 --bind-port 50051 --local"
     environment:
       - RUST_LOG=info
     ports:
diff --git a/docs/user-guide/src/distributed/kubernetes.md b/docs/user-guide/src/distributed/kubernetes.md
index 7b9b356..07b51f7 100644
--- a/docs/user-guide/src/distributed/kubernetes.md
+++ b/docs/user-guide/src/distributed/kubernetes.md
@@ -24,8 +24,8 @@ you are already comfortable with managing Kubernetes deployments.
 
 The k8s deployment consists of:
 
-- k8s stateful set for one or more scheduler processes
-- k8s stateful set for one or more executor processes
+- k8s deployment for one or more scheduler processes
+- k8s deployment for one or more executor processes
 - k8s service to route traffic to the schedulers
 - k8s persistent volume and persistent volume claims to make local data accessible to Ballista
 
@@ -38,6 +38,14 @@ Ballista is at an early stage of development and therefore has some significant
 - Only a single scheduler instance is currently supported unless the scheduler is configured to use `etcd` as a
   backing store.
 
+## Publishing your images
+
+Currently there are no official Ballista images that work with the instructions in this guide. For the time being,
+you will need to build and publish your own images. You can do that by running the `dev/build-ballista-docker.sh` script.
+
+Once the images have been built, you can retag them with `docker tag ballista:0.5.0-SNAPSHOT <new-image-name>` so you
+can push them to your favourite docker registry.
+
 ## Create Persistent Volume and Persistent Volume Claim
 
 Copy the following yaml to a `pv.yaml` file and apply to the cluster to create a persistent volume and a persistent
@@ -88,7 +96,7 @@ persistentvolumeclaim/data-pv-claim created
 
 ## Deploying Ballista Scheduler and Executors
 
-Copy the following yaml to a `cluster.yaml` file.
+Copy the following yaml to a `cluster.yaml` file and replace `<your-image>` with the name of your Ballista Docker image.
 
 ```yaml
 apiVersion: v1
@@ -101,16 +109,14 @@ spec:
   ports:
     - port: 50050
       name: scheduler
-  clusterIP: None
   selector:
     app: ballista-scheduler
 ---
 apiVersion: apps/v1
-kind: StatefulSet
+kind: Deployment
 metadata:
   name: ballista-scheduler
 spec:
-  serviceName: "ballista-scheduler"
   replicas: 1
   selector:
     matchLabels:
@@ -122,27 +128,26 @@ spec:
         ballista-cluster: ballista
     spec:
       containers:
-        - name: ballista-scheduler
-          image: ballistacompute/ballista-rust:0.4.2-SNAPSHOT
-          command: ["/scheduler"]
-          args: ["--port=50050"]
-          ports:
-            - containerPort: 50050
-              name: flight
-          volumeMounts:
-            - mountPath: /mnt
-              name: data
+      - name: ballista-scheduler
+        image: <your-image>
+        command: ["/scheduler"]
+        args: ["--bind-port=50050"]
+        ports:
+          - containerPort: 50050
+            name: flight
+        volumeMounts:
+          - mountPath: /mnt
+            name: data
       volumes:
         - name: data
           persistentVolumeClaim:
             claimName: data-pv-claim
 ---
 apiVersion: apps/v1
-kind: StatefulSet
+kind: Deployment
 metadata:
   name: ballista-executor
 spec:
-  serviceName: "ballista-scheduler"
   replicas: 2
   selector:
     matchLabels:
@@ -155,20 +160,12 @@ spec:
     spec:
       containers:
         - name: ballista-executor
-          image: ballistacompute/ballista-rust:0.4.2-SNAPSHOT
+          image: <your-image>
           command: ["/executor"]
           args:
-            [
-              "--port=50051",
-              "--scheduler-host=ballista-scheduler",
-              "--scheduler-port=50050",
-              "--external-host=$(MY_POD_IP)",
-            ]
-          env:
-            - name: MY_POD_IP
-              valueFrom:
-                fieldRef:
-                  fieldPath: status.podIP
+            - "--bind-port=50051"
+            - "--scheduler-host=ballista-scheduler"
+            - "--scheduler-port=50050"
           ports:
             - containerPort: 50051
               name: flight
@@ -189,19 +186,18 @@ This should show the following output:
 
 ```
 service/ballista-scheduler created
-statefulset.apps/ballista-scheduler created
-statefulset.apps/ballista-executor created
+deployment.apps/ballista-scheduler created
+deployment.apps/ballista-executor created
 ```
 
 You can also check status by running `kubectl get pods`:
 
 ```bash
 $ kubectl get pods
-NAME                   READY   STATUS    RESTARTS   AGE
-busybox                1/1     Running   0          16m
-ballista-scheduler-0   1/1     Running   0          42s
-ballista-executor-0    1/1     Running   2          42s
-ballista-executor-1    1/1     Running   0          26s
+NAME                                 READY   STATUS    RESTARTS   AGE
+ballista-executor-78cc5b6486-4rkn4   0/1     Pending   0          42s
+ballista-executor-78cc5b6486-7crdm   0/1     Pending   0          42s
+ballista-scheduler-879f874c5-rnbd6   0/1     Pending   0          42s
 ```
 
 You can view the scheduler logs with `kubectl logs ballista-scheduler-0`:
diff --git a/docs/user-guide/src/distributed/raspberrypi.md b/docs/user-guide/src/distributed/raspberrypi.md
index 0083d19..3bf36c7 100644
--- a/docs/user-guide/src/distributed/raspberrypi.md
+++ b/docs/user-guide/src/distributed/raspberrypi.md
@@ -116,7 +116,7 @@ Run the benchmarks:
 ```bash
 docker run -it myrepo/ballista-arm64 \
   /tpch benchmark datafusion --query=1 --path=/path/to/data --format=parquet \
-  --concurrency=24 --iterations=1 --debug --host=ballista-scheduler --port=50050
+  --concurrency=24 --iterations=1 --debug --host=ballista-scheduler --bind-port=50050
 ```
 
 Note that it will be necessary to mount appropriate volumes into the containers and also configure networking
diff --git a/docs/user-guide/src/distributed/standalone.md b/docs/user-guide/src/distributed/standalone.md
index e9db425..66b6bc8 100644
--- a/docs/user-guide/src/distributed/standalone.md
+++ b/docs/user-guide/src/distributed/standalone.md
@@ -26,7 +26,7 @@ Start a scheduler using the following syntax:
 ```bash
 docker run --network=host \
   -d ballistacompute/ballista-rust:0.4.2-SNAPSHOT \
-  /scheduler --port 50050
+  /scheduler --bind-port 50050
 ```
 
 Run `docker ps` to check that the process is running:
@@ -34,7 +34,7 @@ Run `docker ps` to check that the process is running:
 ```
 $ docker ps
 CONTAINER ID   IMAGE                                         COMMAND                  CREATED         STATUS         PORTS     NAMES
-59452ce72138   ballistacompute/ballista-rust:0.4.2-SNAPSHOT   "/scheduler --port 5…"   6 seconds ago   Up 5 seconds             affectionate_hofstadter
+59452ce72138   ballistacompute/ballista-rust:0.4.2-SNAPSHOT   "/scheduler --bind-p…"   6 seconds ago   Up 5 seconds             affectionate_hofstadter
 ```
 
 Run `docker logs CONTAINER_ID` to check the output from the process:
@@ -51,7 +51,7 @@ Start one or more executor processes. Each executor process will need to listen
 ```bash
 docker run --network=host \
   -d ballistacompute/ballista-rust:0.4.2-SNAPSHOT \
-  /executor --external-host localhost --port 50051
+  /executor --external-host localhost --bind-port 50051
 ```
 
 Use `docker ps` to check that both the scheduler and executor(s) are now running:
@@ -60,14 +60,14 @@ Use `docker ps` to check that both the scheduer and executor(s) are now running:
 $ docker ps
 CONTAINER ID   IMAGE                                         COMMAND                  CREATED         STATUS         PORTS     NAMES
 0746ce262a19   ballistacompute/ballista-rust:0.4.2-SNAPSHOT   "/executor --externa…"   2 seconds ago   Up 1 second              naughty_mclean
-59452ce72138   ballistacompute/ballista-rust:0.4.2-SNAPSHOT   "/scheduler --port 5…"   4 minutes ago   Up 4 minutes             affectionate_hofstadter
+59452ce72138   ballistacompute/ballista-rust:0.4.2-SNAPSHOT   "/scheduler --bind-p…"   4 minutes ago   Up 4 minutes             affectionate_hofstadter
 ```
 
 Use `docker logs CONTAINER_ID` to check the output from the executor(s):
 
 ```
 $ docker logs 0746ce262a19
-[2021-02-14T18:36:25Z INFO  executor] Running with config: ExecutorConfig { host: "localhost", port: 50051, work_dir: "/tmp/.tmpVRFSvn", concurrent_tasks: 4 }
+[2021-02-14T18:36:25Z INFO  executor] Running with config: ExecutorConfig { host: "localhost", bind_port: 50051, work_dir: "/tmp/.tmpVRFSvn", concurrent_tasks: 4 }
 [2021-02-14T18:36:25Z INFO  executor] Ballista v0.4.2-SNAPSHOT Rust Executor listening on 0.0.0.0:50051
 [2021-02-14T18:36:25Z INFO  executor] Starting registration with scheduler
 ```
@@ -84,7 +84,7 @@ Ballista can optionally use [etcd](https://etcd.io/) as a backing store for the
 ```bash
 docker run --network=host \
   -d ballistacompute/ballista-rust:0.4.2-SNAPSHOT \
-  /scheduler --port 50050 \
+  /scheduler --bind-port 50050 \
   --config-backend etcd \
   --etcd-urls etcd:2379
 ```