You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2021/06/02 14:13:20 UTC
[arrow-datafusion] branch master updated: Update k8s user guide to use deployments (#474)
This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 01b57f7 Update k8s user guide to use deployments (#474)
01b57f7 is described below
commit 01b57f70241e158d471a1396c0b6461eccbd6e82
Author: Ximo Guanter <xi...@gmail.com>
AuthorDate: Wed Jun 2 16:13:13 2021 +0200
Update k8s user guide to use deployments (#474)
---
ballista/rust/executor/executor_config_spec.toml | 2 +-
ballista/rust/executor/src/main.rs | 2 +-
ballista/rust/scheduler/scheduler_config_spec.toml | 2 +-
ballista/rust/scheduler/src/main.rs | 2 +-
benchmarks/README.md | 6 +-
benchmarks/docker-compose.yaml | 4 +-
docs/user-guide/src/distributed/docker-compose.md | 2 +-
docs/user-guide/src/distributed/kubernetes.md | 70 ++++++++++------------
docs/user-guide/src/distributed/raspberrypi.md | 2 +-
docs/user-guide/src/distributed/standalone.md | 12 ++--
10 files changed, 50 insertions(+), 54 deletions(-)
diff --git a/ballista/rust/executor/executor_config_spec.toml b/ballista/rust/executor/executor_config_spec.toml
index 8d817fe..3cb168e 100644
--- a/ballista/rust/executor/executor_config_spec.toml
+++ b/ballista/rust/executor/executor_config_spec.toml
@@ -53,7 +53,7 @@ doc = "Host name or IP address to register with scheduler so that other executor
[[param]]
abbr = "p"
-name = "port"
+name = "bind_port"
type = "u16"
default = "50051"
doc = "bind port"
diff --git a/ballista/rust/executor/src/main.rs b/ballista/rust/executor/src/main.rs
index aad53d7..4c63ba8 100644
--- a/ballista/rust/executor/src/main.rs
+++ b/ballista/rust/executor/src/main.rs
@@ -75,7 +75,7 @@ async fn main() -> Result<()> {
let external_host = opt.external_host;
let bind_host = opt.bind_host;
- let port = opt.port;
+ let port = opt.bind_port;
let addr = format!("{}:{}", bind_host, port);
let addr = addr
diff --git a/ballista/rust/scheduler/scheduler_config_spec.toml b/ballista/rust/scheduler/scheduler_config_spec.toml
index 560e9a2..81e77d3 100644
--- a/ballista/rust/scheduler/scheduler_config_spec.toml
+++ b/ballista/rust/scheduler/scheduler_config_spec.toml
@@ -54,7 +54,7 @@ doc = "Local host name or IP address to bind to. Default: 0.0.0.0"
[[param]]
abbr = "p"
-name = "port"
+name = "bind_port"
type = "u16"
default = "50050"
doc = "bind port. Default: 50050"
\ No newline at end of file
diff --git a/ballista/rust/scheduler/src/main.rs b/ballista/rust/scheduler/src/main.rs
index 713103f..34386ca 100644
--- a/ballista/rust/scheduler/src/main.rs
+++ b/ballista/rust/scheduler/src/main.rs
@@ -116,7 +116,7 @@ async fn main() -> Result<()> {
let namespace = opt.namespace;
let bind_host = opt.bind_host;
- let port = opt.port;
+ let port = opt.bind_port;
let addr = format!("{}:{}", bind_host, port);
let addr = addr.parse()?;
diff --git a/benchmarks/README.md b/benchmarks/README.md
index e347130..0b5ccfc 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -122,7 +122,7 @@ RUST_LOG=info RUSTFLAGS='-C target-cpu=native -C lto -C codegen-units=1 -C embed
To run the benchmarks:
```bash
-cd $ARROW_HOME/ballista/rust/benchmarks/tpch
+cd $ARROW_HOME/benchmarks
cargo run --release benchmark ballista --host localhost --port 50050 --query 1 --path $(pwd)/data --format tbl
```
@@ -131,9 +131,9 @@ cargo run --release benchmark ballista --host localhost --port 50050 --query 1 -
To start a Rust scheduler and executor using Docker Compose:
```bash
-cd $BALLISTA_HOME
+cd $ARROW_HOME
./dev/build-rust.sh
-cd $BALLISTA_HOME/rust/benchmarks/tpch
+cd $ARROW_HOME/benchmarks
docker-compose up
```
diff --git a/benchmarks/docker-compose.yaml b/benchmarks/docker-compose.yaml
index c13e9eb..74c6703 100644
--- a/benchmarks/docker-compose.yaml
+++ b/benchmarks/docker-compose.yaml
@@ -21,7 +21,7 @@ services:
command: "etcd -advertise-client-urls http://etcd:2379 -listen-client-urls http://0.0.0.0:2379"
ballista-scheduler:
image: ballista:0.5.0-SNAPSHOT
- command: "/scheduler --config-backend etcd --etcd-urls etcd:2379 --bind-host 0.0.0.0 --port 50050"
+ command: "/scheduler --config-backend etcd --etcd-urls etcd:2379 --bind-host 0.0.0.0 --bind-port 50050"
environment:
- RUST_LOG=ballista=debug
volumes:
@@ -30,7 +30,7 @@ services:
- etcd
ballista-executor:
image: ballista:0.5.0-SNAPSHOT
- command: "/executor --bind-host 0.0.0.0 --port 50051 --scheduler-host ballista-scheduler"
+ command: "/executor --bind-host 0.0.0.0 --bind-port 50051 --scheduler-host ballista-scheduler"
scale: 2
environment:
- RUST_LOG=info
diff --git a/docs/user-guide/src/distributed/docker-compose.md b/docs/user-guide/src/distributed/docker-compose.md
index de27364..5ea86b5 100644
--- a/docs/user-guide/src/distributed/docker-compose.md
+++ b/docs/user-guide/src/distributed/docker-compose.md
@@ -33,7 +33,7 @@ services:
- "2379:2379"
ballista-executor:
image: ballistacompute/ballista-rust:0.4.2-SNAPSHOT
- command: "/executor --bind-host 0.0.0.0 --port 50051 --local"
+ command: "/executor --bind-host 0.0.0.0 --bind-port 50051 --local"
environment:
- RUST_LOG=info
ports:
diff --git a/docs/user-guide/src/distributed/kubernetes.md b/docs/user-guide/src/distributed/kubernetes.md
index 7b9b356..07b51f7 100644
--- a/docs/user-guide/src/distributed/kubernetes.md
+++ b/docs/user-guide/src/distributed/kubernetes.md
@@ -24,8 +24,8 @@ you are already comfortable with managing Kubernetes deployments.
The k8s deployment consists of:
-- k8s stateful set for one or more scheduler processes
-- k8s stateful set for one or more executor processes
+- k8s deployment for one or more scheduler processes
+- k8s deployment for one or more executor processes
- k8s service to route traffic to the schedulers
- k8s persistent volume and persistent volume claims to make local data accessible to Ballista
@@ -38,6 +38,14 @@ Ballista is at an early stage of development and therefore has some significant
- Only a single scheduler instance is currently supported unless the scheduler is configured to use `etcd` as a
backing store.
+## Publishing your images
+
+Currently there are no official Ballista images that work with the instructions in this guide. For the time being,
+you will need to build and publish your own images. You can do that by invoking the `dev/build-ballista-docker.sh` script.
+
+Once the images have been built, you can retag them with `docker tag ballista:0.5.0-SNAPSHOT <new-image-name>` so you
+can push them to your favourite docker registry.
+
## Create Persistent Volume and Persistent Volume Claim
Copy the following yaml to a `pv.yaml` file and apply to the cluster to create a persistent volume and a persistent
@@ -88,7 +96,7 @@ persistentvolumeclaim/data-pv-claim created
## Deploying Ballista Scheduler and Executors
-Copy the following yaml to a `cluster.yaml` file.
+Copy the following yaml to a `cluster.yaml` file and replace `<your-image>` with the name of your Ballista Docker image.
```yaml
apiVersion: v1
@@ -101,16 +109,14 @@ spec:
ports:
- port: 50050
name: scheduler
- clusterIP: None
selector:
app: ballista-scheduler
---
apiVersion: apps/v1
-kind: StatefulSet
+kind: Deployment
metadata:
name: ballista-scheduler
spec:
- serviceName: "ballista-scheduler"
replicas: 1
selector:
matchLabels:
@@ -122,27 +128,26 @@ spec:
ballista-cluster: ballista
spec:
containers:
- - name: ballista-scheduler
- image: ballistacompute/ballista-rust:0.4.2-SNAPSHOT
- command: ["/scheduler"]
- args: ["--port=50050"]
- ports:
- - containerPort: 50050
- name: flight
- volumeMounts:
- - mountPath: /mnt
- name: data
+ - name: ballista-scheduler
+ image: <your-image>
+ command: ["/scheduler"]
+ args: ["--bind-port=50050"]
+ ports:
+ - containerPort: 50050
+ name: flight
+ volumeMounts:
+ - mountPath: /mnt
+ name: data
volumes:
- name: data
persistentVolumeClaim:
claimName: data-pv-claim
---
apiVersion: apps/v1
-kind: StatefulSet
+kind: Deployment
metadata:
name: ballista-executor
spec:
- serviceName: "ballista-scheduler"
replicas: 2
selector:
matchLabels:
@@ -155,20 +160,12 @@ spec:
spec:
containers:
- name: ballista-executor
- image: ballistacompute/ballista-rust:0.4.2-SNAPSHOT
+ image: <your-image>
command: ["/executor"]
args:
- [
- "--port=50051",
- "--scheduler-host=ballista-scheduler",
- "--scheduler-port=50050",
- "--external-host=$(MY_POD_IP)",
- ]
- env:
- - name: MY_POD_IP
- valueFrom:
- fieldRef:
- fieldPath: status.podIP
+ - "--bind-port=50051"
+ - "--scheduler-host=ballista-scheduler"
+ - "--scheduler-port=50050"
ports:
- containerPort: 50051
name: flight
@@ -189,19 +186,18 @@ This should show the following output:
```
service/ballista-scheduler created
-statefulset.apps/ballista-scheduler created
-statefulset.apps/ballista-executor created
+deployment.apps/ballista-scheduler created
+deployment.apps/ballista-executor created
```
You can also check status by running `kubectl get pods`:
```bash
$ kubectl get pods
-NAME READY STATUS RESTARTS AGE
-busybox 1/1 Running 0 16m
-ballista-scheduler-0 1/1 Running 0 42s
-ballista-executor-0 1/1 Running 2 42s
-ballista-executor-1 1/1 Running 0 26s
+NAME READY STATUS RESTARTS AGE
+ballista-executor-78cc5b6486-4rkn4 0/1 Pending 0 42s
+ballista-executor-78cc5b6486-7crdm 0/1 Pending 0 42s
+ballista-scheduler-879f874c5-rnbd6 0/1 Pending 0 42s
```
You can view the scheduler logs with `kubectl logs deployment/ballista-scheduler`:
diff --git a/docs/user-guide/src/distributed/raspberrypi.md b/docs/user-guide/src/distributed/raspberrypi.md
index 0083d19..3bf36c7 100644
--- a/docs/user-guide/src/distributed/raspberrypi.md
+++ b/docs/user-guide/src/distributed/raspberrypi.md
@@ -116,7 +116,7 @@ Run the benchmarks:
```bash
docker run -it myrepo/ballista-arm64 \
/tpch benchmark datafusion --query=1 --path=/path/to/data --format=parquet \
- --concurrency=24 --iterations=1 --debug --host=ballista-scheduler --port=50050
+ --concurrency=24 --iterations=1 --debug --host=ballista-scheduler --port=50050
```
Note that it will be necessary to mount appropriate volumes into the containers and also configure networking
diff --git a/docs/user-guide/src/distributed/standalone.md b/docs/user-guide/src/distributed/standalone.md
index e9db425..66b6bc8 100644
--- a/docs/user-guide/src/distributed/standalone.md
+++ b/docs/user-guide/src/distributed/standalone.md
@@ -26,7 +26,7 @@ Start a scheduler using the following syntax:
```bash
docker run --network=host \
-d ballistacompute/ballista-rust:0.4.2-SNAPSHOT \
- /scheduler --port 50050
+ /scheduler --bind-port 50050
```
Run `docker ps` to check that the process is running:
@@ -34,7 +34,7 @@ Run `docker ps` to check that the process is running:
```
$ docker ps
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
-59452ce72138 ballistacompute/ballista-rust:0.4.2-SNAPSHOT "/scheduler --port 5…" 6 seconds ago Up 5 seconds affectionate_hofstadter
+59452ce72138 ballistacompute/ballista-rust:0.4.2-SNAPSHOT "/scheduler --bind-p…" 6 seconds ago Up 5 seconds affectionate_hofstadter
```
Run `docker logs CONTAINER_ID` to check the output from the process:
@@ -51,7 +51,7 @@ Start one or more executor processes. Each executor process will need to listen
```bash
docker run --network=host \
-d ballistacompute/ballista-rust:0.4.2-SNAPSHOT \
- /executor --external-host localhost --port 50051
+ /executor --external-host localhost --bind-port 50051
```
Use `docker ps` to check that both the scheduler and executor(s) are now running:
@@ -60,14 +60,14 @@ Use `docker ps` to check that both the scheduer and executor(s) are now running:
$ docker ps
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
0746ce262a19 ballistacompute/ballista-rust:0.4.2-SNAPSHOT "/executor --externa…" 2 seconds ago Up 1 second naughty_mclean
-59452ce72138 ballistacompute/ballista-rust:0.4.2-SNAPSHOT "/scheduler --port 5…" 4 minutes ago Up 4 minutes affectionate_hofstadter
+59452ce72138 ballistacompute/ballista-rust:0.4.2-SNAPSHOT "/scheduler --bind-p…" 4 minutes ago Up 4 minutes affectionate_hofstadter
```
Use `docker logs CONTAINER_ID` to check the output from the executor(s):
```
$ docker logs 0746ce262a19
-[2021-02-14T18:36:25Z INFO executor] Running with config: ExecutorConfig { host: "localhost", port: 50051, work_dir: "/tmp/.tmpVRFSvn", concurrent_tasks: 4 }
+[2021-02-14T18:36:25Z INFO executor] Running with config: ExecutorConfig { host: "localhost", bind_port: 50051, work_dir: "/tmp/.tmpVRFSvn", concurrent_tasks: 4 }
[2021-02-14T18:36:25Z INFO executor] Ballista v0.4.2-SNAPSHOT Rust Executor listening on 0.0.0.0:50051
[2021-02-14T18:36:25Z INFO executor] Starting registration with scheduler
```
@@ -84,7 +84,7 @@ Ballista can optionally use [etcd](https://etcd.io/) as a backing store for the
```bash
docker run --network=host \
-d ballistacompute/ballista-rust:0.4.2-SNAPSHOT \
- /scheduler --port 50050 \
+ /scheduler --bind-port 50050 \
--config-backend etcd \
--etcd-urls etcd:2379
```