You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2022/11/17 18:09:48 UTC
[iceberg-docs] branch main updated: Update Spark+Iceberg quickstart docker-compose example (#178)
This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-docs.git
The following commit(s) were added to refs/heads/main by this push:
new 3b0d73ab Update Spark+Iceberg quickstart docker-compose example (#178)
3b0d73ab is described below
commit 3b0d73abe2be8c2fd57cc298ab13828c38bd59ba
Author: Eduard Tudenhöfner <et...@gmail.com>
AuthorDate: Thu Nov 17 19:09:42 2022 +0100
Update Spark+Iceberg quickstart docker-compose example (#178)
---
landing-page/content/common/spark-quickstart.md | 67 ++++++++++++++++++-------
1 file changed, 50 insertions(+), 17 deletions(-)
diff --git a/landing-page/content/common/spark-quickstart.md b/landing-page/content/common/spark-quickstart.md
index cef9558a..5fee25ca 100644
--- a/landing-page/content/common/spark-quickstart.md
+++ b/landing-page/content/common/spark-quickstart.md
@@ -53,29 +53,62 @@ version: "3"
services:
spark-iceberg:
image: tabulario/spark-iceberg
- depends_on:
- - postgres
container_name: spark-iceberg
- environment:
- - SPARK_HOME=/opt/spark
- - PYSPARK_PYTON=/usr/bin/python3.9
- - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/spark/bin
+ build: spark/
+ depends_on:
+ - rest
+ - minio
volumes:
- ./warehouse:/home/iceberg/warehouse
- ./notebooks:/home/iceberg/notebooks/notebooks
+ environment:
+ - AWS_ACCESS_KEY_ID=admin
+ - AWS_SECRET_ACCESS_KEY=password
+ - AWS_REGION=us-east-1
ports:
- 8888:8888
- 8080:8080
- - 18080:18080
- postgres:
- image: postgres:13.4-bullseye
- container_name: postgres
+ links:
+ - rest:rest
+ - minio:minio
+ rest:
+ image: tabulario/iceberg-rest:0.1.0
+ ports:
+ - 8181:8181
environment:
- - POSTGRES_USER=admin
- - POSTGRES_PASSWORD=password
- - POSTGRES_DB=demo_catalog
- volumes:
- - ./postgres/data:/var/lib/postgresql/data
+ - AWS_ACCESS_KEY_ID=admin
+ - AWS_SECRET_ACCESS_KEY=password
+ - AWS_REGION=us-east-1
+ - CATALOG_WAREHOUSE=s3a://warehouse/wh/
+ - CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
+ - CATALOG_S3_ENDPOINT=http://minio:9000
+ minio:
+ image: minio/minio
+ container_name: minio
+ environment:
+ - MINIO_ROOT_USER=admin
+ - MINIO_ROOT_PASSWORD=password
+ ports:
+ - 9001:9001
+ - 9000:9000
+ command: ["server", "/data", "--console-address", ":9001"]
+ mc:
+ depends_on:
+ - minio
+ image: minio/mc
+ container_name: mc
+ environment:
+ - AWS_ACCESS_KEY_ID=admin
+ - AWS_SECRET_ACCESS_KEY=password
+ - AWS_REGION=us-east-1
+ entrypoint: >
+ /bin/sh -c "
+ until (/usr/bin/mc config host add minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done;
+ /usr/bin/mc rm -r --force minio/warehouse;
+ /usr/bin/mc mb minio/warehouse;
+ /usr/bin/mc policy set public minio/warehouse;
+ exit 0;
+ "
```
Next, start up the docker containers with this command:
@@ -155,8 +188,8 @@ schema = StructType([
StructField("vendor_id", LongType(), True),
StructField("trip_id", LongType(), True),
StructField("trip_distance", FloatType(), True),
- StructField("fare_amount', DoubleType(), True),
- StructField("store_and_fwd_flag', StringType(), True)
+ StructField("fare_amount", DoubleType(), True),
+ StructField("store_and_fwd_flag", StringType(), True)
])
df = spark.createDataFrame([], schema)