You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by GitBox <gi...@apache.org> on 2021/02/23 15:15:29 UTC

[GitHub] [airflow] ldealmei commented on issue #8605: Add Production-ready docker compose for the production image

ldealmei commented on issue #8605:
URL: https://github.com/apache/airflow/issues/8605#issuecomment-784273838


   Thank you all for the `docker-compose` files :)
   I'm sharing mine as it addresses some aspects that I couldn't find in this thread and had me spend some time on it to get it to work. These are:
   - Working with DockerOperator
   - Deploy behind a proxy (Traefik)
   - Deploy dags on push with `git-sync` (This one is optional but is quite convenient).
   
   @mik-laj I also have a working healthcheck on the scheduler. Not the most expressive but works.
   
   This configuration relies on an existing and initialized database.
   
   External database - LocalExecutor - Airflow 2.0.0 - Traefik - Dags mostly based on DockerOperator.
   ```yml
   version: "3.7"
   # Environment shared by the Airflow containers; the services below pull it in
   # through the `airflow-environment` anchor.
   x-airflow-environment: &airflow-environment
     AIRFLOW__CORE__EXECUTOR: LocalExecutor
     AIRFLOW__CORE__LOAD_EXAMPLES: "False"
     AIRFLOW__CORE__LOAD_DEFAULT_CONNECTIONS: "False"
     # Both values are taken from the environment (or .env file) of the shell that
     # runs docker-compose. The `:?` form makes Compose abort with the given
     # message when the variable is unset or empty, instead of silently
     # substituting a blank connection string / encryption key.
     AIRFLOW__CORE__SQL_ALCHEMY_CONN: "${DB_CONNECTION_STRING:?DB_CONNECTION_STRING must be set}"
     AIRFLOW__CORE__FERNET_KEY: "${ENCRYPTION_KEY:?ENCRYPTION_KEY must be set}"
     # Path inside the shared `dags` volume (mounted at /opt/airflow/sync in the
     # Airflow services) where the git-sync checkout is expected to appear.
     AIRFLOW__CORE__DAGS_FOLDER: /opt/airflow/sync/git/dags
     AIRFLOW__CORE__ENABLE_XCOM_PICKLING: "True"  # because of https://github.com/apache/airflow/issues/13487
     # Public URL of the UI; matches the Host() rule on the webserver's Traefik labels.
     AIRFLOW__WEBSERVER__BASE_URL: https://airflow.example.com
     # The webserver is only reached through the Traefik proxy.
     AIRFLOW__WEBSERVER__ENABLE_PROXY_FIX: "True"
     # NOTE(review): Airflow 2.x presumably ships only the RBAC UI, which would make
     # this toggle a no-op on the 2.0.0 image -- confirm before removing.
     AIRFLOW__WEBSERVER__RBAC: "True"
   
   services:
     # Reverse proxy in front of the Airflow webserver. Containers are only
     # exposed when they opt in with `traefik.enable=true` labels.
     traefik:
       container_name: traefik
       image: traefik:v2.4
       restart: always
       ports:
         # Quoted so the mappings are always read as strings.
         - "80:80"
         - "443:443"
       environment:
         # Left without a value on purpose: Compose then passes DO_AUTH_TOKEN
         # through from the shell that runs docker-compose.
         # See https://doc.traefik.io/traefik/https/acme/#providers for other providers
         DO_AUTH_TOKEN:
       volumes:
         # ACME storage (acme.json) lives in the named `certs` volume.
         - certs:/letsencrypt
         # Read-only Docker socket, used by the docker provider enabled below.
         - /var/run/docker.sock:/var/run/docker.sock:ro
       command:
         # Enables the ping endpoint that the healthcheck below relies on.
         - --ping=true
         - --providers.docker=true
         - --providers.docker.exposedbydefault=false
         - --entrypoints.web.address=:80
         - --entrypoints.websecure.address=:443
         # HTTP -> HTTPS redirect
         - --entrypoints.web.http.redirections.entrypoint.to=websecure
         - --entrypoints.web.http.redirections.entrypoint.scheme=https
         # TLS config
         - --certificatesresolvers.myresolver.acme.dnschallenge=true
         - --certificatesresolvers.myresolver.acme.storage=/letsencrypt/acme.json
         ## Comment out the following line for a production deployment
         ## (it points the resolver at the Let's Encrypt staging CA).
         - --certificatesresolvers.myresolver.acme.caserver=https://acme-staging-v02.api.letsencrypt.org/directory
         ## See https://doc.traefik.io/traefik/https/acme/#providers for other providers
         - --certificatesresolvers.myresolver.acme.dnschallenge.provider=digitalocean
         - --certificatesresolvers.myresolver.acme.email=user@example.com
       healthcheck:
         test:
           - CMD
           - traefik
           - healthcheck
           - --ping
         interval: 10s
         timeout: 10s
         retries: 5
   
     # Required because of DockerOperator. For secure access and handling permissions.
     # NOTE(review): DAGs presumably point DockerOperator at this service instead of
     # the raw socket; that wiring is not part of this file -- confirm in the DAG code.
     docker-socket-proxy:
       image: tecnativa/docker-socket-proxy:0.1.1
       restart: always
       privileged: true
       volumes:
         # Host Docker socket, mounted read-only into the proxy.
         - /var/run/docker.sock:/var/run/docker.sock:ro
       environment:
         # Flags read by the proxy image, written as strings. Presumably each "1"
         # enables the matching group of Docker API calls -- confirm against the
         # image's README before widening or narrowing the set.
         CONTAINERS: "1"
         IMAGES: "1"
         AUTH: "1"
         POST: "1"
   
     # Deploys DAGs on pushes to master by periodically syncing the repository
     # into the shared `dags` volume.
     git-sync:
       container_name: dags-sync
       image: k8s.gcr.io/git-sync/git-sync:v3.2.2
       restart: always
       volumes:
         # Same named volume that the webserver and scheduler mount at
         # /opt/airflow/sync.
         - dags:/tmp:rw
       environment:
         # No value given: Compose passes both credentials through from the shell
         # that runs docker-compose, keeping them out of this file.
         GIT_SYNC_USERNAME:
         GIT_SYNC_PASSWORD:
         GIT_SYNC_REPO: https://example.com/my/repo.git
         GIT_SYNC_BRANCH: master
         GIT_SYNC_DEST: dags
         # Presumably the pause between syncs, in seconds -- confirm in the
         # git-sync docs.
         GIT_SYNC_WAIT: "60"
   
     # Airflow UI, published through Traefik at https://airflow.example.com.
     webserver:
       container_name: airflow_webserver
       image: apache/airflow:2.0.0
       command: webserver
       restart: always
       # Shared settings are used as-is; this service overrides nothing, so the
       # anchor is referenced directly.
       environment: *airflow-environment
       depends_on:
         - git-sync
         - traefik
       volumes:
         # DAG checkout produced by git-sync, plus task logs shared with the scheduler.
         - dags:/opt/airflow/sync
         - logs:/opt/airflow/logs
       healthcheck:
         # --fail makes curl exit non-zero on an HTTP error status.
         test:
           - CMD
           - curl
           - --fail
           - http://localhost:8080/health
         interval: 10s
         timeout: 10s
         retries: 5
       labels:
         # Opt in explicitly, since Traefik runs with exposedbydefault=false.
         - traefik.enable=true
         - traefik.http.routers.webserver.rule=Host(`airflow.example.com`)
         - traefik.http.routers.webserver.entrypoints=websecure
         - traefik.http.routers.webserver.tls.certresolver=myresolver
         # Container port that Traefik forwards to.
         - traefik.http.services.webserver.loadbalancer.server.port=8080
   
     # Airflow scheduler; with LocalExecutor it also runs the tasks.
     scheduler:
       container_name: airflow_scheduler
       image: apache/airflow:2.0.0
       command: scheduler
       restart: always
       # Shared settings are used as-is; this service overrides nothing, so the
       # anchor is referenced directly.
       environment: *airflow-environment
       depends_on:
         - git-sync
         - webserver
       volumes:
         - dags:/opt/airflow/sync
         - logs:/opt/airflow/logs
       healthcheck:
         # Asks the webserver container (airflow_webserver) for /health and looks
         # for a "healthy" status next to the scheduler entry. This check therefore
         # also fails whenever the webserver is unreachable.
         # NOTE(review): `grep -A 1` assumes the status is printed on the line right
         # after the "scheduler" key -- confirm against the /health output.
         test:
           - CMD-SHELL
           - 'curl --silent http://airflow_webserver:8080/health | grep -A 1 scheduler | grep \"healthy\"'
         interval: 10s
         timeout: 10s
         retries: 5
   
   # Named volumes, all with default settings.
   volumes:
     # DAG checkout written by git-sync, read by the webserver and scheduler.
     dags: {}
     # Airflow logs shared by the webserver and scheduler.
     logs: {}
     # Traefik ACME certificate storage.
     certs: {}
   
   ```
   I have an extra container (not shown) to handle rotating logs that are output directly to files. It is based on logrotate. Not sharing it here because it is a custom image and is beyond the scope of the thread. But if anybody is interested, message me.
   
   Hope it helps!


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org