You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by ka...@apache.org on 2021/04/12 17:00:05 UTC
[airflow] branch master updated: Chart: Allow disabling `git-sync`
for Webserver (#15314)
This is an automated email from the ASF dual-hosted git repository.
kaxilnaik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/master by this push:
new 30c6300 Chart: Allow disabling `git-sync` for Webserver (#15314)
30c6300 is described below
commit 30c6300c6b28554786245ddcd0da969be44979f7
Author: Kaxil Naik <ka...@gmail.com>
AuthorDate: Mon Apr 12 17:59:49 2021 +0100
Chart: Allow disabling `git-sync` for Webserver (#15314)
closes https://github.com/apache/airflow/issues/11704
---
airflow/config_templates/config.yml | 3 +-
airflow/config_templates/default_airflow.cfg | 3 +-
.../templates/webserver/webserver-deployment.yaml | 6 ++--
chart/tests/test_git_sync_webserver.py | 36 ++++++++++++++++++++++
chart/values.schema.json | 4 +++
chart/values.yaml | 7 +++++
docs/apache-airflow/dag-serialization.rst | 2 +-
docs/helm-chart/manage-dags-files.rst | 26 ++++++++++++++--
8 files changed, 79 insertions(+), 8 deletions(-)
diff --git a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml
index c9fb21f..1e4d4b1 100644
--- a/airflow/config_templates/config.yml
+++ b/airflow/config_templates/config.yml
@@ -358,9 +358,10 @@
Whether to persist DAG files code in DB.
If set to True, Webserver reads file contents from DB instead of
trying to access files in a DAG folder.
+ (Default is ``True``)
version_added: 1.10.10
type: string
- example: "False"
+ example: "True"
default: ~
- name: max_num_rendered_ti_fields_per_task
description: |
diff --git a/airflow/config_templates/default_airflow.cfg b/airflow/config_templates/default_airflow.cfg
index 5c46ffa..c880f3e 100644
--- a/airflow/config_templates/default_airflow.cfg
+++ b/airflow/config_templates/default_airflow.cfg
@@ -205,7 +205,8 @@ min_serialized_dag_fetch_interval = 10
# Whether to persist DAG files code in DB.
# If set to True, Webserver reads file contents from DB instead of
# trying to access files in a DAG folder.
-# Example: store_dag_code = False
+# (Default is ``True``)
+# Example: store_dag_code = True
# store_dag_code =
# Maximum number of Rendered Task Instance Fields (Template Fields) per task to store
diff --git a/chart/templates/webserver/webserver-deployment.yaml b/chart/templates/webserver/webserver-deployment.yaml
index 8591607..1c9da15 100644
--- a/chart/templates/webserver/webserver-deployment.yaml
+++ b/chart/templates/webserver/webserver-deployment.yaml
@@ -92,7 +92,7 @@ spec:
{{- include "custom_airflow_environment" . | indent 10 }}
{{- include "standard_airflow_environment" . | indent 10 }}
containers:
-{{- if and (.Values.dags.gitSync.enabled) (not .Values.dags.persistence.enabled) }}
+{{- if and (.Values.dags.gitSync.enabled) (not .Values.dags.persistence.enabled) (not .Values.dags.gitSync.excludeWebserver) }}
{{- include "git_sync_container" . | indent 8 }}
{{- end }}
- name: webserver
@@ -118,7 +118,7 @@ spec:
subPath: airflow_local_settings.py
readOnly: true
{{- end }}
-{{- if or .Values.dags.gitSync.enabled .Values.dags.persistence.enabled }}
+{{- if or (and .Values.dags.gitSync.enabled (not .Values.dags.gitSync.excludeWebserver)) .Values.dags.persistence.enabled }}
- name: dags
mountPath: {{ template "airflow_dags_mount_path" . }}
{{- end }}
@@ -175,7 +175,7 @@ spec:
- name: dags
persistentVolumeClaim:
claimName: {{ template "airflow_dags_volume_claim" . }}
- {{- else if .Values.dags.gitSync.enabled }}
+ {{- else if and (.Values.dags.gitSync.enabled) (not .Values.dags.gitSync.excludeWebserver) }}
- name: dags
emptyDir: {}
{{- if .Values.dags.gitSync.sshKeySecret }}
diff --git a/chart/tests/test_git_sync_webserver.py b/chart/tests/test_git_sync_webserver.py
index a232287..d599be4 100644
--- a/chart/tests/test_git_sync_webserver.py
+++ b/chart/tests/test_git_sync_webserver.py
@@ -18,6 +18,7 @@
import unittest
import jmespath
+from parameterized import parameterized
from tests.helm_template_generator import render_chart
@@ -59,3 +60,38 @@ class GitSyncWebserverTest(unittest.TestCase):
)
assert "RELEASE-NAME-webserver" == jmespath.search("spec.template.spec.serviceAccountName", docs[0])
+
+ @parameterized.expand([(True,), (False,)])
+ def test_git_sync_with_exclude_webserver(self, exclude_webserver):
+ """
+ If dags.gitSync.excludeWebserver=True, git-sync related containers, volume mounts & volumes
+ are not created.
+ """
+ docs = render_chart(
+ values={
+ "dags": {
+ "gitSync": {"enabled": True, "excludeWebserver": exclude_webserver},
+ "persistence": {"enabled": False},
+ }
+ },
+ show_only=["templates/webserver/webserver-deployment.yaml"],
+ )
+
+ containers_names = [
+ container["name"] for container in jmespath.search("spec.template.spec.containers", docs[0])
+ ]
+
+ volume_mount_names = [
+ vm["name"] for vm in jmespath.search("spec.template.spec.containers[0].volumeMounts", docs[0])
+ ]
+
+ volume_names = [volume["name"] for volume in jmespath.search("spec.template.spec.volumes", docs[0])]
+
+ if exclude_webserver:
+ assert "git-sync" not in containers_names
+ assert "dags" not in volume_mount_names
+ assert "dags" not in volume_names
+ else:
+ assert "git-sync" in containers_names
+ assert "dags" in volume_mount_names
+ assert "dags" in volume_names
diff --git a/chart/values.schema.json b/chart/values.schema.json
index afa931f..c09bef2 100644
--- a/chart/values.schema.json
+++ b/chart/values.schema.json
@@ -1426,6 +1426,10 @@
"description": "Enable Git sync.",
"type": "boolean"
},
+ "excludeWebserver": {
+ "description": "Disable Git sync on webserver as it is not needed when DAG Serialization is enabled.",
+ "type": "boolean"
+ },
"repo": {
"description": "Git repository.",
"type": "string"
diff --git a/chart/values.yaml b/chart/values.yaml
index 715ff05..a38b234 100644
--- a/chart/values.yaml
+++ b/chart/values.yaml
@@ -816,6 +816,13 @@ dags:
existingClaim:
gitSync:
enabled: false
+
+ # Change it to true when DAG Serialization is turned on. This will exclude git sync containers
+ # from Webserver as DAGs are fetched from the DB. DAG Serialization was introduced in
+ # 1.10.7 and is optional for <2.0.0.
+ # https://airflow.apache.org/docs/apache-airflow/1.10.15/dag-serialization.html
+ excludeWebserver: false
+
# git repo clone url
# ssh examples ssh://git@github.com/apache/airflow.git
# git@github.com:apache/airflow.git
diff --git a/docs/apache-airflow/dag-serialization.rst b/docs/apache-airflow/dag-serialization.rst
index 2008ece..72b8f37 100644
--- a/docs/apache-airflow/dag-serialization.rst
+++ b/docs/apache-airflow/dag-serialization.rst
@@ -16,7 +16,7 @@
under the License.
-
+.. _dag-serialization:
DAG Serialization
=================
diff --git a/docs/helm-chart/manage-dags-files.rst b/docs/helm-chart/manage-dags-files.rst
index 1f8ec8a..eab6298 100644
--- a/docs/helm-chart/manage-dags-files.rst
+++ b/docs/helm-chart/manage-dags-files.rst
@@ -80,7 +80,12 @@ If you are deploying an image with a constant tag, you need to make sure that th
Mounting DAGs using Git-Sync sidecar with Persistence enabled
-------------------------------------------------------------
-This option will use a Persistent Volume Claim with an access mode of ``ReadWriteMany``. The scheduler pod will sync DAGs from a git repository onto the PVC every configured number of seconds. The other pods will read the synced DAGs. Not all volume plugins have support for ``ReadWriteMany`` access mode. Refer `Persistent Volume Access Modes <https://kubernetes.io/docs/concepts/storage/persistent-volumes/#access-modes>`__ for details
+This option will use a Persistent Volume Claim with an access mode of ``ReadWriteMany``.
+The scheduler pod will sync DAGs from a git repository onto the PVC every configured number of
+seconds. The other pods will read the synced DAGs. Not all volume plugins have support for
+``ReadWriteMany`` access mode.
+Refer to `Persistent Volume Access Modes <https://kubernetes.io/docs/concepts/storage/persistent-volumes/#access-modes>`__
+for details.
.. code-block:: bash
@@ -91,10 +96,27 @@ This option will use a Persistent Volume Claim with an access mode of ``ReadWrit
# by setting the dags.persistence.* and dags.gitSync.* values
# Please refer to values.yaml for details
+When using ``apache-airflow>=2.0.0``, :ref:`DAG Serialization <apache-airflow:dag-serialization>` is enabled by default,
+hence Webserver does not need access to DAG files, so you can turn off ``git-sync`` for Webserver by setting
+``dags.gitSync.excludeWebserver`` to ``true``.
+This is also recommended when enabling DAG Serialization for ``apache-airflow>=1.10.11,<2``.
+
+.. code-block:: bash
+
+ helm upgrade airflow . \
+ --set dags.persistence.enabled=true \
+ --set dags.gitSync.enabled=true \
+ --set dags.gitSync.excludeWebserver=true
+ # you can also override the other persistence or gitSync values
+ # by setting the dags.persistence.* and dags.gitSync.* values
+ # Please refer to values.yaml for details
+
Mounting DAGs using Git-Sync sidecar without Persistence
--------------------------------------------------------
-This option will use an always running Git-Sync side car on every scheduler, webserver and worker pods. The Git-Sync side car containers will sync DAGs from a git repository every configured number of seconds. If you are using the KubernetesExecutor, Git-sync will run as an init container on your worker pods.
+This option will use an always-running Git-Sync sidecar on every scheduler, webserver and worker pod.
+The Git-Sync sidecar containers will sync DAGs from a git repository every configured number of
+seconds. If you are using the ``KubernetesExecutor``, Git-sync will run as an init container on your worker pods.
.. code-block:: bash