You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by ka...@apache.org on 2021/04/12 17:00:05 UTC

[airflow] branch master updated: Chart: Allow disabling `git-sync` for Webserver (#15314)

This is an automated email from the ASF dual-hosted git repository.

kaxilnaik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/master by this push:
     new 30c6300  Chart: Allow disabling `git-sync` for Webserver (#15314)
30c6300 is described below

commit 30c6300c6b28554786245ddcd0da969be44979f7
Author: Kaxil Naik <ka...@gmail.com>
AuthorDate: Mon Apr 12 17:59:49 2021 +0100

    Chart: Allow disabling `git-sync` for Webserver (#15314)
    
    closes https://github.com/apache/airflow/issues/11704
---
 airflow/config_templates/config.yml                |  3 +-
 airflow/config_templates/default_airflow.cfg       |  3 +-
 .../templates/webserver/webserver-deployment.yaml  |  6 ++--
 chart/tests/test_git_sync_webserver.py             | 36 ++++++++++++++++++++++
 chart/values.schema.json                           |  4 +++
 chart/values.yaml                                  |  7 +++++
 docs/apache-airflow/dag-serialization.rst          |  2 +-
 docs/helm-chart/manage-dags-files.rst              | 26 ++++++++++++++--
 8 files changed, 79 insertions(+), 8 deletions(-)

diff --git a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml
index c9fb21f..1e4d4b1 100644
--- a/airflow/config_templates/config.yml
+++ b/airflow/config_templates/config.yml
@@ -358,9 +358,10 @@
         Whether to persist DAG files code in DB.
         If set to True, Webserver reads file contents from DB instead of
         trying to access files in a DAG folder.
+        (Default is ``True``)
       version_added: 1.10.10
       type: string
-      example: "False"
+      example: "True"
       default: ~
     - name: max_num_rendered_ti_fields_per_task
       description: |
diff --git a/airflow/config_templates/default_airflow.cfg b/airflow/config_templates/default_airflow.cfg
index 5c46ffa..c880f3e 100644
--- a/airflow/config_templates/default_airflow.cfg
+++ b/airflow/config_templates/default_airflow.cfg
@@ -205,7 +205,8 @@ min_serialized_dag_fetch_interval = 10
 # Whether to persist DAG files code in DB.
 # If set to True, Webserver reads file contents from DB instead of
 # trying to access files in a DAG folder.
-# Example: store_dag_code = False
+# (Default is ``True``)
+# Example: store_dag_code = True
 # store_dag_code =
 
 # Maximum number of Rendered Task Instance Fields (Template Fields) per task to store
diff --git a/chart/templates/webserver/webserver-deployment.yaml b/chart/templates/webserver/webserver-deployment.yaml
index 8591607..1c9da15 100644
--- a/chart/templates/webserver/webserver-deployment.yaml
+++ b/chart/templates/webserver/webserver-deployment.yaml
@@ -92,7 +92,7 @@ spec:
           {{- include "custom_airflow_environment" . | indent 10 }}
           {{- include "standard_airflow_environment" . | indent 10 }}
       containers:
-{{- if and (.Values.dags.gitSync.enabled) (not .Values.dags.persistence.enabled) }}
+{{- if and (.Values.dags.gitSync.enabled) (not .Values.dags.persistence.enabled) (not .Values.dags.gitSync.excludeWebserver) }}
 {{- include "git_sync_container" . | indent 8 }}
 {{- end }}
         - name: webserver
@@ -118,7 +118,7 @@ spec:
               subPath: airflow_local_settings.py
               readOnly: true
 {{- end }}
-{{- if or .Values.dags.gitSync.enabled .Values.dags.persistence.enabled }}
+{{- if or (and .Values.dags.gitSync.enabled (not .Values.dags.gitSync.excludeWebserver)) .Values.dags.persistence.enabled }}
             - name: dags
               mountPath: {{ template "airflow_dags_mount_path" . }}
 {{- end }}
@@ -175,7 +175,7 @@ spec:
         - name: dags
           persistentVolumeClaim:
             claimName: {{ template "airflow_dags_volume_claim" . }}
-        {{- else if .Values.dags.gitSync.enabled }}
+        {{- else if and (.Values.dags.gitSync.enabled) (not .Values.dags.gitSync.excludeWebserver) }}
         - name: dags
           emptyDir: {}
         {{- if  .Values.dags.gitSync.sshKeySecret }}
diff --git a/chart/tests/test_git_sync_webserver.py b/chart/tests/test_git_sync_webserver.py
index a232287..d599be4 100644
--- a/chart/tests/test_git_sync_webserver.py
+++ b/chart/tests/test_git_sync_webserver.py
@@ -18,6 +18,7 @@
 import unittest
 
 import jmespath
+from parameterized import parameterized
 
 from tests.helm_template_generator import render_chart
 
@@ -59,3 +60,38 @@ class GitSyncWebserverTest(unittest.TestCase):
         )
 
         assert "RELEASE-NAME-webserver" == jmespath.search("spec.template.spec.serviceAccountName", docs[0])
+
+    @parameterized.expand([(True,), (False,)])
+    def test_git_sync_with_exclude_webserver(self, exclude_webserver):
+        """
+        If dags.gitSync.excludeWebserver=True, git sync related containers, volume mounts & volumes
+        are not created.
+        """
+        docs = render_chart(
+            values={
+                "dags": {
+                    "gitSync": {"enabled": True, "excludeWebserver": exclude_webserver},
+                    "persistence": {"enabled": False},
+                }
+            },
+            show_only=["templates/webserver/webserver-deployment.yaml"],
+        )
+
+        containers_names = [
+            container["name"] for container in jmespath.search("spec.template.spec.containers", docs[0])
+        ]
+
+        volume_mount_names = [
+            vm["name"] for vm in jmespath.search("spec.template.spec.containers[0].volumeMounts", docs[0])
+        ]
+
+        volume_names = [volume["name"] for volume in jmespath.search("spec.template.spec.volumes", docs[0])]
+
+        if exclude_webserver:
+            assert "git-sync" not in containers_names
+            assert "dags" not in volume_mount_names
+            assert "dags" not in volume_names
+        else:
+            assert "git-sync" in containers_names
+            assert "dags" in volume_mount_names
+            assert "dags" in volume_names
diff --git a/chart/values.schema.json b/chart/values.schema.json
index afa931f..c09bef2 100644
--- a/chart/values.schema.json
+++ b/chart/values.schema.json
@@ -1426,6 +1426,10 @@
                             "description": "Enable Git sync.",
                             "type": "boolean"
                         },
+                        "excludeWebserver": {
+                            "description": "Disable Git sync on webserver as it is not needed when DAG Serialization is enabled.",
+                            "type": "boolean"
+                        },
                         "repo": {
                             "description": "Git repository.",
                             "type": "string"
diff --git a/chart/values.yaml b/chart/values.yaml
index 715ff05..a38b234 100644
--- a/chart/values.yaml
+++ b/chart/values.yaml
@@ -816,6 +816,13 @@ dags:
     existingClaim:
   gitSync:
     enabled: false
+
+    # Change it to true when DAG Serialization is turned on. This will exclude git sync containers
+    # from Webserver as DAGs are fetched from the DB. DAG Serialization was introduced in
+    # 1.10.7 and is optional for <2.0.0.
+    # https://airflow.apache.org/docs/apache-airflow/1.10.15/dag-serialization.html
+    excludeWebserver: false
+
     # git repo clone url
     # ssh examples ssh://git@github.com/apache/airflow.git
     # git@github.com:apache/airflow.git
diff --git a/docs/apache-airflow/dag-serialization.rst b/docs/apache-airflow/dag-serialization.rst
index 2008ece..72b8f37 100644
--- a/docs/apache-airflow/dag-serialization.rst
+++ b/docs/apache-airflow/dag-serialization.rst
@@ -16,7 +16,7 @@
     under the License.
 
 
-
+.. _dag-serialization:
 
 DAG Serialization
 =================
diff --git a/docs/helm-chart/manage-dags-files.rst b/docs/helm-chart/manage-dags-files.rst
index 1f8ec8a..eab6298 100644
--- a/docs/helm-chart/manage-dags-files.rst
+++ b/docs/helm-chart/manage-dags-files.rst
@@ -80,7 +80,12 @@ If you are deploying an image with a constant tag, you need to make sure that th
 Mounting DAGs using Git-Sync sidecar with Persistence enabled
 -------------------------------------------------------------
 
-This option will use a Persistent Volume Claim with an access mode of ``ReadWriteMany``. The scheduler pod will sync DAGs from a git repository onto the PVC every configured number of seconds. The other pods will read the synced DAGs. Not all volume  plugins have support for ``ReadWriteMany`` access mode. Refer `Persistent Volume Access Modes <https://kubernetes.io/docs/concepts/storage/persistent-volumes/#access-modes>`__ for details
+This option will use a Persistent Volume Claim with an access mode of ``ReadWriteMany``.
+The scheduler pod will sync DAGs from a git repository onto the PVC every configured number of
+seconds. The other pods will read the synced DAGs. Not all volume plugins have support for
+``ReadWriteMany`` access mode.
+Refer to `Persistent Volume Access Modes <https://kubernetes.io/docs/concepts/storage/persistent-volumes/#access-modes>`__
+for details.
 
 .. code-block:: bash
 
@@ -91,10 +96,27 @@ This option will use a Persistent Volume Claim with an access mode of ``ReadWrit
       # by setting the  dags.persistence.* and dags.gitSync.* values
       # Please refer to values.yaml for details
 
+When using ``apache-airflow>=2.0.0``, :ref:`DAG Serialization <apache-airflow:dag-serialization>` is enabled by default,
+hence Webserver does not need access to DAG files, so you can turn off ``git-sync`` for Webserver by setting
+``dags.gitSync.excludeWebserver`` to ``true``.
+This is also recommended when enabling DAG Serialization for ``apache-airflow>=1.10.11,<2``.
+
+.. code-block:: bash
+
+    helm upgrade airflow . \
+      --set dags.persistence.enabled=true \
+      --set dags.gitSync.enabled=true \
+      --set dags.gitSync.excludeWebserver=true
+      # you can also override the other persistence or gitSync values
+      # by setting the  dags.persistence.* and dags.gitSync.* values
+      # Please refer to values.yaml for details
+
 Mounting DAGs using Git-Sync sidecar without Persistence
 --------------------------------------------------------
 
-This option will use an always running Git-Sync side car on every scheduler, webserver and worker pods. The Git-Sync side car containers will sync DAGs from a git repository every configured number of seconds. If you are using the KubernetesExecutor, Git-sync will run as an init container on your worker pods.
+This option will use an always running Git-Sync sidecar on every scheduler, webserver and worker pod.
+The Git-Sync sidecar containers will sync DAGs from a git repository every configured number of
+seconds. If you are using the ``KubernetesExecutor``, Git-Sync will run as an init container on your worker pods.
 
 .. code-block:: bash