You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by as...@apache.org on 2021/11/19 18:23:13 UTC

[airflow] branch main updated: Speed up webserver boot time by delaying provider initialization (#19709)

This is an automated email from the ASF dual-hosted git repository.

ash pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 7a676a1  Speed up webserver boot time by delaying provider initialization (#19709)
7a676a1 is described below

commit 7a676a148f94f0dcf0bf86789d2684295798bd16
Author: Ash Berlin-Taylor <as...@firemirror.com>
AuthorDate: Fri Nov 19 18:22:42 2021 +0000

    Speed up webserver boot time by delaying provider initialization (#19709)
    
    * Speed up webserver boot time by delaying provider initialization
    
    This drops the time to first request from 37s to 20s by making the
    following changes:
    
    - Don't pre-load the app when not in daemon mode.
    
      The purpose of the call to `cached_app()` was to ensure that any
      errors are reported on the terminal before it is detached to make
      failures more obvious to the user (which is a good feature).
    
      However, the comment about "pre-warming the cache" was incorrect: no
      pre-warming happened, because we run gunicorn by spawning a whole new
      process that doesn't share any state with the current Python interpreter.
    
    - Don't load/initialize providers when only importing airflow.www.views
    
      As it was written, it would load the providers' hooks at import time.
    
      This changes it through a combination of cached properties and the
      existing `init_connection_form` function.
    
      (`extra_fields` is not set as a cached_property because of how FAB
      works -- it iterates over all attributes of the class looking for
      methods/routes and then looks at properties on it, meaning it would
      still access the property too early)
---
 airflow/cli/commands/webserver_command.py | 12 ++++++------
 airflow/www/gunicorn_config.py            |  7 +++++++
 airflow/www/views.py                      | 17 +++++++++++++----
 3 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/airflow/cli/commands/webserver_command.py b/airflow/cli/commands/webserver_command.py
index 7fdc743..0269083 100644
--- a/airflow/cli/commands/webserver_command.py
+++ b/airflow/cli/commands/webserver_command.py
@@ -40,7 +40,7 @@ from airflow.utils import cli as cli_utils
 from airflow.utils.cli import setup_locations, setup_logging
 from airflow.utils.log.logging_mixin import LoggingMixin
 from airflow.utils.process_utils import check_if_pidfile_process_is_running
-from airflow.www.app import cached_app, create_app
+from airflow.www.app import create_app
 
 log = logging.getLogger(__name__)
 
@@ -354,11 +354,6 @@ def webserver(args):
             ssl_context=(ssl_cert, ssl_key) if ssl_cert and ssl_key else None,
         )
     else:
-        # This pre-warms the cache, and makes possible errors
-        # get reported earlier (i.e. before demonization)
-        os.environ['SKIP_DAGS_PARSING'] = 'True'
-        app = cached_app(None)
-        os.environ.pop('SKIP_DAGS_PARSING')
 
         pid_file, stdout, stderr, log_file = setup_locations(
             "webserver", args.pid, args.stdout, args.stderr, args.log_file
@@ -446,6 +441,11 @@ def webserver(args):
             ).start()
 
         if args.daemon:
+            # This makes possible errors get reported before daemonization
+            os.environ['SKIP_DAGS_PARSING'] = 'True'
+            app = create_app(None)
+            os.environ.pop('SKIP_DAGS_PARSING')
+
             handle = setup_logging(log_file)
 
             base, ext = os.path.splitext(pid_file)
diff --git a/airflow/www/gunicorn_config.py b/airflow/www/gunicorn_config.py
index 7c07b66..7a40a56 100644
--- a/airflow/www/gunicorn_config.py
+++ b/airflow/www/gunicorn_config.py
@@ -30,3 +30,10 @@ def post_worker_init(_):
     """
     old_title = setproctitle.getproctitle()
     setproctitle.setproctitle(settings.GUNICORN_WORKER_READY_PREFIX + old_title)
+
+
+def on_starting(server):
+    from airflow.providers_manager import ProvidersManager
+
+    # Load providers before forking workers
+    ProvidersManager().connection_form_widgets
diff --git a/airflow/www/views.py b/airflow/www/views.py
index d561cc7..85338c5 100644
--- a/airflow/www/views.py
+++ b/airflow/www/views.py
@@ -94,6 +94,7 @@ from airflow.api.common.experimental.mark_tasks import (
     set_dag_run_state_to_failed,
     set_dag_run_state_to_success,
 )
+from airflow.compat.functools import cached_property
 from airflow.configuration import AIRFLOW_CONFIG, conf
 from airflow.exceptions import AirflowException
 from airflow.executors.executor_loader import ExecutorLoader
@@ -3314,6 +3315,9 @@ def lazy_add_provider_discovered_options_to_connection_form():
     )
     for key, value in ProvidersManager().connection_form_widgets.items():
         setattr(ConnectionForm, key, value.field)
+        ConnectionModelView.add_columns.append(key)
+        ConnectionModelView.edit_columns.append(key)
+        ConnectionModelView.extra_fields.append(key)
 
 
 # Used to store a dictionary of field behaviours used to dynamically change available
@@ -3323,7 +3327,9 @@ def lazy_add_provider_discovered_options_to_connection_form():
 class ConnectionFormWidget(FormWidget):
     """Form widget used to display connection"""
 
-    field_behaviours = json.dumps(ProvidersManager().field_behaviours)
+    @cached_property
+    def field_behaviours(self):
+        return json.dumps(ProvidersManager().field_behaviours)
 
 
 class ConnectionModelView(AirflowModelView):
@@ -3351,7 +3357,6 @@ class ConnectionModelView(AirflowModelView):
         permissions.ACTION_CAN_ACCESS_MENU,
     ]
 
-    extra_fields = list(ProvidersManager().connection_form_widgets.keys())
     list_columns = [
         'conn_id',
         'conn_type',
@@ -3361,7 +3366,7 @@ class ConnectionModelView(AirflowModelView):
         'is_encrypted',
         'is_extra_encrypted',
     ]
-    add_columns = edit_columns = [
+    add_columns = [
         'conn_id',
         'conn_type',
         'description',
@@ -3371,7 +3376,11 @@ class ConnectionModelView(AirflowModelView):
         'password',
         'port',
         'extra',
-    ] + extra_fields
+    ]
+    edit_columns = add_columns.copy()
+
+    # Initialized later by lazy_add_provider_discovered_options_to_connection_form
+    extra_fields = []
 
     add_form = edit_form = ConnectionForm
     add_template = 'airflow/conn_create.html'