You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by as...@apache.org on 2021/11/19 18:23:13 UTC
[airflow] branch main updated: Speed up webserver boot time by delaying provider initialization (#19709)
This is an automated email from the ASF dual-hosted git repository.
ash pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 7a676a1 Speed up webserver boot time by delaying provider initialization (#19709)
7a676a1 is described below
commit 7a676a148f94f0dcf0bf86789d2684295798bd16
Author: Ash Berlin-Taylor <as...@firemirror.com>
AuthorDate: Fri Nov 19 18:22:42 2021 +0000
Speed up webserver boot time by delaying provider initialization (#19709)
* Speed up webserver boot time by delaying provider initialization
This drops the time to first request from 37s to 20s by making the
following changes:
- Don't pre-load the app when not in daemon mode.
The purpose of the call to `cached_app()` was to ensure that any
errors are reported on the terminal before it is detached to make
failures more obvious to the user (which is a good feature).
However, the comment about "pre-warm the cache" was incorrect: no
pre-warming actually happened, since we run gunicorn by spawning a whole
new process, which doesn't share any state with the current Python interpreter.
- Don't load/initialize providers when only importing airflow.www.views
As it was written, it would load the providers' hooks at import time.
This changes it through a combination of cached properties and the
existing `init_connection_form` function.
(`extra_fields` is not set as a cached_property because of how FAB
works -- it iterates over all attributes of the class looking for
methods/routes and then looks at properties on it, meaning it would
still access the property too early)
---
airflow/cli/commands/webserver_command.py | 12 ++++++------
airflow/www/gunicorn_config.py | 7 +++++++
airflow/www/views.py | 17 +++++++++++++----
3 files changed, 26 insertions(+), 10 deletions(-)
diff --git a/airflow/cli/commands/webserver_command.py b/airflow/cli/commands/webserver_command.py
index 7fdc743..0269083 100644
--- a/airflow/cli/commands/webserver_command.py
+++ b/airflow/cli/commands/webserver_command.py
@@ -40,7 +40,7 @@ from airflow.utils import cli as cli_utils
from airflow.utils.cli import setup_locations, setup_logging
from airflow.utils.log.logging_mixin import LoggingMixin
from airflow.utils.process_utils import check_if_pidfile_process_is_running
-from airflow.www.app import cached_app, create_app
+from airflow.www.app import create_app
log = logging.getLogger(__name__)
@@ -354,11 +354,6 @@ def webserver(args):
ssl_context=(ssl_cert, ssl_key) if ssl_cert and ssl_key else None,
)
else:
- # This pre-warms the cache, and makes possible errors
- # get reported earlier (i.e. before demonization)
- os.environ['SKIP_DAGS_PARSING'] = 'True'
- app = cached_app(None)
- os.environ.pop('SKIP_DAGS_PARSING')
pid_file, stdout, stderr, log_file = setup_locations(
"webserver", args.pid, args.stdout, args.stderr, args.log_file
@@ -446,6 +441,11 @@ def webserver(args):
).start()
if args.daemon:
+ # This makes possible errors get reported before daemonization
+ os.environ['SKIP_DAGS_PARSING'] = 'True'
+ app = create_app(None)
+ os.environ.pop('SKIP_DAGS_PARSING')
+
handle = setup_logging(log_file)
base, ext = os.path.splitext(pid_file)
diff --git a/airflow/www/gunicorn_config.py b/airflow/www/gunicorn_config.py
index 7c07b66..7a40a56 100644
--- a/airflow/www/gunicorn_config.py
+++ b/airflow/www/gunicorn_config.py
@@ -30,3 +30,10 @@ def post_worker_init(_):
"""
old_title = setproctitle.getproctitle()
setproctitle.setproctitle(settings.GUNICORN_WORKER_READY_PREFIX + old_title)
+
+
+def on_starting(server):
+ from airflow.providers_manager import ProvidersManager
+
+ # Load providers before forking workers
+ ProvidersManager().connection_form_widgets
diff --git a/airflow/www/views.py b/airflow/www/views.py
index d561cc7..85338c5 100644
--- a/airflow/www/views.py
+++ b/airflow/www/views.py
@@ -94,6 +94,7 @@ from airflow.api.common.experimental.mark_tasks import (
set_dag_run_state_to_failed,
set_dag_run_state_to_success,
)
+from airflow.compat.functools import cached_property
from airflow.configuration import AIRFLOW_CONFIG, conf
from airflow.exceptions import AirflowException
from airflow.executors.executor_loader import ExecutorLoader
@@ -3314,6 +3315,9 @@ def lazy_add_provider_discovered_options_to_connection_form():
)
for key, value in ProvidersManager().connection_form_widgets.items():
setattr(ConnectionForm, key, value.field)
+ ConnectionModelView.add_columns.append(key)
+ ConnectionModelView.edit_columns.append(key)
+ ConnectionModelView.extra_fields.append(key)
# Used to store a dictionary of field behaviours used to dynamically change available
@@ -3323,7 +3327,9 @@ def lazy_add_provider_discovered_options_to_connection_form():
class ConnectionFormWidget(FormWidget):
"""Form widget used to display connection"""
- field_behaviours = json.dumps(ProvidersManager().field_behaviours)
+ @cached_property
+ def field_behaviours(self):
+ return json.dumps(ProvidersManager().field_behaviours)
class ConnectionModelView(AirflowModelView):
@@ -3351,7 +3357,6 @@ class ConnectionModelView(AirflowModelView):
permissions.ACTION_CAN_ACCESS_MENU,
]
- extra_fields = list(ProvidersManager().connection_form_widgets.keys())
list_columns = [
'conn_id',
'conn_type',
@@ -3361,7 +3366,7 @@ class ConnectionModelView(AirflowModelView):
'is_encrypted',
'is_extra_encrypted',
]
- add_columns = edit_columns = [
+ add_columns = [
'conn_id',
'conn_type',
'description',
@@ -3371,7 +3376,11 @@ class ConnectionModelView(AirflowModelView):
'password',
'port',
'extra',
- ] + extra_fields
+ ]
+ edit_columns = add_columns.copy()
+
+ # Initialized later by lazy_add_provider_discovered_options_to_connection_form
+ extra_fields = []
add_form = edit_form = ConnectionForm
add_template = 'airflow/conn_create.html'