Posted to commits@airflow.apache.org by GitBox <gi...@apache.org> on 2019/07/16 00:06:54 UTC

[GitHub] [airflow] coufon commented on a change in pull request #5594: [AIRFLOW-4924] Loading DAGs asynchronously in Airflow webserver

URL: https://github.com/apache/airflow/pull/5594#discussion_r303684267
 
 

 ##########
 File path: airflow/dag/stringified_dags.py
 ##########
 @@ -0,0 +1,137 @@
+# -*- coding: utf-8 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# DagCached is a new feature in Airflow that caches processed DAGs in the
+# Airflow database. DAGs are stringified first, then serialized with pickle
+# before being stored in the database. Stringified DAGs hold the metadata of
+# the original DAGs and tasks, and can be used by the Airflow webserver
+# and scheduler.
+
+"""Methods to stringify DAGs and tasks to be compatible with pickle."""
+
+import copy
+import functools
+import inspect
+import logging
+
+from airflow import models
+
+
+# All fields of DAGs and tasks are stringified except for the (mostly
+# time-related) fields listed below, which are kept as-is.
+_dag_fields_to_keep = {
+    'schedule_interval', 'start_date', 'end_date', 'dagrun_timeout',
+    'timezone', 'last_loaded', '_schedule_interval', 'test_field'}
+
+_task_fields_to_keep = {
+    'retry_delay', 'max_retry_delay', 'start_date', 'end_date',
+    'schedule_interval', 'sla', 'execution_timeout'}
+
+_primitive_types = (int, bool, float, str, bytes)
+
+
+def _is_primitive(x):
+    return x is None or isinstance(x, _primitive_types)
+
+
+def _stringify_dag_or_task(x, stringified_dags, is_dag):
+    """Returns a stringified DAG or task."""
+    if is_dag and x.dag_id in stringified_dags:
+        return stringified_dags[x.dag_id]
+
+    # Cast any operator defined in a non-Airflow module to BaseOperator so
+    # that unpickling succeeds. The downside is that the task will be
+    # displayed as BaseOperator in the UI.
+    if not is_dag and not x.__class__.__module__.startswith('airflow.operators'):
 
 Review comment:
   > What do you think about adding support for `airflow.contrib.operators`? Are there any obstacles to doing it?
   
   Thanks for pointing it out. It does now support 'airflow.contrib.operators'; I added it along with unit tests.
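   
   Roughly, the check now covers both namespaces. A simplified sketch (illustrative names, not the exact code in this PR):
   
   ```python
   # Module prefixes whose operators are known to unpickle cleanly.
   # _AIRFLOW_OPERATOR_MODULES is an illustrative name, not the PR's.
   _AIRFLOW_OPERATOR_MODULES = ('airflow.operators', 'airflow.contrib.operators')
   
   
   def _is_airflow_operator(task):
       """Returns True if the task class lives in a known Airflow operator module."""
       # str.startswith accepts a tuple, so one call covers both prefixes.
       return task.__class__.__module__.startswith(_AIRFLOW_OPERATOR_MODULES)
   ```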
   
   In the Composer implementation we did not check the operator type: if pickling fails for a non-Airflow task, the DAG simply does not appear in the UI. I added this check only recently and forgot to include contrib operators.
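   
   For context, the cast mentioned in the code comment works roughly like this (a minimal sketch assuming a copy-then-downcast approach; `_downcast_to_base_operator` is an illustrative name, not the PR's actual helper):
   
   ```python
   import copy
   
   from airflow.models import BaseOperator
   
   
   def _downcast_to_base_operator(task):
       """Sketch: make a task from a non-Airflow module safe to pickle.
   
       Reassigning __class__ keeps the task's attributes (task_id, retries,
       and so on) but drops the custom subclass, which is why such tasks
       show up as BaseOperator in the UI.
       """
       task = copy.copy(task)  # do not mutate the live DAG object
       task.__class__ = BaseOperator
       return task
   ```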
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services