You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by po...@apache.org on 2021/01/23 04:13:01 UTC
[airflow] branch v1-10-test updated: Remove
UndefinedJinjaVariableRule
This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch v1-10-test
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/v1-10-test by this push:
new b5b74cc Remove UndefinedJinjaVariableRule
b5b74cc is described below
commit b5b74cca1b60b8c5d9ec66c3581615262d0bdaf3
Author: Jarek Potiuk <ja...@potiuk.com>
AuthorDate: Sat Jan 23 05:10:29 2021 +0100
Remove UndefinedJinjaVariableRule
This rule is problematic because it hangs randomly - both in test
and production.
This PR removes the rule.
---
airflow/upgrade/rules/undefined_jinja_varaibles.py | 154 ----------------
.../rules/test_undefined_jinja_varaibles.py | 198 ---------------------
2 files changed, 352 deletions(-)
diff --git a/airflow/upgrade/rules/undefined_jinja_varaibles.py b/airflow/upgrade/rules/undefined_jinja_varaibles.py
deleted file mode 100644
index 100a31b..0000000
--- a/airflow/upgrade/rules/undefined_jinja_varaibles.py
+++ /dev/null
@@ -1,154 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from __future__ import absolute_import
-
-import re
-
-import jinja2
-import six
-
-from airflow import conf
-from airflow.models import DagBag, TaskInstance
-from airflow.upgrade.rules.base_rule import BaseRule
-from airflow.utils import timezone
-
-
-class UndefinedJinjaVariablesRule(BaseRule):
-
- title = "Jinja Template Variables cannot be undefined"
-
- description = """\
-The default behavior for DAG's Jinja templates has changed. Now, more restrictive validation
-of non-existent variables is applied - `jinja2.StrictUndefined`.
-
-The user should do either of the following to fix this -
-1. Fix the Jinja Templates by defining every variable or providing default values
-2. Explicitly declare `template_undefined=jinja2.Undefined` while defining the DAG
-"""
-
- def _check_rendered_content(self, rendered_content, seen_oids=None):
- """Replicates the logic in BaseOperator.render_template() to
- cover all the cases needed to be checked.
- """
- if isinstance(rendered_content, six.string_types):
- return set(re.findall(r"{{(.*?)}}", rendered_content))
-
- elif isinstance(rendered_content, (tuple, list, set)):
- debug_error_messages = set()
- for element in rendered_content:
- debug_error_messages.update(self._check_rendered_content(element))
- return debug_error_messages
-
- elif isinstance(rendered_content, dict):
- debug_error_messages = set()
- for key, value in rendered_content.items():
- debug_error_messages.update(self._check_rendered_content(value))
- return debug_error_messages
-
- elif hasattr(rendered_content, "template_fields"):
- if seen_oids is None:
- seen_oids = set()
- return self._nested_check_rendered(rendered_content, seen_oids)
-
- else:
- # Rendered content is not actually rendered
- return set()
-
- def _nested_check_rendered(self, rendered_content, seen_oids):
- debug_error_messages = set()
- if id(rendered_content) not in seen_oids:
- seen_oids.add(id(rendered_content))
- nested_template_fields = rendered_content.template_fields
- for attr_name in nested_template_fields:
- nested_rendered_content = getattr(rendered_content, attr_name)
-
- if nested_rendered_content:
- errors = list(
- self._check_rendered_content(nested_rendered_content, seen_oids)
- )
- for i in range(len(errors)):
- errors[i].strip()
- errors[i] += " NestedTemplateField={}".format(attr_name)
- debug_error_messages.update(errors)
- return debug_error_messages
-
- def _render_task_content(self, task, content, context):
- completed_rendering = False
- errors_while_rendering = []
- while not completed_rendering:
- # Catch errors such as {{ object.element }} where
- # object is not defined
- try:
- renderend_content = task.render_template(content, context)
- completed_rendering = True
- except Exception as e:
- undefined_variable = re.sub(" is undefined", "", str(e))
- undefined_variable = re.sub("'", "", undefined_variable)
- context[undefined_variable] = dict()
- message = "Could not find the object '{}'".format(undefined_variable)
- errors_while_rendering.append(message)
- return renderend_content, errors_while_rendering
-
- def iterate_over_template_fields(self, task):
- messages = {}
- task_instance = TaskInstance(task=task, execution_date=timezone.utcnow())
- context = task_instance.get_template_context()
- for attr_name in task.template_fields:
- content = getattr(task, attr_name)
- if content:
- rendered_content, errors_while_rendering = self._render_task_content(
- task, content, context
- )
- debug_error_messages = list(
- self._check_rendered_content(rendered_content, set())
- )
- messages[attr_name] = errors_while_rendering + debug_error_messages
-
- return messages
-
- def iterate_over_dag_tasks(self, dag):
- dag.template_undefined = jinja2.DebugUndefined
- tasks = dag.tasks
- messages = {}
- for task in tasks:
- error_messages = self.iterate_over_template_fields(task)
- messages[task.task_id] = error_messages
- return messages
-
- def check(self, dagbag=None):
- if not dagbag:
- dag_folder = conf.get("core", "dags_folder")
- dagbag = DagBag(dag_folder)
- dags = dagbag.dags
- messages = []
- for dag_id, dag in dags.items():
- if dag.template_undefined:
- continue
- dag_messages = self.iterate_over_dag_tasks(dag)
-
- for task_id, task_messages in dag_messages.items():
- for attr_name, error_messages in task_messages.items():
- for error_message in error_messages:
- message = (
- "Possible UndefinedJinjaVariable -> DAG: {}, Task: {}, "
- "Attribute: {}, Error: {}".format(
- dag_id, task_id, attr_name, error_message.strip()
- )
- )
- messages.append(message)
- return messages
diff --git a/tests/upgrade/rules/test_undefined_jinja_varaibles.py b/tests/upgrade/rules/test_undefined_jinja_varaibles.py
deleted file mode 100644
index 96cb30c..0000000
--- a/tests/upgrade/rules/test_undefined_jinja_varaibles.py
+++ /dev/null
@@ -1,198 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from tempfile import mkdtemp
-from unittest import TestCase
-
-import jinja2
-
-from airflow import DAG
-from airflow.models import DagBag
-from airflow.operators.bash_operator import BashOperator
-from airflow.upgrade.rules.undefined_jinja_varaibles import UndefinedJinjaVariablesRule
-from tests.models import DEFAULT_DATE
-
-
-class ClassWithCustomAttributes:
- """Class for testing purpose: allows to create objects with custom attributes in one single statement."""
-
- def __init__(self, **kwargs):
- for key, value in kwargs.items():
- setattr(self, key, value)
-
- def __str__(self):
- return "{}({})".format(ClassWithCustomAttributes.__name__, str(self.__dict__))
-
- def __repr__(self):
- return self.__str__()
-
- def __eq__(self, other):
- return self.__dict__ == other.__dict__
-
- def __ne__(self, other):
- return not self.__eq__(other)
-
-
-class TestUndefinedJinjaVariablesRule(TestCase):
- @classmethod
- def setUpClass(cls):
- cls.empty_dir = mkdtemp()
-
- def setUpValidDag(self):
- self.valid_dag = DAG(
- dag_id="test-defined-jinja-variables", start_date=DEFAULT_DATE
- )
-
- BashOperator(
- task_id="templated_string",
- depends_on_past=False,
- bash_command="echo",
- env={
- "integer": "{{ params.integer }}",
- "float": "{{ params.float }}",
- "string": "{{ params.string }}",
- "boolean": "{{ params.boolean }}",
- "integer_direct": 1,
- "float_direct": 1.0,
- "string_direct": "test_string",
- "boolean_direct": True,
- "none_direct": None,
- "object_direct": object(),
- },
- params={
- "integer": 1,
- "float": 1.0,
- "string": "test_string",
- "boolean": True,
- },
- dag=self.valid_dag,
- )
-
- def setUpDagToSkip(self):
- self.skip_dag = DAG(
- dag_id="test-defined-jinja-variables",
- start_date=DEFAULT_DATE,
- template_undefined=jinja2.Undefined,
- )
-
- BashOperator(
- task_id="templated_string",
- depends_on_past=False,
- bash_command="{{ undefined }}",
- dag=self.skip_dag,
- )
-
- def setUpInvalidDag(self):
- self.invalid_dag = DAG(
- dag_id="test-undefined-jinja-variables", start_date=DEFAULT_DATE
- )
-
- invalid_template_command = """
- {% for i in range(5) %}
- echo "{{ params.defined_variable }}"
- echo "{{ execution_date.today }}"
- echo "{{ execution_date.invalid_element }}"
- echo "{{ params.undefined_variable }}"
- echo "{{ foo }}"
- {% endfor %}
- """
-
- nested_validation = ClassWithCustomAttributes(
- nested1=ClassWithCustomAttributes(
- att1="{{ nested.undefined }}", template_fields=["att1"]
- ),
- nested2=ClassWithCustomAttributes(
- att2="{{ bar }}", template_fields=["att2"]
- ),
- template_fields=["nested1", "nested2"],
- )
-
- BashOperator(
- task_id="templated_string",
- depends_on_past=False,
- bash_command=invalid_template_command,
- env={
- "undefined_object": "{{ undefined_object.element }}",
- "nested_object": nested_validation,
- },
- params={"defined_variable": "defined_value"},
- dag=self.invalid_dag,
- )
-
- def setUp(self):
- self.setUpValidDag()
- self.setUpDagToSkip()
- self.setUpInvalidDag()
-
- def test_description_and_title_is_defined(self):
- rule = UndefinedJinjaVariablesRule()
- assert isinstance(rule.description, str)
- assert isinstance(rule.title, str)
-
- def test_valid_check(self):
- dagbag = DagBag(dag_folder=self.empty_dir, include_examples=False)
- dagbag.dags[self.valid_dag.dag_id] = self.valid_dag
- rule = UndefinedJinjaVariablesRule()
-
- messages = rule.check(dagbag)
-
- assert len(messages) == 0
-
- def test_skipping_dag_check(self):
- dagbag = DagBag(dag_folder=self.empty_dir, include_examples=False)
- dagbag.dags[self.skip_dag.dag_id] = self.skip_dag
- rule = UndefinedJinjaVariablesRule()
-
- messages = rule.check(dagbag)
-
- assert len(messages) == 0
-
- def test_invalid_check(self):
- dagbag = DagBag(dag_folder=self.empty_dir, include_examples=False)
- dagbag.dags[self.invalid_dag.dag_id] = self.invalid_dag
- rule = UndefinedJinjaVariablesRule()
-
- messages = rule.check(dagbag)
-
- expected_messages = [
- "Possible UndefinedJinjaVariable -> DAG: test-undefined-jinja-variables, "
- "Task: templated_string, Attribute: bash_command, Error: no such element: "
- "dict object['undefined_variable']",
- "Possible UndefinedJinjaVariable -> DAG: test-undefined-jinja-variables, "
- "Task: templated_string, Attribute: bash_command, Error: no such element: "
- "pendulum.pendulum.Pendulum object['invalid_element']",
- "Possible UndefinedJinjaVariable -> DAG: test-undefined-jinja-variables, "
- "Task: templated_string, Attribute: bash_command, Error: foo",
- "Possible UndefinedJinjaVariable -> DAG: test-undefined-jinja-variables, "
- "Task: templated_string, Attribute: env, Error: Could not find the "
- "object 'undefined_object",
- "Possible UndefinedJinjaVariable -> DAG: test-undefined-jinja-variables, "
- "Task: templated_string, Attribute: env, Error: Could not find the object 'nested'",
- "Possible UndefinedJinjaVariable -> DAG: test-undefined-jinja-variables, "
- "Task: templated_string, Attribute: env, Error: bar NestedTemplateField=att2 "
- "NestedTemplateField=nested2",
- "Possible UndefinedJinjaVariable -> DAG: test-undefined-jinja-variables, "
- "Task: templated_string, Attribute: env, Error: no such element: "
- "dict object['undefined'] NestedTemplateField=att1 NestedTemplateField=nested1",
- "Possible UndefinedJinjaVariable -> DAG: test-undefined-jinja-variables, "
- "Task: templated_string, Attribute: env, Error: no such element: dict object['element']",
- ]
-
- assert len(messages) == len(expected_messages)
- assert [m for m in messages if m in expected_messages], len(messages) == len(
- expected_messages
- )