You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by GitBox <gi...@apache.org> on 2020/11/10 08:05:39 UTC

[GitHub] [airflow] yuqian90 commented on a change in pull request #12228: taskgroup decorator

yuqian90 commented on a change in pull request #12228:
URL: https://github.com/apache/airflow/pull/12228#discussion_r520353635



##########
File path: tests/utils/test_task_group_decorator.py
##########
@@ -0,0 +1,189 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pendulum
+from airflow.models.dag import DAG
+from airflow.operators.python import PythonOperator, task
+from airflow.utils.task_group import taskgroup
+from airflow.www.views import task_group_to_dict
+
+
+def test_build_task_group_context_manager():
+    """
+    Tests Following :
+    1. Nested TaskGroup creation using taskgroup decorator should create same TaskGroup which can be created using
+    TaskGroup context manager.
+    2. TaskGroup consisting Tasks created using task decorator.
+    3. Node Ids of dags created with taskgroup decorator.
+    """
+
+    # Creating Tasks
+    @task
+    def task_start():
+        """Dummy Task which is First Task of Dag """
+        return '[Task_start]'
+
+    @task
+    def task_end():
+        """Dummy Task which is Last Task of Dag"""
+        print(f'[ Task_End ]')
+
+    @task
+    def task_1(value):
+        """ Dummy Task1"""
+        return f'[ Task1 {value} ]'
+
+    @task
+    def task_2(value):
+        """ Dummy Task2"""
+        print(f'[ Task2 {value} ]')
+
+    @task
+    def task_3(value):
+        """ Dummy Task3"""
+        return f'[ Task3 {value} ]'
+
+    @task
+    def task_4(value):
+        """ Dummy Task3"""
+        print(f'[ Task4 {value} ]')
+
+    # Creating TaskGroups
+    @taskgroup
+    def section_1(value):
+        """ TaskGroup for grouping related Tasks"""
+
+        @taskgroup()
+        def section_2(value2):
+            """ TaskGroup for grouping related Tasks"""
+            return task_4(task_3(value2))
+
+        op1 = task_2(task_1(value))
+        return section_2(op1)
+
+    execution_date = pendulum.parse("20201109")
+    with DAG(
+        dag_id="example_nested_task_group_decorator", start_date=execution_date, tags=["example"]
+    ) as dag:
+        t1 = task_start()
+        s1 = section_1(t1)
+        s1.set_downstream(task_end())
+
+    # Testing TaskGroup created using taskgroup decorator
+    assert set(dag.task_group.children.keys()) == {"task_start", "task_end", "section_1"}
+    assert set(dag.task_group.children['section_1'].children.keys()) == {
+        'section_1.task_1',
+        'section_1.task_2',
+        'section_1.section_2',
+    }
+
+    # Testing TaskGroup consisting Tasks created using task decorator
+    assert dag.task_dict['task_start'].downstream_task_ids == {'section_1.task_1'}
+    assert dag.task_dict['section_1.task_2'].downstream_task_ids == {'section_1.section_2.task_3'}
+    assert dag.task_dict['section_1.section_2.task_4'].downstream_task_ids == {'task_end'}
+
+    def extract_node_id(node, include_label=False):

Review comment:
       I suggest putting these tests inside the existing `test_task_group.py` instead so that things like `extract_node_id` can be reused. It also makes sense because they are closely related.

##########
File path: airflow/utils/task_group.py
##########
@@ -344,3 +346,51 @@ def get_current_task_group(cls, dag: Optional["DAG"]) -> Optional[TaskGroup]:
                 return dag.task_group
 
         return cls._context_managed_task_group
+
+
+def taskgroup(python_callable: Optional[Callable] = None, *tg_args, **tg_kwargs) -> Callable[[T], T]:
+    """
+    Python TaskGroup decorator. Wraps a function into an Airflow TaskGroup.
+    Accepts kwargs for operator TaskGroup. Can be used to parametrize TaskGroup.
+
+    :param python_callable: Function to decorate
+    :param tg_args: Arguments for TaskGroup object
+    :type tg_args: list
+    :param tg_kwargs: Kwargs for TaskGroup object.
+    :type tg_kwargs: dict
+    """
+
+    def wrapper(f: T):
+        # Setting group_id as function name if not given
+        if len(tg_args) == 0 and 'group_id' not in tg_kwargs.keys():
+            tg_kwargs['group_id'] = f.__name__
+
+        # Get dag initializer signature and bind it to validate that task_group_args,
+        # and task_group_kwargs are correct
+        task_group_sig = signature(TaskGroup.__init__)
+        task_group_bound_args = task_group_sig.bind_partial(*tg_args, **tg_kwargs)
+
+        @functools.wraps(f)
+        def factory(*args, **kwargs):
+            # Generate signature for decorated function and bind the arguments when called
+            # we do this to extract parameters so we can annotate them on the DAG object.
+            # In addition, this fails if we are missing any args/kwargs with TypeError as expected.
+            f_sig = signature(f).bind(*args, **kwargs)
+            # Apply defaults to capture default values if set.
+            f_sig.apply_defaults()
+
+            # Initialize TaskGroup with bound arguments
+            with TaskGroup(*task_group_bound_args.args, **task_group_bound_args.kwargs) as tg_obj:
+                # Invoke function to run Tasks inside the TaskGroup
+                f(**f_sig.arguments)
+
+            # Return task_group object such that it's accessible in Globals.
+            return tg_obj
+
+        return cast(T, factory)
+
+    if callable(python_callable):

Review comment:
       I think this is the same as providing just a `tg_kwargs`? If there's no `*tg_args`, the user can't put in positional arguments after the callable. You shouldn't need to explicitly raise an `AirflowException` for this.
   
   ```python
   def taskgroup(python_callable: Optional[Callable], **tg_kwargs)
   ```

##########
File path: tests/utils/test_task_group_decorator.py
##########
@@ -0,0 +1,189 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pendulum
+from airflow.models.dag import DAG
+from airflow.operators.python import PythonOperator, task
+from airflow.utils.task_group import taskgroup
+from airflow.www.views import task_group_to_dict
+
+
+def test_build_task_group_context_manager():
+    """
+    Tests Following :
+    1. Nested TaskGroup creation using taskgroup decorator should create same TaskGroup which can be created using
+    TaskGroup context manager.
+    2. TaskGroup consisting Tasks created using task decorator.
+    3. Node Ids of dags created with taskgroup decorator.
+    """
+
+    # Creating Tasks
+    @task
+    def task_start():
+        """Dummy Task which is First Task of Dag """
+        return '[Task_start]'
+
+    @task
+    def task_end():
+        """Dummy Task which is Last Task of Dag"""
+        print(f'[ Task_End ]')
+
+    @task
+    def task_1(value):
+        """ Dummy Task1"""
+        return f'[ Task1 {value} ]'
+
+    @task
+    def task_2(value):
+        """ Dummy Task2"""
+        print(f'[ Task2 {value} ]')
+
+    @task
+    def task_3(value):
+        """ Dummy Task3"""
+        return f'[ Task3 {value} ]'
+
+    @task
+    def task_4(value):
+        """ Dummy Task3"""
+        print(f'[ Task4 {value} ]')
+
+    # Creating TaskGroups
+    @taskgroup
+    def section_1(value):
+        """ TaskGroup for grouping related Tasks"""
+
+        @taskgroup()
+        def section_2(value2):
+            """ TaskGroup for grouping related Tasks"""
+            return task_4(task_3(value2))
+
+        op1 = task_2(task_1(value))
+        return section_2(op1)
+
+    execution_date = pendulum.parse("20201109")
+    with DAG(
+        dag_id="example_nested_task_group_decorator", start_date=execution_date, tags=["example"]
+    ) as dag:
+        t1 = task_start()
+        s1 = section_1(t1)
+        s1.set_downstream(task_end())
+
+    # Testing TaskGroup created using taskgroup decorator
+    assert set(dag.task_group.children.keys()) == {"task_start", "task_end", "section_1"}
+    assert set(dag.task_group.children['section_1'].children.keys()) == {
+        'section_1.task_1',
+        'section_1.task_2',
+        'section_1.section_2',
+    }
+
+    # Testing TaskGroup consisting Tasks created using task decorator
+    assert dag.task_dict['task_start'].downstream_task_ids == {'section_1.task_1'}
+    assert dag.task_dict['section_1.task_2'].downstream_task_ids == {'section_1.section_2.task_3'}
+    assert dag.task_dict['section_1.section_2.task_4'].downstream_task_ids == {'task_end'}
+
+    def extract_node_id(node, include_label=False):
+        ret = {"id": node["id"]}
+        if include_label:
+            ret["label"] = node["value"]["label"]
+        if "children" in node:
+            children = []
+            for child in node["children"]:
+                children.append(extract_node_id(child, include_label=include_label))
+
+            ret["children"] = children
+        return ret
+
+    # Node IDs test
+    node_ids = {
+        'id': None,
+        'children': [
+            {
+                'id': 'section_1',
+                'children': [
+                    {
+                        'id': 'section_1.section_2',
+                        'children': [
+                            {'id': 'section_1.section_2.task_3'},
+                            {'id': 'section_1.section_2.task_4'},
+                        ],
+                    },
+                    {'id': 'section_1.task_1'},
+                    {'id': 'section_1.task_2'},
+                    {'id': 'section_1.downstream_join_id'},
+                ],
+            },
+            {'id': 'task_end'},
+            {'id': 'task_start'},
+        ],
+    }
+
+    assert extract_node_id(task_group_to_dict(dag.task_group)) == node_ids
+
+
+def test_build_task_group_with_operators():
+    """  Tests DAG with Tasks created with *Operators and TaskGroup created with taskgroup decorator """
+
+    def task_start():
+        """Dummy Task which is First Task of Dag """
+        return '[Task_start]'
+
+    def task_end():
+        """Dummy Task which is Last Task of Dag"""
+        print(f'[ Task_End  ]')
+
+    # Creating Tasks
+    @task
+    def task_1(value):
+        """ Dummy Task1"""
+        return f'[ Task1 {value} ]'
+
+    @task
+    def task_2(value):
+        """ Dummy Task2"""
+        return f'[ Task2 {value} ]'
+
+    @task
+    def task_3(value):
+        """ Dummy Task3"""
+        print(f'[ Task3 {value} ]')
+
+    # Creating TaskGroups
+    @taskgroup(group_id='section_1')
+    def section_1(value):
+        """ TaskGroup for grouping related Tasks"""
+        return task_3(task_2(task_1(value)))
+
+    execution_date = pendulum.parse("20201109")
+    with DAG(dag_id="example_task_group_decorator_mix", start_date=execution_date, tags=["example"]) as dag:
+        t_start = PythonOperator(task_id='task_start', python_callable=task_start, dag=dag)
+        s1 = section_1(t_start.output)
+        t_end = PythonOperator(task_id='task_end', python_callable=task_end, dag=dag)
+        s1.set_downstream(t_end)
+
+    # Testing Tasks ing DAG
+    assert set(dag.task_group.children.keys()) == {'section_1', 'task_start', 'task_end'}
+    assert set(dag.task_group.children['section_1'].children.keys()) == {
+        'section_1.task_2',
+        'section_1.task_3',
+        'section_1.task_1',
+    }
+
+    # Testing Tasks downstream
+    assert dag.task_dict['task_start'].downstream_task_ids == {'section_1.task_1'}

Review comment:
       Maybe also mix `TaskGroup` context manager and decorator in the same DAG to test if they play nice together?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org