Posted to commits@airflow.apache.org by GitBox <gi...@apache.org> on 2020/06/01 09:29:30 UTC

[GitHub] [airflow] mik-laj commented on a change in pull request #9088: Add query count test for SchedulerJob

mik-laj commented on a change in pull request #9088:
URL: https://github.com/apache/airflow/pull/9088#discussion_r433133083



##########
File path: tests/jobs/test_scheduler_job.py
##########
@@ -3376,3 +3376,58 @@ def test_task_with_upstream_skip_process_task_instances():
         assert tis[dummy2.task_id].state == State.SUCCESS
         # dummy3 should be skipped because dummy1 is skipped.
         assert tis[dummy3.task_id].state == State.SKIPPED
+
+
+class TestSchedulerJobQueriesCount(unittest.TestCase):
+    """
+    These tests are designed to detect changes in the number of queries executed
+    for different DAG files. They make it easy to spot a change that affects
+    the performance of the SchedulerJob.
+    """
+    def setUp(self) -> None:
+        clear_db_runs()
+        clear_db_pools()
+        clear_db_dags()
+        clear_db_sla_miss()
+        clear_db_errors()
+
+    @parameterized.expand(
+        [
+            # pylint: disable=bad-whitespace
+            # expected, dag_count, task_count
+            # One DAG with one task per DAG file
+            ( 13,  1,  1),  # noqa
+            # One DAG with five tasks per DAG file
+            ( 25,  1,  5),  # noqa
+            # 10 DAGs with 10 tasks per DAG file
+            (108, 10, 10),  # noqa
+        ]
+    )
+    def test_execute_queries_count(self, expected_query_count, dag_count, task_count):
+        with mock.patch.dict("os.environ", {
+            "PERF_DAGS_COUNT": str(dag_count),
+            "PERF_TASKS_COUNT": str(task_count),
+            "PERF_START_AGO": "1d",
+            "PERF_SCHEDULE_INTERVAL": "30m",
+            "PERF_SHAPE": "no_structure",
+        }), conf_vars({
+            ('scheduler', 'use_job_schedule'): 'True',
+            ('core', 'load_examples'): 'False',
+        }):
+
+            dagbag = DagBag(dag_folder=ELASTIC_DAG_FILE, include_examples=False)
+            for i, dag in enumerate(dagbag.dags.values()):
+                dr = dag.create_dagrun(state=State.RUNNING, run_id=f"{DagRunType.MANUAL.value}__{i}")
+                for ti in dr.get_task_instances():
+                    ti.set_state(state=State.SCHEDULED)
+
+            mock_agent = mock.MagicMock()
+            mock_agent.harvest_simple_dags.return_value = [SimpleDag(d) for d in dagbag.dags.values()]
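
The hunk is cut off at the line the reviewer is commenting on, so the assertion
that actually counts the queries is not visible in this excerpt. As a rough
sketch only (assuming Airflow's test helpers assert_queries_count and
MockExecutor and a SchedulerJob entry point, none of which appear above), a
query-count test of this shape typically finishes along these lines:

    # Sketch, not the PR's code: assert_queries_count, MockExecutor and the
    # exact SchedulerJob call below are assumptions based on Airflow's test
    # utilities of that era.
    from tests.test_utils.asserts import assert_queries_count
    from tests.test_utils.mock_executor import MockExecutor

    job = SchedulerJob(subdir=ELASTIC_DAG_FILE, num_runs=1)
    job.executor = MockExecutor(do_update=False)
    job.heartbeat = mock.MagicMock()   # keep heartbeats from adding extra DB writes
    job.processor_agent = mock_agent   # reuse the mocked agent built above

    with assert_queries_count(expected_query_count):
        job._execute()  # or whichever scheduler-loop entry point the real test drives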

Review comment:
    The number of queries depends on whether this value is None or not.
    https://github.com/apache/airflow/blob/7c0e6ede6078ff364da5d96afee65ec53e87cf65/airflow/jobs/scheduler_job.py#L1628
    https://github.com/apache/airflow/blob/7c0e6ede6078ff364da5d96afee65ec53e87cf65/airflow/jobs/scheduler_job.py#L1650
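
The reviewer's point reads as: the measured query count changes depending on
whether the harvested SimpleDags value (mocked on the commented line) is None
or a populated list, presumably because of the branches at the two linked
scheduler_job.py lines. A purely illustrative way to pin that down in the test
(not code from this PR) would be to exercise both cases explicitly:

    # Illustration of the reviewer's point, not the PR's code: tie the expected
    # query count to whether harvest_simple_dags() yields None or a populated
    # list, since the scheduler may take a different path in each case.
    for simple_dags in (None, [SimpleDag(d) for d in dagbag.dags.values()]):
        mock_agent = mock.MagicMock()
        mock_agent.harvest_simple_dags.return_value = simple_dags
        # ... run the scheduler loop and assert the query count expected
        #     for this particular case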




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org