You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by da...@apache.org on 2017/04/12 18:56:42 UTC
incubator-airflow git commit: [AIRFLOW-1074] Don't count queued tasks
for concurrency limits
Repository: incubator-airflow
Updated Branches:
refs/heads/master 6b1c327ee -> 8f9f5084b
[AIRFLOW-1074] Don't count queued tasks for concurrency limits
There may be orphaned tasks that are queued but not in a
running dag run, and that
will not be cleared. We should not count these, as
they will interfere with the concurrency limits.
I hate to do this, but I changed my mind on
counting queued tasks.
1. Queued tasks that are actually queued generally
get set to running pretty quickly.
2. Because of the worker-side check, we won't
actually exceed the concurrency limit.
I don't think the queued thing is a big deal
because of this; I'm more worried about orphaned
tasks that are in QUEUED state but not in a
running dag_run (so they won't get reset)
interfering with concurrency.
There may be orphaned tasks that are queued but not in a
running dag run, and that
will not be cleared. We should not count these, as
they will interfere with the concurrency limits.
Closes #2221 from saguziel/aguziel-concurrency-2
Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/8f9f5084
Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/8f9f5084
Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/8f9f5084
Branch: refs/heads/master
Commit: 8f9f5084bfdc2aa4017fee12e22d2e94672765ba
Parents: 6b1c327
Author: Alex Guziel <al...@airbnb.com>
Authored: Wed Apr 12 11:56:03 2017 -0700
Committer: Dan Davydov <da...@airbnb.com>
Committed: Wed Apr 12 11:56:06 2017 -0700
----------------------------------------------------------------------
airflow/jobs.py | 3 ++-
tests/jobs.py | 6 +++---
2 files changed, 5 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/8f9f5084/airflow/jobs.py
----------------------------------------------------------------------
diff --git a/airflow/jobs.py b/airflow/jobs.py
index f031f6e..18cd82e 100644
--- a/airflow/jobs.py
+++ b/airflow/jobs.py
@@ -1064,11 +1064,12 @@ class SchedulerJob(BaseJob):
dag_id = task_instance.dag_id
if dag_id not in dag_id_to_possibly_running_task_count:
+ # TODO(saguziel): also check against QUEUED state, see AIRFLOW-1104
dag_id_to_possibly_running_task_count[dag_id] = \
DAG.get_num_task_instances(
dag_id,
simple_dag_bag.get_dag(dag_id).task_ids,
- states=[State.RUNNING, State.QUEUED],
+ states=[State.RUNNING],
session=session)
current_task_concurrency = dag_id_to_possibly_running_task_count[dag_id]
http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/8f9f5084/tests/jobs.py
----------------------------------------------------------------------
diff --git a/tests/jobs.py b/tests/jobs.py
index e3caa5d..e99778a 100644
--- a/tests/jobs.py
+++ b/tests/jobs.py
@@ -504,14 +504,14 @@ class SchedulerJobTest(unittest.TestCase):
ti1.refresh_from_db()
ti2.refresh_from_db()
ti1.state = State.RUNNING
- ti2.state = State.QUEUED
+ ti2.state = State.RUNNING
session.merge(ti1)
session.merge(ti2)
session.commit()
self.assertEqual(State.RUNNING, dr1.state)
self.assertEqual(2, DAG.get_num_task_instances(dag_id, dag.task_ids,
- states=[State.RUNNING, State.QUEUED], session=session))
+ states=[State.RUNNING], session=session))
# create second dag run
dr2 = scheduler.create_dag_run(dag)
@@ -538,7 +538,7 @@ class SchedulerJobTest(unittest.TestCase):
self.assertEqual(3, DAG.get_num_task_instances(dag_id, dag.task_ids,
states=[State.RUNNING, State.QUEUED], session=session))
self.assertEqual(State.RUNNING, ti1.state)
- self.assertEqual(State.QUEUED, ti2.state)
+ self.assertEqual(State.RUNNING, ti2.state)
six.assertCountEqual(self, [State.QUEUED, State.SCHEDULED], [ti3.state, ti4.state])
session.close()