You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by GitBox <gi...@apache.org> on 2019/11/21 17:21:11 UTC

[GitHub] [airflow] ashb commented on a change in pull request #6627: [AIRFLOW-5931] Use os.fork when appropriate to speed up task execution.

ashb commented on a change in pull request #6627: [AIRFLOW-5931] Use os.fork when appropriate to speed up task execution.
URL: https://github.com/apache/airflow/pull/6627#discussion_r349217376
 
 

 ##########
 File path: airflow/task/task_runner/standard_task_runner.py
 ##########
 @@ -17,28 +17,69 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import os
+
 import psutil
+from setproctitle import setproctitle
 
 from airflow.task.task_runner.base_task_runner import BaseTaskRunner
 from airflow.utils.helpers import reap_process_group
 
+CAN_FORK = hasattr(os, 'fork')
+
 
 class StandardTaskRunner(BaseTaskRunner):
     """
     Runs the raw Airflow task by invoking through the Bash shell.
     """
     def __init__(self, local_task_job):
         super().__init__(local_task_job)
+        self._rc = None
 
     def start(self):
-        self.process = self.run_command()
+        if CAN_FORK and not self.run_as_user:
+            self.process = self._start_by_fork()
+        else:
+            self.process = self._start_by_exec()
 
-    def return_code(self):
-        return self.process.poll()
+    def _start_by_exec(self):
+        subprocess = self.run_command()
+        return psutil.Process(subprocess.pid)
 
-    def terminate(self):
-        if self.process and psutil.pid_exists(self.process.pid):
-            reap_process_group(self.process.pid, self.log)
+    def _start_by_fork(self):
+        pid = os.fork()
+        if pid:
 
 Review comment:
   Could do, but for Python it doesn't matter so much - os.fork() either returns the child's pid (in the parent), returns 0 (in the child), or raises an error (C's fork() can return 0, a pid, or -1 on error, but Python converts the -1 case to an exception for us).

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services