You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by di...@apache.org on 2020/06/29 18:13:26 UTC

[airflow] 01/01: Add support for fetching logs from running pods (#8626)

This is an automated email from the ASF dual-hosted git repository.

dimberman pushed a commit to branch v1-10-test
in repository https://gitbox.apache.org/repos/asf/airflow.git

commit 915dbea173840420f1292307ff334912538e2152
Author: Sumit Maheshwari <sm...@twitter.com>
AuthorDate: Sun May 3 13:27:11 2020 +0530

    Add support for fetching logs from running pods (#8626)
    
    When using the KubernetesExecutor without a centralized PV (persistent volume) for log storage, one has to wait until the logs are uploaded to cloud storage before viewing them in the UI. With this change, the webserver will try to fetch logs from running worker pods and display them.
    
    (cherry picked from commit 19ac45aacb00acc956025be4e607a0d7ea0ef6f2)
    (cherry picked from commit a4a3b8bcea7fe05a7680148cc8fbe85d9a9278b0)
---
 airflow/kubernetes/pod_launcher.py     |  4 +--
 airflow/utils/log/file_task_handler.py | 24 +++++++++++++++
 tests/kubernetes/test_pod_launcher.py  | 56 ++++++++++++++++++++++++++++++++++
 3 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/airflow/kubernetes/pod_launcher.py b/airflow/kubernetes/pod_launcher.py
index 6fb5989..5fd675d 100644
--- a/airflow/kubernetes/pod_launcher.py
+++ b/airflow/kubernetes/pod_launcher.py
@@ -166,7 +166,7 @@ class PodLauncher(LoggingMixin):
         wait=tenacity.wait_exponential(),
         reraise=True
     )
-    def read_pod_logs(self, pod):
+    def read_pod_logs(self, pod, tail_lines=10):
         """Reads log from the POD"""
         try:
             return self._client.read_namespaced_pod_log(
@@ -174,7 +174,7 @@ class PodLauncher(LoggingMixin):
                 namespace=pod.metadata.namespace,
                 container='base',
                 follow=True,
-                tail_lines=10,
+                tail_lines=tail_lines,
                 _preload_content=False
             )
         except BaseHTTPError as e:
diff --git a/airflow/utils/log/file_task_handler.py b/airflow/utils/log/file_task_handler.py
index 4496a5b..d424ac2 100644
--- a/airflow/utils/log/file_task_handler.py
+++ b/airflow/utils/log/file_task_handler.py
@@ -108,6 +108,30 @@ class FileTaskHandler(logging.Handler):
             except Exception as e:
                 log = "*** Failed to load local log file: {}\n".format(location)
                 log += "*** {}\n".format(str(e))
+        elif conf.get('core', 'executor') == 'KubernetesExecutor':
+            log += '*** Trying to get logs (last 100 lines) from worker pod {} ***\n\n'\
+                .format(ti.hostname)
+
+            try:
+                from airflow.kubernetes.kube_client import get_kube_client
+
+                kube_client = get_kube_client()
+                res = kube_client.read_namespaced_pod_log(
+                    name=ti.hostname,
+                    namespace=conf.get('kubernetes', 'namespace'),
+                    container='base',
+                    follow=False,
+                    tail_lines=100,
+                    _preload_content=False
+                )
+
+                for line in res:
+                    log += line.decode()
+
+            except Exception as f:  # pylint: disable=broad-except
+                log += '*** Unable to fetch logs from worker pod {} ***\n{}\n\n'.format(
+                    ti.hostname, str(f)
+                )
         else:
             url = os.path.join(
                 "http://{ti.hostname}:{worker_log_server_port}/log", log_relative_path
diff --git a/tests/kubernetes/test_pod_launcher.py b/tests/kubernetes/test_pod_launcher.py
index 9e0c288..09ba339 100644
--- a/tests/kubernetes/test_pod_launcher.py
+++ b/tests/kubernetes/test_pod_launcher.py
@@ -75,6 +75,62 @@ class TestPodLauncher(unittest.TestCase):
             mock.sentinel
         )
 
+    def test_read_pod_logs_successfully_with_tail_lines(self):
+        mock.sentinel.metadata = mock.MagicMock()
+        self.mock_kube_client.read_namespaced_pod_log.side_effect = [
+            mock.sentinel.logs
+        ]
+        logs = self.pod_launcher.read_pod_logs(mock.sentinel, 100)
+        self.assertEqual(mock.sentinel.logs, logs)
+        self.mock_kube_client.read_namespaced_pod_log.assert_has_calls([
+            mock.call(
+                _preload_content=False,
+                container='base',
+                follow=True,
+                name=mock.sentinel.metadata.name,
+                namespace=mock.sentinel.metadata.namespace,
+                tail_lines=100
+            ),
+        ])
+
+    def test_read_pod_events_successfully_returns_events(self):
+        mock.sentinel.metadata = mock.MagicMock()
+        self.mock_kube_client.list_namespaced_event.return_value = mock.sentinel.events
+        events = self.pod_launcher.read_pod_events(mock.sentinel)
+        self.assertEqual(mock.sentinel.events, events)
+
+    def test_read_pod_events_retries_successfully(self):
+        mock.sentinel.metadata = mock.MagicMock()
+        self.mock_kube_client.list_namespaced_event.side_effect = [
+            BaseHTTPError('Boom'),
+            mock.sentinel.events
+        ]
+        events = self.pod_launcher.read_pod_events(mock.sentinel)
+        self.assertEqual(mock.sentinel.events, events)
+        self.mock_kube_client.list_namespaced_event.assert_has_calls([
+            mock.call(
+                namespace=mock.sentinel.metadata.namespace,
+                field_selector="involvedObject.name={}".format(mock.sentinel.metadata.name)
+            ),
+            mock.call(
+                namespace=mock.sentinel.metadata.namespace,
+                field_selector="involvedObject.name={}".format(mock.sentinel.metadata.name)
+            )
+        ])
+
+    def test_read_pod_events_retries_fails(self):
+        mock.sentinel.metadata = mock.MagicMock()
+        self.mock_kube_client.list_namespaced_event.side_effect = [
+            BaseHTTPError('Boom'),
+            BaseHTTPError('Boom'),
+            BaseHTTPError('Boom')
+        ]
+        self.assertRaises(
+            AirflowException,
+            self.pod_launcher.read_pod_events,
+            mock.sentinel
+        )
+
     def test_read_pod_returns_logs(self):
         mock.sentinel.metadata = mock.MagicMock()
         self.mock_kube_client.read_namespaced_pod.return_value = mock.sentinel.pod_info