You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by po...@apache.org on 2020/12/24 13:21:10 UTC

[airflow] branch master updated: Decode Remote Google Logs (#13115)

This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/master by this push:
     new e9d65bd  Decode Remote Google Logs (#13115)
e9d65bd is described below

commit e9d65bd4582b083914f2fc1213bea44cf41d1a08
Author: Kevin Yuen <ke...@delphia.com>
AuthorDate: Thu Dec 24 08:21:01 2020 -0500

    Decode Remote Google Logs (#13115)
    
    * decode remote google logs before returning
    
    The `Blob.download_as_string` function returns a byte which cause
    the log result to be displayed in a single line like:
    
    b"line1\nline2"
    
    instead of
    
    line1
    line2
    
    added an isinstance check to make sure it doesn't break if it
    returns string in some case and not others
---
 .../providers/google/cloud/log/gcs_task_handler.py |  4 +--
 .../google/cloud/log/test_gcs_task_handler.py      | 34 +++++++++++++---------
 2 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/airflow/providers/google/cloud/log/gcs_task_handler.py b/airflow/providers/google/cloud/log/gcs_task_handler.py
index 9fd456d..b09c713 100644
--- a/airflow/providers/google/cloud/log/gcs_task_handler.py
+++ b/airflow/providers/google/cloud/log/gcs_task_handler.py
@@ -152,7 +152,7 @@ class GCSTaskHandler(FileTaskHandler, LoggingMixin):
 
         try:
             blob = storage.Blob.from_string(remote_loc, self.client)
-            remote_log = blob.download_as_string()
+            remote_log = blob.download_as_bytes().decode()
             log = f'*** Reading remote log from {remote_loc}.\n{remote_log}\n'
             return log, {'end_of_log': True}
         except Exception as e:  # pylint: disable=broad-except
@@ -174,7 +174,7 @@ class GCSTaskHandler(FileTaskHandler, LoggingMixin):
         """
         try:
             blob = storage.Blob.from_string(remote_log_location, self.client)
-            old_log = blob.download_as_string()
+            old_log = blob.download_as_bytes().decode()
             log = '\n'.join([old_log, log]) if old_log else log
         except Exception as e:  # pylint: disable=broad-except
             if not hasattr(e, 'resp') or e.resp.get('status') != '404':  # pylint: disable=no-member
diff --git a/tests/providers/google/cloud/log/test_gcs_task_handler.py b/tests/providers/google/cloud/log/test_gcs_task_handler.py
index 53437b2..2f0a746 100644
--- a/tests/providers/google/cloud/log/test_gcs_task_handler.py
+++ b/tests/providers/google/cloud/log/test_gcs_task_handler.py
@@ -75,7 +75,7 @@ class TestGCSTaskHandler(unittest.TestCase):
     @mock.patch("google.cloud.storage.Client")
     @mock.patch("google.cloud.storage.Blob")
     def test_should_read_logs_from_remote(self, mock_blob, mock_client, mock_creds):
-        mock_blob.from_string.return_value.download_as_string.return_value = "CONTENT"
+        mock_blob.from_string.return_value.download_as_bytes.return_value = b"CONTENT"
 
         logs, metadata = self.gcs_task_handler._read(self.ti, self.ti.try_number)
         mock_blob.from_string.assert_called_once_with(
@@ -94,7 +94,7 @@ class TestGCSTaskHandler(unittest.TestCase):
     @mock.patch("google.cloud.storage.Client")
     @mock.patch("google.cloud.storage.Blob")
     def test_should_read_from_local(self, mock_blob, mock_client, mock_creds):
-        mock_blob.from_string.return_value.download_as_string.side_effect = Exception("Failed to connect")
+        mock_blob.from_string.return_value.download_as_bytes.side_effect = Exception("Failed to connect")
 
         self.gcs_task_handler.set_context(self.ti)
         log, metadata = self.gcs_task_handler._read(self.ti, self.ti.try_number)
@@ -116,7 +116,7 @@ class TestGCSTaskHandler(unittest.TestCase):
     @mock.patch("google.cloud.storage.Client")
     @mock.patch("google.cloud.storage.Blob")
     def test_write_to_remote_on_close(self, mock_blob, mock_client, mock_creds):
-        mock_blob.from_string.return_value.download_as_string.return_value = "CONTENT"
+        mock_blob.from_string.return_value.download_as_bytes.return_value = b"CONTENT"
 
         self.gcs_task_handler.set_context(self.ti)
         self.gcs_task_handler.emit(
@@ -135,7 +135,7 @@ class TestGCSTaskHandler(unittest.TestCase):
         mock_blob.assert_has_calls(
             [
                 mock.call.from_string("gs://bucket/remote/log/location/1.log", mock_client.return_value),
-                mock.call.from_string().download_as_string(),
+                mock.call.from_string().download_as_bytes(),
                 mock.call.from_string("gs://bucket/remote/log/location/1.log", mock_client.return_value),
                 mock.call.from_string().upload_from_string("CONTENT\nMESSAGE\n", content_type="text/plain"),
             ],
@@ -152,17 +152,26 @@ class TestGCSTaskHandler(unittest.TestCase):
     @mock.patch("google.cloud.storage.Blob")
     def test_failed_write_to_remote_on_close(self, mock_blob, mock_client, mock_creds):
         mock_blob.from_string.return_value.upload_from_string.side_effect = Exception("Failed to connect")
-        mock_blob.from_string.return_value.download_as_string.return_value = b"Old log"
+        mock_blob.from_string.return_value.download_as_bytes.return_value = b"Old log"
 
         self.gcs_task_handler.set_context(self.ti)
+        self.gcs_task_handler.emit(
+            logging.LogRecord(
+                name="NAME",
+                level="DEBUG",
+                pathname=None,
+                lineno=None,
+                msg="MESSAGE",
+                args=None,
+                exc_info=None,
+            )
+        )
         with self.assertLogs(self.gcs_task_handler.log) as cm:
             self.gcs_task_handler.close()
 
         self.assertEqual(
             cm.output,
             [
-                'INFO:airflow.providers.google.cloud.log.gcs_task_handler.GCSTaskHandler:Previous '
-                'log discarded: sequence item 0: expected str instance, bytes found',
                 'ERROR:airflow.providers.google.cloud.log.gcs_task_handler.GCSTaskHandler:Could '
                 'not write logs to gs://bucket/remote/log/location/1.log: Failed to connect',
             ],
@@ -170,12 +179,9 @@ class TestGCSTaskHandler(unittest.TestCase):
         mock_blob.assert_has_calls(
             [
                 mock.call.from_string("gs://bucket/remote/log/location/1.log", mock_client.return_value),
-                mock.call.from_string().download_as_string(),
+                mock.call.from_string().download_as_bytes(),
                 mock.call.from_string("gs://bucket/remote/log/location/1.log", mock_client.return_value),
-                mock.call.from_string().upload_from_string(
-                    "*** Previous log discarded: sequence item 0: expected str instance, bytes found\n\n",
-                    content_type="text/plain",
-                ),
+                mock.call.from_string().upload_from_string("Old log\nMESSAGE\n", content_type="text/plain"),
             ],
             any_order=False,
         )
@@ -187,7 +193,7 @@ class TestGCSTaskHandler(unittest.TestCase):
     @mock.patch("google.cloud.storage.Client")
     @mock.patch("google.cloud.storage.Blob")
     def test_write_to_remote_on_close_failed_read_old_logs(self, mock_blob, mock_client, mock_creds):
-        mock_blob.from_string.return_value.download_as_string.side_effect = Exception("Fail to download")
+        mock_blob.from_string.return_value.download_as_bytes.side_effect = Exception("Fail to download")
 
         self.gcs_task_handler.set_context(self.ti)
         self.gcs_task_handler.emit(
@@ -206,7 +212,7 @@ class TestGCSTaskHandler(unittest.TestCase):
         mock_blob.assert_has_calls(
             [
                 mock.call.from_string("gs://bucket/remote/log/location/1.log", mock_client.return_value),
-                mock.call.from_string().download_as_string(),
+                mock.call.from_string().download_as_bytes(),
                 mock.call.from_string("gs://bucket/remote/log/location/1.log", mock_client.return_value),
                 mock.call.from_string().upload_from_string(
                     "*** Previous log discarded: Fail to download\n\nMESSAGE\n", content_type="text/plain"