You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by as...@apache.org on 2022/12/15 10:29:48 UTC

[airflow] branch main updated: Add a new config for adapting masked secrets to make it easier to prevent secret leakage in logs (#28239)

This is an automated email from the ASF dual-hosted git repository.

ash pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 2794662459 Add a new config for adapting masked secrets to make it easier to prevent secret leakage in logs (#28239)
2794662459 is described below

commit 2794662459b6d2f40b55b9b1d39eed14b5c3c5e9
Author: Charles Machalow <cm...@linkedin.com>
AuthorDate: Thu Dec 15 02:29:25 2022 -0800

    Add a new config for adapting masked secrets to make it easier to prevent secret leakage in logs (#28239)
    
    
    Co-authored-by: Jarek Potiuk <ja...@potiuk.com>
    Co-authored-by: Ash Berlin-Taylor <as...@firemirror.com>
---
 airflow/config_templates/config.yml          | 11 ++++++
 airflow/config_templates/default_airflow.cfg |  8 +++++
 airflow/utils/log/secrets_masker.py          | 54 +++++++++++++++++++++++-----
 tests/utils/log/test_secrets_masker.py       | 42 +++++++++++++++++++++-
 4 files changed, 106 insertions(+), 9 deletions(-)

diff --git a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml
index 962de33d9d..21d787cb56 100644
--- a/airflow/config_templates/config.yml
+++ b/airflow/config_templates/config.yml
@@ -720,6 +720,17 @@
       type: string
       example: ~
       default: "airflow.utils.log.timezone_aware.TimezoneAware"
+    - name: secret_mask_adapter
+      description: |
+        An import path to a function to add adaptations of each secret added with
+        `airflow.utils.log.secrets_masker.mask_secret` to be masked in log messages. The given function
+        is expected to require a single parameter: the secret to be adapted. It may return a
+        single adaptation of the secret or an iterable of adaptations to each be masked as secrets.
+        The original secret will be masked as well as any adaptations returned.
+      version_added: 2.6.0
+      type: string
+      default: ""
+      example: "urllib.parse.quote"
     - name: task_log_prefix_template
       description: |
         Specify prefix pattern like mentioned below with stream handler TaskHandlerWithCustomFormatter
diff --git a/airflow/config_templates/default_airflow.cfg b/airflow/config_templates/default_airflow.cfg
index 365de48f50..a3aff78e95 100644
--- a/airflow/config_templates/default_airflow.cfg
+++ b/airflow/config_templates/default_airflow.cfg
@@ -397,6 +397,14 @@ dag_processor_log_target = file
 dag_processor_log_format = [%%(asctime)s] [SOURCE:DAG_PROCESSOR] {{%%(filename)s:%%(lineno)d}} %%(levelname)s - %%(message)s
 log_formatter_class = airflow.utils.log.timezone_aware.TimezoneAware
 
+# An import path to a function to add adaptations of each secret added with
+# `airflow.utils.log.secrets_masker.mask_secret` to be masked in log messages. The given function
+# is expected to require a single parameter: the secret to be adapted. It may return a
+# single adaptation of the secret or an iterable of adaptations to each be masked as secrets.
+# The original secret will be masked as well as any adaptations returned.
+# Example: secret_mask_adapter = urllib.parse.quote
+secret_mask_adapter =
+
 # Specify prefix pattern like mentioned below with stream handler TaskHandlerWithCustomFormatter
 # Example: task_log_prefix_template = {{ti.dag_id}}-{{ti.task_id}}-{{execution_date}}-{{try_number}}
 task_log_prefix_template =
diff --git a/airflow/utils/log/secrets_masker.py b/airflow/utils/log/secrets_masker.py
index 7983fc836a..23a57b45d5 100644
--- a/airflow/utils/log/secrets_masker.py
+++ b/airflow/utils/log/secrets_masker.py
@@ -21,7 +21,7 @@ import collections
 import logging
 import re
 import sys
-from typing import Any, Dict, Iterable, List, TextIO, Tuple, TypeVar, Union
+from typing import Any, Callable, Dict, Generator, Iterable, List, TextIO, Tuple, TypeVar, Union
 
 from airflow import settings
 from airflow.compat.functools import cache, cached_property
@@ -240,21 +240,59 @@ class SecretsMasker(logging.Filter):
         """
         return self._redact(item, name, depth=0)
 
-    def add_mask(self, secret: str | dict | Iterable, name: str | None = None):
-        """Add a new secret to be masked to this filter instance."""
+    @cached_property
+    def _mask_adapter(self) -> None | Callable:
+        """Pulls the secret mask adapter from config.
+
+        This lives in a function here to be cached and only hit the config once.
+        """
+        from airflow.configuration import conf
+
+        return conf.getimport("logging", "secret_mask_adapter", fallback=None)
+
+    @cached_property
+    def _test_mode(self) -> bool:
+        """Pulls the unit test mode flag from config.
+
+        This lives in a function here to be cached and only hit the config once.
+        """
         from airflow.configuration import conf
 
-        test_mode: bool = conf.getboolean("core", "unit_test_mode")
+        return conf.getboolean("core", "unit_test_mode")
+
+    def _adaptations(self, secret: str) -> Generator[str, None, None]:
+        """Yields the secret along with any adaptations to the secret that should be masked."""
+        yield secret
+
+        if self._mask_adapter:
+            # This can return an iterable of secrets to mask OR a single secret as a string
+            secret_or_secrets = self._mask_adapter(secret)
+            if not isinstance(secret_or_secrets, str):
+                # if its not a string, it must be an iterable
+                yield from secret_or_secrets
+            else:
+                yield secret_or_secrets
+
+    def add_mask(self, secret: str | dict | Iterable, name: str | None = None):
+        """Add a new secret to be masked to this filter instance."""
         if isinstance(secret, dict):
             for k, v in secret.items():
                 self.add_mask(v, k)
         elif isinstance(secret, str):
-            if not secret or (test_mode and secret in SECRETS_TO_SKIP_MASKING_FOR_TESTS):
+            if not secret or (self._test_mode and secret in SECRETS_TO_SKIP_MASKING_FOR_TESTS):
                 return
-            pattern = re.escape(secret)
-            if pattern not in self.patterns and (not name or should_hide_value_for_key(name)):
-                self.patterns.add(pattern)
+
+            new_mask = False
+            for s in self._adaptations(secret):
+                if s:
+                    pattern = re.escape(s)
+                    if pattern not in self.patterns and (not name or should_hide_value_for_key(name)):
+                        self.patterns.add(pattern)
+                        new_mask = True
+
+            if new_mask:
                 self.replacer = re.compile("|".join(self.patterns))
+
         elif isinstance(secret, collections.abc.Iterable):
             for v in secret:
                 self.add_mask(v, name)
diff --git a/tests/utils/log/test_secrets_masker.py b/tests/utils/log/test_secrets_masker.py
index 9946694d43..16cec002e2 100644
--- a/tests/utils/log/test_secrets_masker.py
+++ b/tests/utils/log/test_secrets_masker.py
@@ -24,11 +24,12 @@ import logging.config
 import os
 import sys
 import textwrap
+from unittest.mock import patch
 
 import pytest
 
 from airflow import settings
-from airflow.utils.log.secrets_masker import RedactedIO, SecretsMasker, should_hide_value_for_key
+from airflow.utils.log.secrets_masker import RedactedIO, SecretsMasker, mask_secret, should_hide_value_for_key
 from tests.test_utils.config import conf_vars
 
 settings.MASK_SECRETS_IN_LOGS = True
@@ -325,6 +326,13 @@ def lineno():
 
 
 class TestRedactedIO:
+    @pytest.fixture(scope="class", autouse=True)
+    def reset_secrets_masker(self):
+        self.secrets_masker = SecretsMasker()
+        with patch("airflow.utils.log.secrets_masker._secrets_masker", return_value=self.secrets_masker):
+            mask_secret(p)
+            yield
+
     def test_redacts_from_print(self, capsys):
         # Without redacting, password is printed.
         print(p)
@@ -352,3 +360,35 @@ class TestRedactedIO:
         monkeypatch.setattr(sys, "stdin", io.StringIO("a\n"))
         with contextlib.redirect_stdout(RedactedIO()):
             assert input() == "a"
+
+
+class TestMaskSecretAdapter:
+    @pytest.fixture(scope="function", autouse=True)
+    def reset_secrets_masker_and_skip_escape(self):
+        self.secrets_masker = SecretsMasker()
+        with patch("airflow.utils.log.secrets_masker._secrets_masker", return_value=self.secrets_masker):
+            with patch("airflow.utils.log.secrets_masker.re.escape", lambda x: x):
+                yield
+
+    def test_calling_mask_secret_adds_adaptations_for_returned_str(self):
+        with conf_vars({("logging", "secret_mask_adapter"): "urllib.parse.quote"}):
+            mask_secret("secret<>&", None)
+
+        assert self.secrets_masker.patterns == {"secret%3C%3E%26", "secret<>&"}
+
+    def test_calling_mask_secret_adds_adaptations_for_returned_iterable(self):
+        with conf_vars({("logging", "secret_mask_adapter"): "urllib.parse.urlparse"}):
+            mask_secret("https://airflow.apache.org/docs/apache-airflow/stable", "password")
+
+        assert self.secrets_masker.patterns == {
+            "https",
+            "airflow.apache.org",
+            "/docs/apache-airflow/stable",
+            "https://airflow.apache.org/docs/apache-airflow/stable",
+        }
+
+    def test_calling_mask_secret_not_set(self):
+        with conf_vars({("logging", "secret_mask_adapter"): None}):
+            mask_secret("a secret")
+
+        assert self.secrets_masker.patterns == {"a secret"}