You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by as...@apache.org on 2021/06/22 13:46:21 UTC

[airflow] 31/38: Avoid recursing too deep when redacting logs (#16491)

This is an automated email from the ASF dual-hosted git repository.

ash pushed a commit to branch v2-1-test
in repository https://gitbox.apache.org/repos/asf/airflow.git

commit 6a5e6760e8c463b4ae163b2f579262bf1f41a2b0
Author: Tzu-ping Chung <tp...@astronomer.io>
AuthorDate: Thu Jun 17 23:38:04 2021 +0800

    Avoid recursing too deep when redacting logs (#16491)
    
    Fix #16473
    
    (cherry picked from commit 7453d3e81039573f4d621d13439bd6bcc97e6fa5)
---
 airflow/utils/log/secrets_masker.py | 50 +++++++++++++++++++++----------------
 1 file changed, 29 insertions(+), 21 deletions(-)

diff --git a/airflow/utils/log/secrets_masker.py b/airflow/utils/log/secrets_masker.py
index 2fd0d0a..6775b18 100644
--- a/airflow/utils/log/secrets_masker.py
+++ b/airflow/utils/log/secrets_masker.py
@@ -124,6 +124,7 @@ class SecretsMasker(logging.Filter):
     patterns: Set[str]
 
     ALREADY_FILTERED_FLAG = "__SecretsMasker_filtered"
+    MAX_RECURSION_DEPTH = 5
 
     def __init__(self):
         super().__init__()
@@ -169,35 +170,34 @@ class SecretsMasker(logging.Filter):
 
         return True
 
-    def _redact_all(self, item: "RedactableItem") -> "RedactableItem":
-        if isinstance(item, dict):
-            return {dict_key: self._redact_all(subval) for dict_key, subval in item.items()}
-        elif isinstance(item, str):
+    def _redact_all(self, item: "RedactableItem", depth: int) -> "RedactableItem":
+        if depth > self.MAX_RECURSION_DEPTH or isinstance(item, str):
             return '***'
+        if isinstance(item, dict):
+            return {dict_key: self._redact_all(subval, depth + 1) for dict_key, subval in item.items()}
         elif isinstance(item, (tuple, set)):
             # Turn set in to tuple!
-            return tuple(self._redact_all(subval) for subval in item)
+            return tuple(self._redact_all(subval, depth + 1) for subval in item)
         elif isinstance(item, list):
-            return list(self._redact_all(subval) for subval in item)
+            return list(self._redact_all(subval, depth + 1) for subval in item)
         else:
             return item
 
     # pylint: disable=too-many-return-statements
-    def redact(self, item: "RedactableItem", name: str = None) -> "RedactableItem":
-        """
-        Redact an any secrets found in ``item``, if it is a string.
-
-        If ``name`` is given, and it's a "sensitive" name (see
-        :func:`should_hide_value_for_key`) then all string values in the item
-        is redacted.
-
-        """
+    def _redact(self, item: "RedactableItem", name: Optional[str], depth: int) -> "RedactableItem":
+        # Avoid spending too much effort on redacting on deeply nested
+        # structures. This also avoid infinite recursion if a structure has
+        # reference to self.
+        if depth > self.MAX_RECURSION_DEPTH:
+            return item
         try:
             if name and should_hide_value_for_key(name):
-                return self._redact_all(item)
-
+                return self._redact_all(item, depth)
             if isinstance(item, dict):
-                return {dict_key: self.redact(subval, dict_key) for dict_key, subval in item.items()}
+                return {
+                    dict_key: self._redact(subval, name=dict_key, depth=(depth + 1))
+                    for dict_key, subval in item.items()
+                }
             elif isinstance(item, str):
                 if self.replacer:
                     # We can't replace specific values, but the key-based redacting
@@ -207,9 +207,9 @@ class SecretsMasker(logging.Filter):
                 return item
             elif isinstance(item, (tuple, set)):
                 # Turn set in to tuple!
-                return tuple(self.redact(subval) for subval in item)
+                return tuple(self._redact(subval, name=None, depth=(depth + 1)) for subval in item)
             elif isinstance(item, list):
-                return list(self.redact(subval) for subval in item)
+                return [self._redact(subval, name=None, depth=(depth + 1)) for subval in item]
             else:
                 return item
         # I think this should never happen, but it does not hurt to leave it just in case
@@ -223,8 +223,16 @@ class SecretsMasker(logging.Filter):
             )
             return item
 
-    # pylint: enable=too-many-return-statements
+    def redact(self, item: "RedactableItem", name: Optional[str] = None) -> "RedactableItem":
+        """Redact an any secrets found in ``item``, if it is a string.
 
+        If ``name`` is given, and it's a "sensitive" name (see
+        :func:`should_hide_value_for_key`) then all string values in the item
+        is redacted.
+        """
+        return self._redact(item, name, depth=0)
+
+    # pylint: enable=too-many-return-statements
     def add_mask(self, secret: Union[str, dict, Iterable], name: str = None):
         """Add a new secret to be masked to this filter instance."""
         if isinstance(secret, dict):