You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by ep...@apache.org on 2022/03/22 17:51:30 UTC

[airflow] 15/30: Filter out default configs when overrides exist. (#21539)

This is an automated email from the ASF dual-hosted git repository.

ephraimanierobi pushed a commit to branch v2-2-test
in repository https://gitbox.apache.org/repos/asf/airflow.git

commit d88d9f736c334a88aba1cedf52955d4c58652854
Author: Xiao Yu <me...@xyu.io>
AuthorDate: Tue Mar 15 18:06:50 2022 +0000

    Filter out default configs when overrides exist. (#21539)
    
    * Filter out default configs when overrides exist.
    
    When sending configs to Airflow workers we materialize a temp config file. In #18772 a feature was added so that `_cmd` generated secrets are not written to the files in some cases instead favoring maintaining the raw `_cmd` settings. Unfortunately during materializing of the configs via `as_dict()` Airflow defaults are generated and materialized as well including defaults for the non `_cmd` versions of some settings. And due to Airflow setting precedence stating bare versions of sett [...]
    https://airflow.apache.org/docs/apache-airflow/stable/howto/set-config.html
    
    This change checks `_cmd`, env, and secrets when materializing configs via `as_dict()` so that if the bare versions of the values is exactly the same as Airflow defaults and we have "hidden" / special versions of these configs that are trying to be set we remove the bare versions so that the correct version can be used.
    
    Fixes: #20092
    Related to: #18772 #4050
    
    (cherry picked from commit e07bc63ec0e5b679c87de8e8d4cdff1cf4671146)
---
 airflow/configuration.py                 | 58 ++++++++++++++++++++++++++++++++
 docs/apache-airflow/howto/set-config.rst |  4 +++
 tests/core/test_configuration.py         | 46 +++++++++++++++++++++++++
 3 files changed, 108 insertions(+)

diff --git a/airflow/configuration.py b/airflow/configuration.py
index e120b26..88587f3 100644
--- a/airflow/configuration.py
+++ b/airflow/configuration.py
@@ -581,6 +581,15 @@ class AirflowConfigParser(ConfigParser):
         """
         Returns the current configuration as an OrderedDict of OrderedDicts.
 
+        When materializing current configuration Airflow defaults are
+        materialized along with user set configs. If any of the `include_*`
+        options are False then the result of calling command or secret key
+        configs do not override Airflow defaults and instead are passed through.
+        In order to then avoid Airflow defaults from overwriting user set
+        command or secret key configs we filter out bare sensitive_config_values
+        that are set to Airflow defaults when command or secret key configs
+        produce different values.
+
         :param display_source: If False, the option value is returned. If True,
             a tuple of (option_value, source) is returned. Source is either
             'airflow.cfg', 'default', 'env var', or 'cmd'.
@@ -618,14 +627,21 @@ class AirflowConfigParser(ConfigParser):
         # add env vars and overwrite because they have priority
         if include_env:
             self._include_envs(config_sources, display_sensitive, display_source, raw)
+        else:
+            self._filter_by_source(config_sources, display_source, self._get_env_var_option)
 
         # add bash commands
         if include_cmds:
             self._include_commands(config_sources, display_sensitive, display_source, raw)
+        else:
+            self._filter_by_source(config_sources, display_source, self._get_cmd_option)
 
         # add config from secret backends
         if include_secret:
             self._include_secrets(config_sources, display_sensitive, display_source, raw)
+        else:
+            self._filter_by_source(config_sources, display_source, self._get_secret_option)
+
         return config_sources
 
     def _include_secrets(self, config_sources, display_sensitive, display_source, raw):
@@ -683,6 +699,48 @@ class AirflowConfigParser(ConfigParser):
                 key = key.lower()
             config_sources.setdefault(section, OrderedDict()).update({key: opt})
 
+    def _filter_by_source(self, config_sources, display_source, getter_func):
+        """
+        Deletes default configs from current configuration (an OrderedDict of
+        OrderedDicts) if it would conflict with special sensitive_config_values.
+
+        This is necessary because bare configs take precedence over the command
+        or secret key equivalents so if the current running config is
+        materialized with Airflow defaults they in turn override user set
+        command or secret key configs.
+
+        :param config_sources: The current configuration to operate on
+        :param display_source: If False, configuration options contain raw
+            values. If True, options are a tuple of (option_value, source).
+            Source is either 'airflow.cfg', 'default', 'env var', or 'cmd'.
+        :param getter_func: A callback function that gets the user configured
+            override value for a particular sensitive_config_values config.
+        :rtype: None
+        :return: None, the given config_sources is filtered if necessary,
+            otherwise untouched.
+        """
+        for (section, key) in self.sensitive_config_values:
+            # Don't bother if we don't have section / key
+            if section not in config_sources or key not in config_sources[section]:
+                continue
+            # Check that there is something to override defaults
+            try:
+                getter_opt = getter_func(section, key)
+            except ValueError:
+                continue
+            if not getter_opt:
+                continue
+            # Check to see that there is a default value
+            if not self.airflow_defaults.has_option(section, key):
+                continue
+            # Check to see if bare setting is the same as defaults
+            if display_source:
+                opt, source = config_sources[section][key]
+            else:
+                opt = config_sources[section][key]
+            if opt == self.airflow_defaults.get(section, key):
+                del config_sources[section][key]
+
     @staticmethod
     def _replace_config_with_display_sources(config_sources, configs, display_source, raw):
         for (source_name, config) in configs:
diff --git a/docs/apache-airflow/howto/set-config.rst b/docs/apache-airflow/howto/set-config.rst
index 03cf0de..ad25c1d 100644
--- a/docs/apache-airflow/howto/set-config.rst
+++ b/docs/apache-airflow/howto/set-config.rst
@@ -100,6 +100,10 @@ The universal order of precedence for all configuration options is as follows:
 #. secret key in ``airflow.cfg``
 #. Airflow's built in defaults
 
+.. note::
+    For Airflow versions >= 2.2.1, < 2.3.0 Airflow's built in defaults took precedence
+    over command and secret key in ``airflow.cfg`` in some circumstances.
+
 You can check the current configuration with the ``airflow config list`` command.
 
 If you only want to see the value for one option, you can use ``airflow config get-value`` command as in
diff --git a/tests/core/test_configuration.py b/tests/core/test_configuration.py
index da1d736..a064c48 100644
--- a/tests/core/test_configuration.py
+++ b/tests/core/test_configuration.py
@@ -15,6 +15,7 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+import copy
 import io
 import os
 import re
@@ -720,3 +721,48 @@ notacommand = OK
             "CRITICAL, FATAL, ERROR, WARN, WARNING, INFO, DEBUG."
         )
         assert message == exception
+
+    def test_as_dict_works_without_sensitive_cmds(self):
+        conf_materialize_cmds = conf.as_dict(display_sensitive=True, raw=True, include_cmds=True)
+        conf_maintain_cmds = conf.as_dict(display_sensitive=True, raw=True, include_cmds=False)
+
+        assert 'sql_alchemy_conn' in conf_materialize_cmds['core']
+        assert 'sql_alchemy_conn_cmd' not in conf_materialize_cmds['core']
+
+        assert 'sql_alchemy_conn' in conf_maintain_cmds['core']
+        assert 'sql_alchemy_conn_cmd' not in conf_maintain_cmds['core']
+
+        assert (
+            conf_materialize_cmds['core']['sql_alchemy_conn']
+            == conf_maintain_cmds['core']['sql_alchemy_conn']
+        )
+
+    def test_as_dict_respects_sensitive_cmds(self):
+        conf_conn = conf['core']['sql_alchemy_conn']
+        test_conf = copy.deepcopy(conf)
+        test_conf.read_string(
+            textwrap.dedent(
+                """
+                [core]
+                sql_alchemy_conn_cmd = echo -n my-super-secret-conn
+                """
+            )
+        )
+
+        conf_materialize_cmds = test_conf.as_dict(display_sensitive=True, raw=True, include_cmds=True)
+        conf_maintain_cmds = test_conf.as_dict(display_sensitive=True, raw=True, include_cmds=False)
+
+        assert 'sql_alchemy_conn' in conf_materialize_cmds['core']
+        assert 'sql_alchemy_conn_cmd' not in conf_materialize_cmds['core']
+
+        if conf_conn == test_conf.airflow_defaults['core']['sql_alchemy_conn']:
+            assert conf_materialize_cmds['core']['sql_alchemy_conn'] == 'my-super-secret-conn'
+
+        assert 'sql_alchemy_conn_cmd' in conf_maintain_cmds['core']
+        assert conf_maintain_cmds['core']['sql_alchemy_conn_cmd'] == 'echo -n my-super-secret-conn'
+
+        if conf_conn == test_conf.airflow_defaults['core']['sql_alchemy_conn']:
+            assert 'sql_alchemy_conn' not in conf_maintain_cmds['core']
+        else:
+            assert 'sql_alchemy_conn' in conf_maintain_cmds['core']
+            assert conf_maintain_cmds['core']['sql_alchemy_conn'] == conf_conn