You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by GitBox <gi...@apache.org> on 2019/10/21 03:14:39 UTC

[GitHub] [airflow] dstandish commented on a change in pull request #6376: [WIP] [AIRFLOW-5705] Add creds backend classes including AWS SSM

dstandish commented on a change in pull request #6376: [WIP] [AIRFLOW-5705] Add creds backend classes including AWS SSM
URL: https://github.com/apache/airflow/pull/6376#discussion_r336825455
 
 

 ##########
 File path: airflow/creds.py
 ##########
 @@ -0,0 +1,171 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Classes for obtaining creds in order to produce connection objects.
+"""
+import os
+from abc import ABC, abstractmethod
+from typing import List
+
+import boto3
+
+from airflow.configuration import DEFAULT_CONFIG, AirflowConfigParser, parameterized_config
+from airflow.hooks.base_hook import CONN_ENV_PREFIX
+from airflow.models import Connection
+from airflow.utils.db import provide_session
+
+# Module-level config parser; AwsSsmCredsBackend reads its settings from it.
+# NOTE(review): it is built from the parameterized DEFAULT_CONFIG only -- confirm
+# whether values from the user's own airflow config are expected to be visible here.
+conf = AirflowConfigParser(default_config=parameterized_config(DEFAULT_CONFIG))
+
+
+class BaseCredsBackend(ABC):
+    """
+    Abstract base class to provide connection objects given a conn_id.
+
+    Subclasses must implement :meth:`get_connections`.
+    """
+
+    @abstractmethod
+    def get_connections(self, conn_id: str) -> List[Connection]:
+        """
+        Get list of connection objects for the given connection id.
+
+        :param conn_id: identifier of the connection to look up
+        :return: list of ``Connection`` objects found for ``conn_id``
+        """
+
+
+class EnvCredsBackend(BaseCredsBackend):
+    """
+    Retrieves Connection object from environment variable.
+    """
+
+    # pylint: disable=missing-docstring
+    def get_connections(self, conn_id) -> List[Connection]:
+        environment_uri = os.environ.get(CONN_ENV_PREFIX + conn_id.upper())
+        conn = Connection(conn_id=conn_id, uri=environment_uri)
+        return [conn]
+
+
+class MetastoreCredsBackend(BaseCredsBackend):
+    """
+    Retrieves Connection object from airflow metastore database.
+    """
+
+    # pylint: disable=missing-docstring
+    @provide_session
+    def get_connections(self, conn_id, session=None) -> List[Connection]:
+        conn_list = (
+            session.query(Connection).filter(Connection.conn_id == conn_id).all()
+        )
+        session.expunge_all()
+        return conn_list
+
+
+class AwsSsmCredsBackend(BaseCredsBackend):
+    """
+    Retrieves Connection object from AWS SSM Parameter Store
+
+    Configurable via airflow config like so:
+
+    .. code-block:: ini
+
+        [aws_ssm_creds]
+        ssm_prefix = /airflow
+        profile_name = default
+
+    For example, if ssm path is ``/airflow/AIRFLOW_CONN_SMTP_DEFAULT``, this would be accessible if you
+    provide ``ssm_prefix = /airflow`` and conn_id ``smtp_default``.
+
+    """
+
+    # Config section and keys read by the properties below, together with the
+    # fallback values used when the config does not provide them.
+    conf_section = "aws_ssm_creds"
+    conf_key_ssm_prefix = "ssm_prefix"
+    default_prefix = "/airflow"
+    conf_key_profile_name = "profile_name"
+    default_profile = "default"
+
+    def __init__(self, *args, **kwargs):
+        pass
+
+    @property
+    def ssm_prefix(self) -> str:
+        """
+        Gets ssm prefix from conf.
+
+        Ensures that there is no trailing slash.
+
+        :return:
+        """
+        ssm_prefix = conf.get(
+            section=self.conf_section,
+            key=self.conf_key_ssm_prefix,
+            fallback=self.default_prefix,
+        )
+        if ssm_prefix[-1] == "/":
+            ssm_prefix = ssm_prefix[:-1]
+        return ssm_prefix
+
+    @property
+    def aws_profile_name(self) -> str:
+        """
+        Gets AWS profile to use from conf.
+
+        :return:
+        """
+        profile_name = conf.get(
+            section=self.conf_section,
+            key=self.conf_key_profile_name,
+            fallback=self.default_profile,
+        )
+        return profile_name
+
+    def build_ssm_path(self, conn_id: str):
+        """
+        Given conn_id, build SSM path.
+
+        Assumes connection params use same naming convention as env vars, but may have arbitrary prefix.
+
+        :param conn_id:
+        :return:
+        """
+        param_name = (CONN_ENV_PREFIX + conn_id).upper()
+        param_path = self.ssm_prefix + "/" + param_name
+        return param_path
+
+    def get_conn_uri(self, conn_id):
+        """
+        Get param value
+
+        :param conn_id:
+        :return:
+        """
+        session = boto3.Session(profile_name=self.aws_profile_name)
+        client = session.client("ssm")
+        response = client.get_parameter(
+            Name=self.build_ssm_path(conn_id=conn_id), WithDecryption=True
+        )
+        value = response["Parameter"]["Value"]
+        return value
+
+    def get_connections(self, conn_id) -> List[Connection]:
+        """
+        Create connection object.
+
+        :param conn_id:
+        :return:
+        """
+        conn_uri = self.get_conn_uri(conn_id=conn_id)
 
 Review comment:
   I haven't encountered any problems with the URI format, so it did not occur to me to change it.  It's currently used both for env var connection definitions and for `airflow connections -a`, so it seemed natural to keep it.
   
   But I like your idea.  If we are storing creds elsewhere and building a separate "backend", we don't have to adhere to the same format.
   
   Here's an argument for keeping with the URI format.  Keeping the same URI format means that it would be easier to change from one creds provider to another.  If you are using env vars now, it would be extremely easy to switch to SSM, or vice versa if you want to switch back, or override something locally.
   
   But maybe this question should be considered separately, in PRs for individual providers e.g. AWS SSM or whatever.
   
   I suppose the first step is to agree on how to structure the backend framework (e.g. what is the right base class, or what functions should be in a creds provider implementation).  Then the individual details for specific creds providers could be handled separately, or implemented arbitrarily in accordance with the preferences of individual users.  
   
   Do you think it's a good idea for me to remove the SSM piece from this PR so this is more focused on the creds provider abstraction?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services