You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by po...@apache.org on 2022/07/04 22:42:03 UTC

[airflow] branch main updated: Update AWS Connection docs and deprecate some extras (#24670)

This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new c61f86dde5 Update AWS Connection docs and deprecate some extras (#24670)
c61f86dde5 is described below

commit c61f86dde522e2c9c997b2f22a6169deb40af0e0
Author: Andrey Anshin <An...@taragol.is>
AuthorDate: Tue Jul 5 02:41:40 2022 +0400

    Update AWS Connection docs and deprecate some extras (#24670)
    
    * Update AWS Connection docs and deprecate some extras
    
    * Update docs and deprecate legacy local credentials file
---
 airflow/providers/amazon/aws/hooks/base_aws.py     | 45 +++++++++++++++----
 .../connections/aws.rst                            | 52 +++++++++++++++-------
 2 files changed, 74 insertions(+), 23 deletions(-)

diff --git a/airflow/providers/amazon/aws/hooks/base_aws.py b/airflow/providers/amazon/aws/hooks/base_aws.py
index 28135b0fa4..a69798990f 100644
--- a/airflow/providers/amazon/aws/hooks/base_aws.py
+++ b/airflow/providers/amazon/aws/hooks/base_aws.py
@@ -84,6 +84,15 @@ class BaseSessionFactory(LoggingMixin):
                 self.extra_config["session_kwargs"],
             )
             session_kwargs = self.extra_config["session_kwargs"]
+
+        if "profile" in self.extra_config and "s3_config_file" not in self.extra_config:
+            if "profile_name" not in session_kwargs:
+                self.log.warning(
+                    "Found 'profile' without specifying 's3_config_file'. "
+                    "If required profile from AWS Shared Credentials please "
+                    "set 'profile_name' in extra 'session_kwargs'."
+                )
+
         self.basic_session = self._create_basic_session(session_kwargs=session_kwargs)
         self.role_arn = self._read_role_arn_from_extra_config()
         # If role_arn was specified then STS + assume_role
@@ -191,6 +200,12 @@ class BaseSessionFactory(LoggingMixin):
         role_arn = self.extra_config.get("role_arn")
         if role_arn is None and aws_account_id is not None and aws_iam_role is not None:
             self.log.info("Constructing role_arn from aws_account_id and aws_iam_role")
+            warnings.warn(
+                "Constructing 'role_arn' from 'aws_account_id' and 'aws_iam_role' is deprecated and "
+                "will be removed in a future releases. Please set 'role_arn' in extra config.",
+                DeprecationWarning,
+                stacklevel=3,
+            )
             role_arn = f"arn:aws:iam::{aws_account_id}:role/{aws_iam_role}"
         self.log.debug("role_arn is %s", role_arn)
         return role_arn
@@ -207,6 +222,12 @@ class BaseSessionFactory(LoggingMixin):
             aws_secret_access_key = self.extra_config["aws_secret_access_key"]
             self.log.info("Credentials retrieved from extra_config")
         elif "s3_config_file" in self.extra_config:
+            warnings.warn(
+                "Use local credentials file is never documented and well tested. "
+                "Obtain credentials by this way deprecated and will be removed in a future releases.",
+                DeprecationWarning,
+                stacklevel=3,
+            )
             aws_access_key_id, aws_secret_access_key = _parse_s3_config(
                 self.extra_config["s3_config_file"],
                 self.extra_config.get("s3_config_format"),
@@ -220,7 +241,13 @@ class BaseSessionFactory(LoggingMixin):
 
     def _assume_role(self, sts_client: boto3.client) -> Dict:
         assume_role_kwargs = self.extra_config.get("assume_role_kwargs", {})
-        if "external_id" in self.extra_config:  # Backwards compatibility
+        if "ExternalId" not in assume_role_kwargs and "external_id" in self.extra_config:
+            warnings.warn(
+                "'external_id' in extra config is deprecated and will be removed in a future releases. "
+                "Set 'ExternalId' in 'assume_role_kwargs' in extra config.",
+                DeprecationWarning,
+                stacklevel=3,
+            )
             assume_role_kwargs["ExternalId"] = self.extra_config.get("external_id")
         role_session_name = self._strip_invalid_session_name_characters(f"Airflow_{self.conn.conn_id}")
         self.log.debug(
@@ -379,13 +406,13 @@ class AwsGenericHook(BaseHook, Generic[BaseAwsConnection]):
         running Airflow in a distributed manner and aws_conn_id is None or
         empty, then default boto3 configuration would be used (and must be
         maintained on each worker node).
-    :param verify: Whether or not to verify SSL certificates.
+    :param verify: Whether or not to verify SSL certificates. See:
         https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
     :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
     :param client_type: boto3.client client_type. Eg 's3', 'emr' etc
     :param resource_type: boto3.resource resource_type. Eg 'dynamodb' etc
-    :param config: Configuration for botocore client.
-        (https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html)
+    :param config: Configuration for botocore client. See:
+        https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
     """
 
     conn_name_attr = 'aws_conn_id'
@@ -424,7 +451,7 @@ class AwsGenericHook(BaseHook, Generic[BaseAwsConnection]):
             extra_config = connection_object.extra_dejson
             endpoint_url = extra_config.get("host")
 
-            # https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html#botocore.config.Config
+            # https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
             if "config_kwargs" in extra_config:
                 self.log.debug(
                     "Retrieving config_kwargs from Connection.extra_config['config_kwargs']: %s",
@@ -439,9 +466,11 @@ class AwsGenericHook(BaseHook, Generic[BaseAwsConnection]):
             return session, endpoint_url
 
         except AirflowException:
-            self.log.warning("Unable to use Airflow Connection for credentials.")
-            self.log.debug("Fallback on boto3 credential strategy")
-            # http://boto3.readthedocs.io/en/latest/guide/configuration.html
+            self.log.warning(
+                "Unable to use Airflow Connection for credentials. "
+                "Fallback on boto3 credential strategy. See: "
+                "https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html"
+            )
 
         self.log.debug(
             "Creating session using boto3 credential strategy region_name=%s",
diff --git a/docs/apache-airflow-providers-amazon/connections/aws.rst b/docs/apache-airflow-providers-amazon/connections/aws.rst
index 144eeb270b..f78685d902 100644
--- a/docs/apache-airflow-providers-amazon/connections/aws.rst
+++ b/docs/apache-airflow-providers-amazon/connections/aws.rst
@@ -51,33 +51,53 @@ Configuring the Connection
 
 Login (optional)
     Specify the AWS access key ID used for the initial connection.
-    If you do an *assume_role* by specifying a ``role_arn`` in the **Extra** field,
+    If you do an `assume role <https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html>`__
+    by specifying a ``role_arn`` in the **Extra** field,
     then temporary credentials will be used for subsequent calls to AWS.
 
 Password (optional)
     Specify the AWS secret access key used for the initial connection.
-    If you do an *assume_role* by specifying a ``role_arn`` in the **Extra** field,
+    If you do an `assume role <https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html>`__
+    by specifying a ``role_arn`` in the **Extra** field,
     then temporary credentials will be used for subsequent calls to AWS.
 
 Extra (optional)
     Specify the extra parameters (as json dictionary) that can be used in AWS
     connection. The following parameters are all optional:
 
-    * ``aws_session_token``: AWS session token used for the initial connection if you use external credentials. You are responsible for renewing these.
-
-    * ``role_arn``: If specified, then an *assume_role* will be done to this role.
-    * ``aws_account_id``: Used to construct ``role_arn`` if it was not specified.
-    * ``aws_iam_role``: Used to construct ``role_arn`` if it was not specified.
-    * ``assume_role_kwargs``: Additional ``kwargs`` passed to *assume_role*.
-
+    * ``aws_access_key_id``: AWS access key ID used for the initial connection.
+    * ``aws_secret_access_key``: AWS secret access key used for the initial connection.
+    * ``aws_session_token``: AWS session token used for the initial connection if you use external credentials.
+      You are responsible for renewing these.
+    * ``region_name``: AWS Region for the connection.
+    * ``session_kwargs``: Additional **kwargs** passed to
+      `boto3.session.Session <https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html>`__.
+    * ``config_kwargs``: Additional **kwargs** used to construct a
+      `botocore.config.Config <https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html>`__
+      passed to `boto3.client <https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html#boto3.session.Session.client>`__
+      and `boto3.resource <https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html#boto3.session.Session.resource>`__.
+    * ``role_arn``: If specified, then assume this role, obtaining a set of temporary security credentials using the ``assume_role_method``.
+    * ``assume_role_method``: AWS STS client method, one of
+      `assume_role <https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html>`__,
+      `assume_role_with_saml <https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithSAML.html>`__ or
+      `assume_role_with_web_identity <https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html>`__.
+      If not specified, **assume_role** is used.
+    * ``assume_role_kwargs``: Additional **kwargs** passed to ``assume_role_method``.
     * ``host``: Endpoint URL for the connection.
-    * ``region_name``: AWS region for the connection.
-    * ``external_id``: AWS external ID for the connection (deprecated, rather use ``assume_role_kwargs``).
 
-    * ``config_kwargs``: Additional ``kwargs`` used to construct a ``botocore.config.Config`` passed to *boto3.client* and *boto3.resource*.
-    * ``session_kwargs``: Additional ``kwargs`` passed to *boto3.session.Session*.
+.. warning:: Extra parameters below are deprecated and will be removed in a future version of this provider.
 
-    * ``profile``: If you are getting your credentials from the credentials file, you can specify the profile with this.
+    * ``aws_account_id``: Used to construct ``role_arn`` if it was not specified.
+    * ``aws_iam_role``: Used to construct ``role_arn`` if it was not specified.
+    * ``external_id``: A unique identifier that might be required when you assume a role in another account.
+      Used if ``ExternalId`` in ``assume_role_kwargs`` was not specified.
+    * ``s3_config_file``: Path to local credentials file.
+    * ``s3_config_format``: ``s3_config_file`` format, one of
+      `aws <https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html#cli-configure-files-settings>`_,
+      `boto <http://boto.cloudhackers.com/en/latest/boto_config_tut.html#details>`_ or
+      `s3cmd <https://s3tools.org/kb/item14.htm>`_. If not specified, **boto** is used.
+    * ``profile``: If you are getting your credentials from the ``s3_config_file``,
+      you can specify the profile with this parameter.
 
 If you are configuring the connection via a URI, ensure that all components of the URI are URL-encoded.
 
@@ -179,7 +199,9 @@ The following settings may be used within the ``assume_role_with_saml`` containe
     * ``idp_auth_method``: Specify "http_spegno_auth" to use the Python ``requests_gssapi`` library. This library is more up to date than ``requests_kerberos`` and is backward compatible. See ``requests_gssapi`` documentation on PyPI.
     * ``mutual_authentication``: Can be "REQUIRED", "OPTIONAL" or "DISABLED". See ``requests_gssapi`` documentation on PyPI.
     * ``idp_request_kwargs``: Additional ``kwargs`` passed to ``requests`` when requesting from the IDP (over HTTP/S).
-    * ``idp_request_retry_kwargs``: Additional ``kwargs`` to construct a ``urllib3.util.Retry`` used as a retry strategy when requesting from the IDP. See the ``urllib3`` documentation for more details.
+    * ``idp_request_retry_kwargs``: Additional ``kwargs`` to construct a
+      `urllib3.util.Retry <https://urllib3.readthedocs.io/en/stable/reference/urllib3.util.html#urllib3.util.Retry>`_
+      used as a retry strategy when requesting from the IDP.
     * ``log_idp_response``: Useful for debugging - if specified, print the IDP response content to the log. Note that a successful response will contain sensitive information!
     * ``saml_response_xpath``: How to query the IDP response using XML / HTML xpath.
     * ``assume_role_kwargs``: Additional ``kwargs`` passed to ``sts_client.assume_role_with_saml``.