You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by GitBox <gi...@apache.org> on 2023/01/04 13:06:53 UTC

[GitHub] [airflow] feluelle commented on a diff in pull request #28710: Apply "unify bucket and key" before "provide bucket"

feluelle commented on code in PR #28710:
URL: https://github.com/apache/airflow/pull/28710#discussion_r1061456165


##########
airflow/providers/amazon/aws/hooks/s3.py:
##########
@@ -38,17 +40,22 @@
 from botocore.exceptions import ClientError
 
 from airflow.exceptions import AirflowException
+from airflow.providers.amazon.aws.exceptions import S3HookUriParseFailure
 from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook
 from airflow.utils.helpers import chunks
 
 T = TypeVar("T", bound=Callable)
 
+logger = logging.getLogger(__name__)
+
 
 def provide_bucket_name(func: T) -> T:
     """
     Function decorator that provides a bucket name taken from the connection
     in case no bucket name has been passed to the function.
     """
+    if getattr(func, "_unify_bucket_name_and_key_wrapped", False) is True:

Review Comment:
   ```suggestion
       if hasattr(func, "_unify_bucket_name_and_key_wrapped"):
   ```



##########
tests/providers/amazon/aws/hooks/test_s3.py:
##########
@@ -825,3 +826,143 @@ def test_delete_bucket_tagging_with_no_tags(self):
 
         with pytest.raises(ClientError, match=r".*NoSuchTagSet.*"):
             hook.get_bucket_tagging(bucket_name="new_bucket")
+
+
+@patch("airflow.hooks.base.BaseHook.get_connection")
+@pytest.mark.parametrize(
+    "expected",
+    [
+        # full key
+        # no conn - no bucket - full key
+        param(["key_bucket", "key.txt"], id="unify-no_conn-no_bucket-full_key"),
+        param(["key_bucket", "key.txt"], id="provide-no_conn-no_bucket-full_key"),
+        # no conn - with bucket - full key
+        param(["kwargs_bucket", "s3://key_bucket/key.txt"], id="unify-no_conn-with_bucket-full_key"),
+        param(["kwargs_bucket", "s3://key_bucket/key.txt"], id="provide-no_conn-with_bucket-full_key"),
+        # with conn - no bucket - full key
+        param(["conn_bucket", "s3://key_bucket/key.txt"], id="provide-with_conn-no_bucket-full_key"),
+        param(["key_bucket", "key.txt"], id="unify-with_conn-no_bucket-full_key"),
+        # with conn - with bucket - full key
+        param(["kwargs_bucket", "s3://key_bucket/key.txt"], id="provide-with_conn-with_bucket-full_key"),
+        param(["kwargs_bucket", "s3://key_bucket/key.txt"], id="unify-with_conn-with_bucket-full_key"),
+        # rel key
+        # no conn - no bucket - rel key
+        param([None, "key.txt"], id="unify-no_conn-no_bucket-rel_key"),
+        param([None, "key.txt"], id="provide-no_conn-no_bucket-rel_key"),
+        # no conn - with bucket - rel key
+        param(["kwargs_bucket", "key.txt"], id="unify-no_conn-with_bucket-rel_key"),
+        param(["kwargs_bucket", "key.txt"], id="provide-no_conn-with_bucket-rel_key"),
+        # with conn - no bucket - rel key
+        param(["conn_bucket", "key.txt"], id="provide-with_conn-no_bucket-rel_key"),
+        param(["conn_bucket", "key.txt"], id="unify-with_conn-no_bucket-rel_key"),
+        # with conn - with bucket - rel key
+        param(["kwargs_bucket", "key.txt"], id="provide-with_conn-with_bucket-rel_key"),
+        param(["kwargs_bucket", "key.txt"], id="unify-with_conn-with_bucket-rel_key"),
+    ],
+)
+def test_unify_and_provide_bucket_name_combination(mock_base, expected, request, caplog):
+    """
+    Verify what is the outcome when the unify_bucket_name_and_key and provide_bucket_name
+    decorators are combined.
+    """
+    tokens = request.node.callspec.id.split("-")
+    assert len(tokens) == 4
+    if "with_conn" in tokens:
+        c = Connection(schema="conn_bucket")
+    else:
+        c = Connection(schema=None)
+    key = "key.txt" if "rel_key" in tokens else "s3://key_bucket/key.txt"
+    if "with_bucket" in tokens:
+        kwargs = {"bucket_name": "kwargs_bucket", "key": key}
+    else:
+        kwargs = {"key": key}
+
+    mock_base.return_value = c
+    if "unify" in tokens:  # unify to be processed before provide

Review Comment:
   Why are we deciding based on param id? Can't we have the different Connection schema, kwargs and so on in the params?



##########
tests/providers/amazon/aws/hooks/test_s3.py:
##########
@@ -825,3 +826,143 @@ def test_delete_bucket_tagging_with_no_tags(self):
 
         with pytest.raises(ClientError, match=r".*NoSuchTagSet.*"):
             hook.get_bucket_tagging(bucket_name="new_bucket")
+
+
+@patch("airflow.hooks.base.BaseHook.get_connection")
+@pytest.mark.parametrize(
+    "expected",
+    [
+        # full key
+        # no conn - no bucket - full key
+        param(["key_bucket", "key.txt"], id="unify-no_conn-no_bucket-full_key"),
+        param(["key_bucket", "key.txt"], id="provide-no_conn-no_bucket-full_key"),
+        # no conn - with bucket - full key
+        param(["kwargs_bucket", "s3://key_bucket/key.txt"], id="unify-no_conn-with_bucket-full_key"),
+        param(["kwargs_bucket", "s3://key_bucket/key.txt"], id="provide-no_conn-with_bucket-full_key"),
+        # with conn - no bucket - full key
+        param(["conn_bucket", "s3://key_bucket/key.txt"], id="provide-with_conn-no_bucket-full_key"),
+        param(["key_bucket", "key.txt"], id="unify-with_conn-no_bucket-full_key"),
+        # with conn - with bucket - full key
+        param(["kwargs_bucket", "s3://key_bucket/key.txt"], id="provide-with_conn-with_bucket-full_key"),
+        param(["kwargs_bucket", "s3://key_bucket/key.txt"], id="unify-with_conn-with_bucket-full_key"),
+        # rel key
+        # no conn - no bucket - rel key
+        param([None, "key.txt"], id="unify-no_conn-no_bucket-rel_key"),
+        param([None, "key.txt"], id="provide-no_conn-no_bucket-rel_key"),
+        # no conn - with bucket - rel key
+        param(["kwargs_bucket", "key.txt"], id="unify-no_conn-with_bucket-rel_key"),
+        param(["kwargs_bucket", "key.txt"], id="provide-no_conn-with_bucket-rel_key"),
+        # with conn - no bucket - rel key
+        param(["conn_bucket", "key.txt"], id="provide-with_conn-no_bucket-rel_key"),
+        param(["conn_bucket", "key.txt"], id="unify-with_conn-no_bucket-rel_key"),
+        # with conn - with bucket - rel key
+        param(["kwargs_bucket", "key.txt"], id="provide-with_conn-with_bucket-rel_key"),
+        param(["kwargs_bucket", "key.txt"], id="unify-with_conn-with_bucket-rel_key"),
+    ],
+)
+def test_unify_and_provide_bucket_name_combination(mock_base, expected, request, caplog):
+    """
+    Verify what is the outcome when the unify_bucket_name_and_key and provide_bucket_name
+    decorators are combined.
+    """
+    tokens = request.node.callspec.id.split("-")
+    assert len(tokens) == 4
+    if "with_conn" in tokens:
+        c = Connection(schema="conn_bucket")
+    else:
+        c = Connection(schema=None)
+    key = "key.txt" if "rel_key" in tokens else "s3://key_bucket/key.txt"
+    if "with_bucket" in tokens:
+        kwargs = {"bucket_name": "kwargs_bucket", "key": key}
+    else:
+        kwargs = {"key": key}
+
+    mock_base.return_value = c
+    if "unify" in tokens:  # unify to be processed before provide
+
+        class MyHook(S3Hook):
+            @unify_bucket_name_and_key
+            @provide_bucket_name
+            def do_something(self, bucket_name=None, key=None):
+                return bucket_name, key
+
+    else:
+
+        with caplog.at_level("WARNING"):
+
+            class MyHook(S3Hook):
+                @provide_bucket_name
+                @unify_bucket_name_and_key
+                def do_something(self, bucket_name=None, key=None):
+                    return bucket_name, key
+
+        assert caplog.records[0].message == "`unify_bucket_name_and_key` should wrap `provide_bucket_name`."
+    hook = MyHook()
+    if expected == "__fail__":
+        with pytest.raises(Exception, match='Please provide a bucket name using a valid format: "key.txt"'):
+            hook.do_something(**kwargs)
+    else:
+        assert list(hook.do_something(**kwargs)) == expected

Review Comment:
   Ideally, we should not have if/else branches doing different kind of assertions.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@airflow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org