Posted to commits@airflow.apache.org by GitBox <gi...@apache.org> on 2022/08/29 13:38:13 UTC

[GitHub] [airflow] josh-fell commented on a diff in pull request #26035: add emr steps sensor

josh-fell commented on code in PR #26035:
URL: https://github.com/apache/airflow/pull/26035#discussion_r957340552


##########
tests/providers/amazon/aws/sensors/test_emr_steps.py:
##########
@@ -0,0 +1,190 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import unittest
+from copy import deepcopy
+from datetime import datetime
+from unittest.mock import MagicMock, patch
+
+from dateutil.tz import tzlocal
+
+from airflow.exceptions import AirflowException
+from airflow.providers.amazon.aws.sensors.emr import EmrStepsSensor
+
+LIST_JOB_STEP_BASIC_RETURN = {
+    "ResponseMetadata": {"HTTPStatusCode": 200, "RequestId": "8dee8db2-3719-11e6-9e20-35b2f861a2a6"},
+    "Steps": [],
+}
+
+LIST_JOB_STEP_RUNNING_RETURN = {
+    "ActionOnFailure": "CONTINUE",
+    "Config": {
+        "Args": ["/usr/lib/spark/bin/run-example", "SparkPi", "10"],
+        "Jar": "command-runner.jar",
+        "Properties": {},
+    },
+    "Id": "s-VK57YR1Z9Z5N",
+    "Name": "calculate_pi",
+    "Status": {
+        "State": "RUNNING",
+        "StateChangeReason": {},
+        "Timeline": {
+            "CreationDateTime": datetime(2016, 6, 20, 19, 0, 18, tzinfo=tzlocal()),
+            "StartDateTime": datetime(2016, 6, 20, 19, 2, 34, tzinfo=tzlocal()),
+        },
+    },
+}
+
+DESCRIBE_JOB_STEP_CANCELLED_RETURN = {
+    "ActionOnFailure": "CONTINUE",
+    "Config": {
+        "Args": ["/usr/lib/spark/bin/run-example", "SparkPi", "10"],
+        "Jar": "command-runner.jar",
+        "Properties": {},
+    },
+    "Id": "s-VK57YR1Z9Z5N",
+    "Name": "calculate_pi",
+    "Status": {
+        "State": "CANCELLED",
+        "StateChangeReason": {},
+        "Timeline": {
+            "CreationDateTime": datetime(2016, 6, 20, 19, 0, 18, tzinfo=tzlocal()),
+            "StartDateTime": datetime(2016, 6, 20, 19, 2, 34, tzinfo=tzlocal()),
+        },
+    },
+}
+
+LIST_JOB_STEP_FAILED_RETURN = {
+    "ActionOnFailure": "CONTINUE",
+    "Config": {
+        "Args": ["/usr/lib/spark/bin/run-example", "SparkPi", "10"],
+        "Jar": "command-runner.jar",
+        "Properties": {},
+    },
+    "Id": "s-VK57YR1Z9Z5N",
+    "Name": "calculate_pi",
+    "Status": {
+        "State": "FAILED",
+        "StateChangeReason": {},
+        "FailureDetails": {
+            "LogFile": "s3://fake-log-files/emr-logs/j-8989898989/steps/s-VK57YR1Z9Z5N",
+            "Reason": "Unknown Error.",
+        },
+        "Timeline": {
+            "CreationDateTime": datetime(2016, 6, 20, 19, 0, 18, tzinfo=tzlocal()),
+            "StartDateTime": datetime(2016, 6, 20, 19, 2, 34, tzinfo=tzlocal()),
+        },
+    },
+}
+
+LIST_JOB_STEP_INTERRUPTED_RETURN = {
+    "ActionOnFailure": "CONTINUE",
+    "Config": {
+        "Args": ["/usr/lib/spark/bin/run-example", "SparkPi", "10"],
+        "Jar": "command-runner.jar",
+        "Properties": {},
+    },
+    "Id": "s-VK57YR1Z9Z5N",
+    "Name": "calculate_pi",
+    "Status": {
+        "State": "INTERRUPTED",
+        "StateChangeReason": {},
+        "Timeline": {
+            "CreationDateTime": datetime(2016, 6, 20, 19, 0, 18, tzinfo=tzlocal()),
+            "StartDateTime": datetime(2016, 6, 20, 19, 2, 34, tzinfo=tzlocal()),
+        },
+    },
+}
+
+LIST_JOB_STEP_COMPLETED_RETURN = {
+    "ActionOnFailure": "CONTINUE",
+    "Config": {
+        "Args": ["/usr/lib/spark/bin/run-example", "SparkPi", "10"],
+        "Jar": "command-runner.jar",
+        "Properties": {},
+    },
+    "Id": "s-VK57YR1Z9Z5N",
+    "Name": "calculate_pi",
+    "Status": {
+        "State": "COMPLETED",
+        "StateChangeReason": {},
+        "Timeline": {
+            "CreationDateTime": datetime(2016, 6, 20, 19, 0, 18, tzinfo=tzlocal()),
+            "StartDateTime": datetime(2016, 6, 20, 19, 2, 34, tzinfo=tzlocal()),
+        },
+    },
+}
+
+
+class TestEmrStepsSensor(unittest.TestCase):

Review Comment:
   The preference for net-new unit tests is to use `pytest` over `unittest`. Would you mind converting this to `pytest`, please?
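   As a rough illustration of the conversion (not the final test module), the `unittest.TestCase` base class could be dropped in favor of a fixture plus `pytest.mark.parametrize`, reusing the module-level `LIST_JOB_STEP_*_RETURN` dicts above. The patch target and the assumption that the sensor polls `list_steps` and only succeeds once every step reaches a target state are mine; adjust them to whatever the sensor actually calls:
   ```python
   from unittest import mock

   import pytest

   from airflow.providers.amazon.aws.sensors.emr import EmrStepsSensor


   @pytest.fixture
   def mocked_emr_client():
       # Swap the boto3 client exposed by EmrHook for a MagicMock so the test
       # never talks to AWS.
       with mock.patch("airflow.providers.amazon.aws.hooks.emr.EmrHook.conn") as client:
           yield client


   @pytest.mark.parametrize(
       "steps, expected",
       [
           ([LIST_JOB_STEP_COMPLETED_RETURN], True),  # all steps done -> sensor succeeds
           ([LIST_JOB_STEP_RUNNING_RETURN], False),  # still running -> keep poking
       ],
   )
   def test_poke(mocked_emr_client, steps, expected):
       # Shape the mocked response like a real emr.list_steps payload.
       mocked_emr_client.list_steps.return_value = {
           "ResponseMetadata": {"HTTPStatusCode": 200},
           "Steps": steps,
       }
       sensor = EmrStepsSensor(task_id="test_emr_steps_sensor", job_flow_id="j-8989898989")
       assert sensor.poke(context={}) == expected
   ```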



##########
airflow/providers/amazon/aws/sensors/emr.py:
##########
@@ -470,3 +470,102 @@ def failure_message_from_response(response: Dict[str, Any]) -> Optional[str]:
                 f"with message {fail_details.get('Message')} and log file {fail_details.get('LogFile')}"
             )
         return None
+
+
+class EmrStepsSensor(EmrBaseSensor):
+    """
+    Asks for the state of the step until it reaches any of the target states.
+    If it fails the sensor errors, failing the task.
+    With the default target states, sensor waits step to be completed.
+    .. seealso::
+        For more information on how to use this sensor, take a look at the guide:
+        :ref:`howto/sensor:EmrStepSensor`
+    :param job_flow_id: job_flow_id which contains the step check the state of
+    :param step_id: step to check the state of
+    :param target_states: the target states, sensor waits until
+        step reaches any of these states
+    :param failed_states: the failure states, sensor fails when
+        step reaches any of these states
+    """
+
+    template_fields: Sequence[str] = ("job_flow_id", "target_states", "failed_states")
+    template_ext: Sequence[str] = ()
+
+    def __init__(
+        self,
+        *,
+        job_flow_id: str,
+        target_states: Optional[Iterable[str]] = None,
+        failed_states: Optional[Iterable[str]] = None,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.job_flow_id = job_flow_id
+        self.target_states = target_states or ["COMPLETED"]
+        self.failed_states = failed_states or ["CANCELLED", "FAILED", "INTERRUPTED"]

Review Comment:
   WDYT about moving the effectively default values up to the constructor as actual default values? This would expose the values in the Python API documentation and in IDEs as well. Users might think that they must specify both `target_states` and `failed_states` when using this sensor otherwise.
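   For example, just a sketch of the signature change (reusing the `Iterable` import already present in `emr.py`; tuples are used so the defaults aren't mutable):
   ```python
   def __init__(
       self,
       *,
       job_flow_id: str,
       target_states: Iterable[str] = ("COMPLETED",),
       failed_states: Iterable[str] = ("CANCELLED", "FAILED", "INTERRUPTED"),
       **kwargs,
   ):
       super().__init__(**kwargs)
       self.job_flow_id = job_flow_id
       self.target_states = target_states
       self.failed_states = failed_states
   ```
   That way the defaults show up in the Python API docs and in IDE signature hints.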



##########
airflow/providers/amazon/aws/sensors/emr.py:
##########
@@ -470,3 +470,102 @@ def failure_message_from_response(response: Dict[str, Any]) -> Optional[str]:
                 f"with message {fail_details.get('Message')} and log file {fail_details.get('LogFile')}"
             )
         return None
+
+
+class EmrStepsSensor(EmrBaseSensor):
+    """
+    Asks for the state of the step until it reaches any of the target states.
+    If it fails the sensor errors, failing the task.
+    With the default target states, sensor waits step to be completed.
+    .. seealso::
+        For more information on how to use this sensor, take a look at the guide:
+        :ref:`howto/sensor:EmrStepSensor`
+    :param job_flow_id: job_flow_id which contains the step check the state of
+    :param step_id: step to check the state of
+    :param target_states: the target states, sensor waits until
+        step reaches any of these states
+    :param failed_states: the failure states, sensor fails when
+        step reaches any of these states
+    """
+
+    template_fields: Sequence[str] = ("job_flow_id", "target_states", "failed_states")
+    template_ext: Sequence[str] = ()

Review Comment:
   ```suggestion
   ```
   This is the default in `BaseOperator`. No need to specify an empty `template_ext` attr.
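   For reference, `BaseOperator` already declares this as the class-level default, so dropping the line changes nothing:
   ```python
   # In airflow/models/baseoperator.py, BaseOperator already has (abridged):
   template_ext: Sequence[str] = ()
   ```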



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@airflow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org