Posted to commits@airflow.apache.org by GitBox <gi...@apache.org> on 2022/01/22 13:53:03 UTC

[GitHub] [airflow] subkanthi commented on a change in pull request #20998: Add dbt Cloud provider

subkanthi commented on a change in pull request #20998:
URL: https://github.com/apache/airflow/pull/20998#discussion_r790144758



##########
File path: airflow/providers/dbt/cloud/hooks/dbt.py
##########
@@ -0,0 +1,488 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import json
+import sys
+import time
+from enum import Enum
+from functools import wraps
+from inspect import signature
+from typing import Any, Callable, Dict, List, Optional, Sequence, Set, Tuple, Union
+
+from requests import PreparedRequest, Session
+from requests.auth import AuthBase
+from requests.models import Response
+
+from airflow.exceptions import AirflowException
+from airflow.models import Connection
+from airflow.providers.http.hooks.http import HttpHook
+from airflow.typing_compat import TypedDict
+
+if sys.version_info >= (3, 8):
+    from functools import cached_property
+else:
+    from cached_property import cached_property
+
+
+def fallback_to_default_account(func: Callable) -> Callable:
+    """
+    Decorator which provides a fallback value for ``account_id``. If the ``account_id`` is None or not passed
+    to the decorated function, the value will be taken from the configured dbt Cloud Airflow Connection.
+    """
+    sig = signature(func)
+
+    @wraps(func)
+    def wrapper(*args, **kwargs) -> Callable:
+        bound_args = sig.bind(*args, **kwargs)
+
+        # Check if ``account_id`` was not included in the function signature or, if it is, the value is not
+        # provided.
+        if bound_args.arguments.get("account_id") is None:
+            self = args[0]
+            default_account_id = self.conn.login
+            if not default_account_id:
+                raise AirflowException("Could not determine the dbt Cloud account.")
+
+            bound_args.arguments["account_id"] = int(default_account_id)
+
+        return func(*bound_args.args, **bound_args.kwargs)
+
+    return wrapper
+
+
+class TokenAuth(AuthBase):
+    """Helper class for Auth when executing requests."""
+
+    def __init__(self, token: str) -> None:
+        self.token = token
+
+    def __call__(self, request: PreparedRequest) -> PreparedRequest:
+        request.headers["Content-Type"] = "application/json"
+        request.headers["Authorization"] = f"Token {self.token}"
+
+        return request
+
+
+class JobRunInfo(TypedDict):
+    """Type class for the ``job_run_info`` dictionary."""
+
+    account_id: int
+    run_id: int
+
+
+class DbtCloudJobRunStatus(Enum):
+    """dbt Cloud Job statuses."""
+
+    QUEUED = 1
+    STARTING = 2
+    RUNNING = 3
+    SUCCESS = 10
+    ERROR = 20
+    CANCELLED = 30
+    TERMINAL_STATUSES = (SUCCESS, ERROR, CANCELLED)
+
+    @classmethod
+    def check_is_valid(cls, statuses: Union[int, Sequence[int], Set[int]]):
+        """Validates input statuses are a known value."""
+        if isinstance(statuses, (Sequence, Set)):
+            for status in statuses:
+                cls(status)
+        else:
+            cls(statuses)
+
+    @classmethod
+    def is_terminal(cls, status: int) -> bool:
+        """Checks if the input status is that of a terminal type."""
+        cls.check_is_valid(statuses=status)
+
+        return status in cls.TERMINAL_STATUSES.value
+
+
+class DbtCloudJobRunException(AirflowException):
+    """An exception that indicates a job run failed to complete."""
+
+
+class DbtCloudHook(HttpHook):
+    """
+    Interact with dbt Cloud using the V2 API.
+
+    :param dbt_cloud_conn_id: The ID of the :ref:`dbt Cloud connection <howto/connection:dbt-cloud>`.
+    """
+
+    conn_name_attr = "dbt_cloud_conn_id"
+    default_conn_name = "dbt_cloud_default"
+    conn_type = "dbt_cloud"
+    hook_name = "dbt Cloud"
+
+    @staticmethod
+    def get_ui_field_behaviour() -> Dict[str, Any]:
+        """Builds custom field behavior for the dbt Cloud connection form in the Airflow UI."""
+        return {
+            "hidden_fields": ["host", "port", "schema", "extra"],
+            "relabeling": {"login": "Account ID", "password": "API Token"},
+        }
+
+    def __init__(self, dbt_cloud_conn_id: str = default_conn_name, *args, **kwargs) -> None:
+        super().__init__(auth_type=TokenAuth)
+        self.dbt_cloud_conn_id = dbt_cloud_conn_id
+        self.base_url = "https://cloud.getdbt.com/api/v2/accounts/"
+
+    @cached_property
+    def conn(self) -> Connection:
+        _conn = self.get_connection(self.dbt_cloud_conn_id)
+        if not _conn.password:
+            raise AirflowException("An API token is required to connect to dbt Cloud.")
+
+        return _conn
+
+    def get_conn(self, *args, **kwargs) -> Session:
+        session = Session()
+        session.auth = self.auth_type(self.conn.password)
+
+        return session
+
+    def _paginate(self, endpoint: str, payload: Optional[Dict[str, Any]] = None) -> List[Response]:
+        results = []
+        response = self.run(endpoint=endpoint, data=payload)
+        resp_json = response.json()
+        limit = resp_json["extra"]["filters"]["limit"]
+        num_total_results = resp_json["extra"]["pagination"]["total_count"]
+        num_current_results = resp_json["extra"]["pagination"]["count"]
+        results.append(response)
+
+        if not num_current_results == num_total_results:
+            _paginate_payload = payload.copy() if payload else {}
+            _paginate_payload["offset"] = limit
+
+            while True:
+                if num_current_results < num_total_results:
+                    response = self.run(endpoint=endpoint, data=_paginate_payload)
+                    resp_json = response.json()
+                    if resp_json["data"]:

Review comment:
       Why is this check useful? It doesn't look like `resp_json['data']` is accessed below.
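
       For the sake of discussion, here is a minimal sketch of how the remainder of the loop could
       work without that check. This is only an assumption about the code below the fold, not the
       PR's actual implementation; it reuses the names defined above in the diff:

           while num_current_results < num_total_results:
               # Fetch the next page using the current offset.
               response = self.run(endpoint=endpoint, data=_paginate_payload)
               resp_json = response.json()
               # Append the page and advance the counter/offset by the page size.
               results.append(response)
               num_current_results += resp_json["extra"]["pagination"]["count"]
               _paginate_payload["offset"] += limit

       If the remaining code looks roughly like this, the truthiness check on `resp_json["data"]`
       would only guard against appending an empty page, which the pagination counters already cover.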




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@airflow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org