You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by jo...@apache.org on 2023/02/02 16:42:33 UTC

[airflow] branch main updated: Drop Connection.schema use in DbtCloudHook (#29166)

This is an automated email from the ASF dual-hosted git repository.

joshfell pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 91c0ce7666 Drop Connection.schema use in DbtCloudHook (#29166)
91c0ce7666 is described below

commit 91c0ce7666f131176cb6368058dc1f259275b894
Author: Josh Fell <48...@users.noreply.github.com>
AuthorDate: Thu Feb 2 11:42:25 2023 -0500

    Drop Connection.schema use in DbtCloudHook (#29166)
    
    * Correct tenant eval within async logic of DbtCloudHook
    
    Related: #28890 #29014
    
    There was a recent enhancement of DbtCloudRunJobOperator to include deferrable/async functionality. Unfortunately the `tenant` evaluation in the DbtCloudHook was outdated and didn't include the most recent change to properly handle domain specification.
    
    This PR consolidates the tenant eval logic to a common method to be used by both sync and async methods in the hook.
    
    * Remove Connection.schema use
    
    * Update provider.yaml and changelog
---
 airflow/providers/dbt/cloud/CHANGELOG.rst | 16 ++++++++++++++++
 airflow/providers/dbt/cloud/hooks/dbt.py  | 29 ++++++++---------------------
 airflow/providers/dbt/cloud/provider.yaml |  1 +
 3 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/airflow/providers/dbt/cloud/CHANGELOG.rst b/airflow/providers/dbt/cloud/CHANGELOG.rst
index b55c9b18d9..258839fc94 100644
--- a/airflow/providers/dbt/cloud/CHANGELOG.rst
+++ b/airflow/providers/dbt/cloud/CHANGELOG.rst
@@ -24,6 +24,22 @@
 Changelog
 ---------
 
+3.0.0
+.....
+
+Breaking changes
+~~~~~~~~~~~~~~~~
+
+Beginning with version 2.0.0, users could specify single-tenant dbt Cloud domains via the ``schema`` parameter
+in an Airflow connection. Subsequently in version 2.3.1, users could also connect to the dbt Cloud instances
+outside of the US region as well as private instances by using the ``host`` parameter of their Airflow
+connection to specify the entire tenant domain. Backwards compatibility for using ``schema`` was left in
+place. Version 3.0.0 removes support for using ``schema`` to specify the tenant domain of a dbt Cloud
+instance. If you wish to connect to a single-tenant, instance outside of the US, or a private instance, you
+must use the ``host`` parameter to specify the _entire_ tenant domain name in your Airflow connection.
+
+* ``Drop Connection.schema use in DbtCloudHook  (#29166)``
+
 2.3.1
 .....
 
diff --git a/airflow/providers/dbt/cloud/hooks/dbt.py b/airflow/providers/dbt/cloud/hooks/dbt.py
index 3ddeeb222b..c8f906aa5e 100644
--- a/airflow/providers/dbt/cloud/hooks/dbt.py
+++ b/airflow/providers/dbt/cloud/hooks/dbt.py
@@ -18,7 +18,6 @@ from __future__ import annotations
 
 import json
 import time
-import warnings
 from enum import Enum
 from functools import wraps
 from inspect import signature
@@ -181,6 +180,10 @@ class DbtCloudHook(HttpHook):
         super().__init__(auth_type=TokenAuth)
         self.dbt_cloud_conn_id = dbt_cloud_conn_id
 
+    @staticmethod
+    def _get_tenant_domain(conn: Connection) -> str:
+        return conn.host or "cloud.getdbt.com"
+
     @staticmethod
     def get_request_url_params(
         tenant: str, endpoint: str, include_related: list[str] | None = None
@@ -188,26 +191,22 @@ class DbtCloudHook(HttpHook):
         """
         Form URL from base url and endpoint url
 
-        :param tenant: The tenant name which is need to be replaced in base url.
+        :param tenant: The tenant domain name which is need to be replaced in base url.
         :param endpoint: Endpoint url to be requested.
         :param include_related: Optional. List of related fields to pull with the run.
             Valid values are "trigger", "job", "repository", and "environment".
         """
         data: dict[str, Any] = {}
-        base_url = f"https://{tenant}.getdbt.com/api/v2/accounts/"
         if include_related:
             data = {"include_related": include_related}
-        if base_url and not base_url.endswith("/") and endpoint and not endpoint.startswith("/"):
-            url = base_url + "/" + endpoint
-        else:
-            url = (base_url or "") + (endpoint or "")
+        url = f"https://{tenant}/api/v2/accounts/{endpoint or ''}"
         return url, data
 
     async def get_headers_tenants_from_connection(self) -> tuple[dict[str, Any], str]:
         """Get Headers, tenants from the connection details"""
         headers: dict[str, Any] = {}
         connection: Connection = await sync_to_async(self.get_connection)(self.dbt_cloud_conn_id)
-        tenant: str = connection.schema if connection.schema else "cloud"
+        tenant = self._get_tenant_domain(connection)
         package_name, provider_version = _get_provider_info()
         headers["User-Agent"] = f"{package_name}-v{provider_version}"
         headers["Content-Type"] = "application/json"
@@ -267,19 +266,7 @@ class DbtCloudHook(HttpHook):
         return _connection
 
     def get_conn(self, *args, **kwargs) -> Session:
-        if self.connection.schema:
-            warnings.warn(
-                "The `schema` parameter is deprecated and use within a dbt Cloud connection will be removed "
-                "in a future version. Please use `host` instead and specify the entire tenant domain name.",
-                category=DeprecationWarning,
-                stacklevel=2,
-            )
-            # Prior to deprecation, the connection.schema value could _only_ modify the third-level
-            # domain value while '.getdbt.com' was always used as the remainder of the domain name.
-            tenant = f"{self.connection.schema}.getdbt.com"
-        else:
-            tenant = self.connection.host or "cloud.getdbt.com"
-
+        tenant = self._get_tenant_domain(self.connection)
         self.base_url = f"https://{tenant}/api/v2/accounts/"
 
         session = Session()
diff --git a/airflow/providers/dbt/cloud/provider.yaml b/airflow/providers/dbt/cloud/provider.yaml
index 4315f9c272..7d62555393 100644
--- a/airflow/providers/dbt/cloud/provider.yaml
+++ b/airflow/providers/dbt/cloud/provider.yaml
@@ -22,6 +22,7 @@ description: |
     `dbt Cloud <https://www.getdbt.com/product/what-is-dbt/>`__
 
 versions:
+  - 3.0.0
   - 2.3.1
   - 2.3.0
   - 2.2.0