You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by po...@apache.org on 2022/12/27 07:24:16 UTC

[airflow] branch main updated: Move Hive macros to the provider (#28538)

This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 4e545c6e54 Move Hive macros to the provider (#28538)
4e545c6e54 is described below

commit 4e545c6e54712eedb6ca9cbb8333393ae3f6cba2
Author: Jarek Potiuk <ja...@potiuk.com>
AuthorDate: Tue Dec 27 08:24:08 2022 +0100

    Move Hive macros to the provider (#28538)
    
    The Hive macros are now moved to the apache.hive provider.
    
    Fixes: #19445
---
 airflow/macros/__init__.py                         | 11 ++++++++-
 airflow/provider.yaml.schema.json                  | 12 ++++++++++
 airflow/providers/apache/hive/CHANGELOG.rst        |  9 +++++++
 airflow/providers/apache/hive/macros/__init__.py   | 17 +++++++++++++
 airflow/{ => providers/apache/hive}/macros/hive.py |  0
 airflow/providers/apache/hive/plugins/__init__.py  | 17 +++++++++++++
 airflow/providers/apache/hive/plugins/hive.py      | 28 ++++++++++++++++++++++
 airflow/providers/apache/hive/provider.yaml        |  5 ++++
 .../apache/hive/sensors/metastore_partition.py     |  2 +-
 dev/provider_packages/SETUP_TEMPLATE.cfg.jinja2    |  7 ++++++
 dev/provider_packages/prepare_provider_packages.py | 20 ++++++++++++++++
 .../apache-airflow-providers-apache-hive/index.rst |  3 ++-
 .../macros.rst                                     | 26 ++++++++++++++++++++
 docs/apache-airflow/templates-ref.rst              |  3 ---
 newsfragments/28538.misc.rst                       |  1 +
 .../in_container/run_provider_yaml_files_check.py  | 17 +++++++++++++
 setup.py                                           |  4 ++++
 tests/providers/apache/hive/macros/__init__.py     | 17 +++++++++++++
 .../apache/hive}/macros/test_hive.py               |  2 +-
 19 files changed, 194 insertions(+), 7 deletions(-)

diff --git a/airflow/macros/__init__.py b/airflow/macros/__init__.py
index 4364d3278a..ca36dbe00a 100644
--- a/airflow/macros/__init__.py
+++ b/airflow/macros/__init__.py
@@ -26,7 +26,16 @@ from typing import Any
 import dateutil  # noqa
 from pendulum import DateTime
 
-from airflow.macros import hive  # noqa
+from airflow.utils.deprecation_tools import add_deprecated_classes
+
+__deprecated_classes = {
+    "hive": {
+        "closest_ds_partition": "airflow.providers.apache.hive.macros.hive.closest_ds_partition",
+        "max_partition": "airflow.providers.apache.hive.macros.hive.max_partition",
+    },
+}
+
+add_deprecated_classes(__deprecated_classes, __name__)
 
 
 def ds_add(ds: str, days: int) -> str:
diff --git a/airflow/provider.yaml.schema.json b/airflow/provider.yaml.schema.json
index ff0537db32..1a23f2a7eb 100644
--- a/airflow/provider.yaml.schema.json
+++ b/airflow/provider.yaml.schema.json
@@ -276,6 +276,18 @@
       "items": {
           "type": "string"
       }
+    },
+    "plugins": {
+      "type": "array",
+      "description": "Plugins exposed by the provider",
+      "items": {
+        "name": {
+           "type": "string"
+        },
+        "plugin-class": {
+           "type": "string"
+        }
+      }
     }
   },
   "additionalProperties": false,
diff --git a/airflow/providers/apache/hive/CHANGELOG.rst b/airflow/providers/apache/hive/CHANGELOG.rst
index 9a74537b6c..2accf2be2b 100644
--- a/airflow/providers/apache/hive/CHANGELOG.rst
+++ b/airflow/providers/apache/hive/CHANGELOG.rst
@@ -24,6 +24,15 @@
 Changelog
 ---------
 
+5.1.0
+.....
+
+Features
+~~~~~~~~
+
+The ``apache.hive`` provider now provides the Hive macros that used to be provided by Airflow. As of version
+5.1.0 of the ``apache.hive`` provider, the Hive macros are delivered by the provider.
+
 5.0.0
 .....
 
diff --git a/airflow/providers/apache/hive/macros/__init__.py b/airflow/providers/apache/hive/macros/__init__.py
new file mode 100644
index 0000000000..217e5db960
--- /dev/null
+++ b/airflow/providers/apache/hive/macros/__init__.py
@@ -0,0 +1,17 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/airflow/macros/hive.py b/airflow/providers/apache/hive/macros/hive.py
similarity index 100%
rename from airflow/macros/hive.py
rename to airflow/providers/apache/hive/macros/hive.py
diff --git a/airflow/providers/apache/hive/plugins/__init__.py b/airflow/providers/apache/hive/plugins/__init__.py
new file mode 100644
index 0000000000..217e5db960
--- /dev/null
+++ b/airflow/providers/apache/hive/plugins/__init__.py
@@ -0,0 +1,17 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/airflow/providers/apache/hive/plugins/hive.py b/airflow/providers/apache/hive/plugins/hive.py
new file mode 100644
index 0000000000..63a068be29
--- /dev/null
+++ b/airflow/providers/apache/hive/plugins/hive.py
@@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+from airflow.plugins_manager import AirflowPlugin
+from airflow.providers.apache.hive.macros.hive import closest_ds_partition, max_partition
+
+
+class HivePlugin(AirflowPlugin):
+    """Hive plugin - delivering macros used by users that use the provider."""
+
+    name = "hive"
+    macros = [max_partition, closest_ds_partition]
diff --git a/airflow/providers/apache/hive/provider.yaml b/airflow/providers/apache/hive/provider.yaml
index 926808f3d8..d26ffb73f5 100644
--- a/airflow/providers/apache/hive/provider.yaml
+++ b/airflow/providers/apache/hive/provider.yaml
@@ -22,6 +22,7 @@ description: |
   `Apache Hive <https://hive.apache.org/>`__
 
 versions:
+  - 5.1.0
   - 5.0.0
   - 4.1.1
   - 4.1.0
@@ -109,3 +110,7 @@ connection-types:
     connection-type: hiveserver2
   - hook-class-name: airflow.providers.apache.hive.hooks.hive.HiveMetastoreHook
     connection-type: hive_metastore
+
+plugins:
+  - name: hive
+    plugin-class: airflow.providers.apache.hive.plugins.hive.HivePlugin
diff --git a/airflow/providers/apache/hive/sensors/metastore_partition.py b/airflow/providers/apache/hive/sensors/metastore_partition.py
index 57e793849e..aaa0da12d7 100644
--- a/airflow/providers/apache/hive/sensors/metastore_partition.py
+++ b/airflow/providers/apache/hive/sensors/metastore_partition.py
@@ -31,7 +31,7 @@ class MetastorePartitionSensor(SqlSensor):
     MySQL db. This was created as a result of observing sub optimal
     queries generated by the Metastore thrift service when hitting
     subpartitioned tables. The Thrift service's queries were written in a
-    way that wouldn't leverage the indexes.
+    way that would not leverage the indexes.
 
     :param schema: the schema
     :param table: the table
diff --git a/dev/provider_packages/SETUP_TEMPLATE.cfg.jinja2 b/dev/provider_packages/SETUP_TEMPLATE.cfg.jinja2
index 0878b589e4..a4023b6758 100644
--- a/dev/provider_packages/SETUP_TEMPLATE.cfg.jinja2
+++ b/dev/provider_packages/SETUP_TEMPLATE.cfg.jinja2
@@ -71,6 +71,13 @@ install_requires = {{ INSTALL_REQUIREMENTS }}
 [options.entry_points]
 apache_airflow_provider=
     provider_info=airflow.providers.{{ PROVIDER_PACKAGE_ID }}.get_provider_info:get_provider_info
+{%- if PLUGINS %}
+airflow.plugins=
+{%- for plugin in PLUGINS %}
+    {{ plugin.name }}={{ plugin.package_name }}:{{ plugin.class_name }}
+{%- endfor %}
+{%- endif %}
+
 
 [files]
 packages = airflow.providers.{{ PROVIDER_PACKAGE_ID }}
diff --git a/dev/provider_packages/prepare_provider_packages.py b/dev/provider_packages/prepare_provider_packages.py
index f892c82774..4e6b6962f6 100755
--- a/dev/provider_packages/prepare_provider_packages.py
+++ b/dev/provider_packages/prepare_provider_packages.py
@@ -124,6 +124,12 @@ PY3 = sys.version_info[0] == 3
 console = Console(width=400, color_system="standard")
 
 
+class PluginInfo(NamedTuple):
+    name: str
+    package_name: str
+    class_name: str
+
+
 class ProviderPackageDetails(NamedTuple):
     provider_package_id: str
     full_package_name: str
@@ -133,6 +139,7 @@ class ProviderPackageDetails(NamedTuple):
     provider_description: str
     versions: list[str]
     excluded_python_versions: list[str]
+    plugins: list[PluginInfo]
 
 
 class EntityType(Enum):
@@ -1014,6 +1021,17 @@ def get_all_changes_for_package(
 
 def get_provider_details(provider_package_id: str) -> ProviderPackageDetails:
     provider_info = get_provider_info_from_provider_yaml(provider_package_id)
+    plugins: list[PluginInfo] = []
+    if "plugins" in provider_info:
+        for plugin in provider_info["plugins"]:
+            package_name, class_name = plugin["plugin-class"].rsplit(".", maxsplit=1)
+            plugins.append(
+                PluginInfo(
+                    name=plugin["name"],
+                    package_name=package_name,
+                    class_name=class_name,
+                )
+            )
     return ProviderPackageDetails(
         provider_package_id=provider_package_id,
         full_package_name=f"airflow.providers.{provider_package_id}",
@@ -1023,6 +1041,7 @@ def get_provider_details(provider_package_id: str) -> ProviderPackageDetails:
         provider_description=provider_info["description"],
         versions=provider_info["versions"],
         excluded_python_versions=provider_info.get("excluded-python-versions") or [],
+        plugins=plugins,
     )
 
 
@@ -1099,6 +1118,7 @@ def get_provider_jinja_context(
         "CHANGELOG": changelog,
         "SUPPORTED_PYTHON_VERSIONS": supported_python_versions,
         "PYTHON_REQUIRES": python_requires,
+        "PLUGINS": provider_details.plugins,
     }
     return context
 
diff --git a/docs/apache-airflow-providers-apache-hive/index.rst b/docs/apache-airflow-providers-apache-hive/index.rst
index be8089d9b2..99593c26e1 100644
--- a/docs/apache-airflow-providers-apache-hive/index.rst
+++ b/docs/apache-airflow-providers-apache-hive/index.rst
@@ -45,9 +45,10 @@ Content
     :maxdepth: 1
     :caption: Resources
 
-    Example DAGs <https://github.com/apache/airflow/tree/providers-apache-hive/|versrion|/tests/system/providers/apache/hive>
+    Example DAGs <https://github.com/apache/airflow/tree/providers-apache-hive/|version|/tests/system/providers/apache/hive>
     PyPI Repository <https://pypi.org/project/apache-airflow-providers-apache-hive/>
     Installing from sources <installing-providers-from-sources>
+    Macros <macros>
 
 .. THE REMAINDER OF THE FILE IS AUTOMATICALLY GENERATED. IT WILL BE OVERWRITTEN AT RELEASE TIME!
 
diff --git a/docs/apache-airflow-providers-apache-hive/macros.rst b/docs/apache-airflow-providers-apache-hive/macros.rst
new file mode 100644
index 0000000000..62b58158dd
--- /dev/null
+++ b/docs/apache-airflow-providers-apache-hive/macros.rst
@@ -0,0 +1,26 @@
+ .. Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+ ..   http://www.apache.org/licenses/LICENSE-2.0
+
+ .. Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+Hive Macros
+===========
+
+The following macros are available to use in Jinja2 templates. You need to prefix them with ``hive`` when
+you use them in your templates - for example ``hive.closest_ds_partition``.
+
+.. automodule:: airflow.providers.apache.hive.macros.hive
+    :members:
+    :noindex:
diff --git a/docs/apache-airflow/templates-ref.rst b/docs/apache-airflow/templates-ref.rst
index a48b591b75..5aabba042a 100644
--- a/docs/apache-airflow/templates-ref.rst
+++ b/docs/apache-airflow/templates-ref.rst
@@ -178,7 +178,4 @@ Some airflow specific macros are also defined:
 .. automodule:: airflow.macros
     :members:
 
-.. automodule:: airflow.macros.hive
-    :members:
-
 .. _pendulum.DateTime: https://pendulum.eustace.io/docs/#introduction
diff --git a/newsfragments/28538.misc.rst b/newsfragments/28538.misc.rst
new file mode 100644
index 0000000000..5b929d8448
--- /dev/null
+++ b/newsfragments/28538.misc.rst
@@ -0,0 +1 @@
+The Hive Macros (``hive.max_partition``, ``hive.closest_ds_partition``) are available only when the Hive Provider is installed. Please install Hive Provider >= 5.1.0 when using those macros.
diff --git a/scripts/in_container/run_provider_yaml_files_check.py b/scripts/in_container/run_provider_yaml_files_check.py
index b0edb333d4..bdf9a60f54 100755
--- a/scripts/in_container/run_provider_yaml_files_check.py
+++ b/scripts/in_container/run_provider_yaml_files_check.py
@@ -293,6 +293,22 @@ def check_hook_classes(yaml_files: dict[str, dict]):
             )
 
 
+def check_plugin_classes(yaml_files: dict[str, dict]):
+    print("Checking plugin classes belong to package, exist and are classes")
+    resource_type = "plugins"
+    for yaml_file_path, provider_data in yaml_files.items():
+        provider_package = pathlib.Path(yaml_file_path).parent.as_posix().replace("/", ".")
+        plugins = provider_data.get(resource_type)
+        if plugins:
+            check_if_objects_exist_and_belong_to_package(
+                {plugin["plugin-class"] for plugin in plugins},
+                provider_package,
+                yaml_file_path,
+                resource_type,
+                ObjectType.CLASS,
+            )
+
+
 def check_extra_link_classes(yaml_files: dict[str, dict]):
     print("Checking extra-links belong to package, exist and are classes")
     resource_type = "extra-links"
@@ -464,6 +480,7 @@ if __name__ == "__main__":
     check_completeness_of_list_of_transfers(all_parsed_yaml_files)
     check_duplicates_in_list_of_transfers(all_parsed_yaml_files)
     check_hook_classes(all_parsed_yaml_files)
+    check_plugin_classes(all_parsed_yaml_files)
     check_extra_link_classes(all_parsed_yaml_files)
     check_correctness_of_list_of_sensors_operators_hook_modules(all_parsed_yaml_files)
     check_unique_provider_name(all_parsed_yaml_files)
diff --git a/setup.py b/setup.py
index 852bd02a40..322440ab98 100644
--- a/setup.py
+++ b/setup.py
@@ -809,6 +809,10 @@ def replace_extra_dependencies_with_provider_packages(extra: str, providers: lis
         EXTRAS_DEPENDENCIES[extra].extend(
             [get_provider_package_name_from_package_id(package_name) for package_name in providers]
         )
+    elif extra == "apache.hive":
+        # We moved the hive macros to the hive provider, and they are available in the hive provider only
+        # as of version 5.1.0, so we have to make sure that minimum version is used
+        EXTRAS_DEPENDENCIES[extra] = ["apache-airflow-providers-hive>=5.1.0"]
     else:
         EXTRAS_DEPENDENCIES[extra] = [
             get_provider_package_name_from_package_id(package_name) for package_name in providers
diff --git a/tests/providers/apache/hive/macros/__init__.py b/tests/providers/apache/hive/macros/__init__.py
new file mode 100644
index 0000000000..217e5db960
--- /dev/null
+++ b/tests/providers/apache/hive/macros/__init__.py
@@ -0,0 +1,17 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/tests/macros/test_hive.py b/tests/providers/apache/hive/macros/test_hive.py
similarity index 97%
rename from tests/macros/test_hive.py
rename to tests/providers/apache/hive/macros/test_hive.py
index b865535bf6..b2bce20f8c 100644
--- a/tests/macros/test_hive.py
+++ b/tests/providers/apache/hive/macros/test_hive.py
@@ -19,7 +19,7 @@ from __future__ import annotations
 
 from datetime import datetime
 
-from airflow.macros import hive
+from airflow.providers.apache.hive.macros import hive
 
 
 class TestHive: