You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by as...@apache.org on 2020/11/09 22:04:15 UTC

[airflow] branch master updated: Adds automated installation of dependent packages (#11526)

This is an automated email from the ASF dual-hosted git repository.

ash pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/master by this push:
     new ea27f90  Adds automated installation of dependent packages (#11526)
ea27f90 is described below

commit ea27f90d299b9585e3d59c2ce4c98054545b34cc
Author: Jarek Potiuk <ja...@polidea.com>
AuthorDate: Mon Nov 9 23:01:19 2020 +0100

    Adds automated installation of dependent packages (#11526)
    
    When extras are specified while airflow is being installed, this triggers
    installation of dependent packages. Each extra has a set of provider
    packages that are needed by the extra, and they will be installed
    automatically if this extra is specified.
    
    For now we do not add any version specification, until we agree on the
    process in #11425, and then we should be able to implement an
    automated way of getting information about cross-package
    version dependencies.
    
    Fixes: #11464
---
 CONTRIBUTING.rst                                   |   6 +-
 INSTALL                                            |   6 +-
 .../ci/pre_commit/pre_commit_check_order_setup.py  |  49 +++++++-
 .../in_container/run_prepare_provider_packages.sh  |   2 +-
 setup.py                                           | 126 ++++++++++++++++++++-
 5 files changed, 172 insertions(+), 17 deletions(-)

diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index cfd1c70..5a4066c 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -546,9 +546,9 @@ aws, azure, cassandra, celery, cgroups, cloudant, cncf.kubernetes, dask, databri
 devel_hadoop, doc, docker, druid, elasticsearch, exasol, facebook, gcp, gcp_api, github_enterprise,
 google, google_auth, grpc, hashicorp, hdfs, hive, jdbc, jira, kerberos, kubernetes, ldap,
 microsoft.azure, microsoft.mssql, microsoft.winrm, mongo, mssql, mysql, odbc, oracle, pagerduty,
-papermill, password, pinot, plexus, postgres, presto, qds, rabbitmq, redis, salesforce, samba,
-segment, sendgrid, sentry, singularity, slack, snowflake, spark, ssh, statsd, tableau, vertica,
-virtualenv, webhdfs, winrm, yandexcloud, all, devel_ci
+papermill, password, pinot, plexus, postgres, presto, qds, qubole, rabbitmq, redis, salesforce,
+samba, segment, sendgrid, sentry, singularity, slack, snowflake, spark, ssh, statsd, tableau,
+vertica, virtualenv, webhdfs, winrm, yandexcloud, all, devel_ci
 
   .. END EXTRAS HERE
 
diff --git a/INSTALL b/INSTALL
index da6d6ad..d393220 100644
--- a/INSTALL
+++ b/INSTALL
@@ -69,9 +69,9 @@ aws, azure, cassandra, celery, cgroups, cloudant, cncf.kubernetes, dask, databri
 devel_hadoop, doc, docker, druid, elasticsearch, exasol, facebook, gcp, gcp_api, github_enterprise,
 google, google_auth, grpc, hashicorp, hdfs, hive, jdbc, jira, kerberos, kubernetes, ldap,
 microsoft.azure, microsoft.mssql, microsoft.winrm, mongo, mssql, mysql, odbc, oracle, pagerduty,
-papermill, password, pinot, plexus, postgres, presto, qds, rabbitmq, redis, salesforce, samba,
-segment, sendgrid, sentry, singularity, slack, snowflake, spark, ssh, statsd, tableau, vertica,
-virtualenv, webhdfs, winrm, yandexcloud, all, devel_ci
+papermill, password, pinot, plexus, postgres, presto, qds, qubole, rabbitmq, redis, salesforce,
+samba, segment, sendgrid, sentry, singularity, slack, snowflake, spark, ssh, statsd, tableau,
+vertica, virtualenv, webhdfs, winrm, yandexcloud, all, devel_ci
 
 # END EXTRAS HERE
 
diff --git a/scripts/ci/pre_commit/pre_commit_check_order_setup.py b/scripts/ci/pre_commit/pre_commit_check_order_setup.py
index cfcb297..e94109c 100755
--- a/scripts/ci/pre_commit/pre_commit_check_order_setup.py
+++ b/scripts/ci/pre_commit/pre_commit_check_order_setup.py
@@ -19,7 +19,6 @@
 """
 Test for an order of dependencies in setup.py
 """
-
 import os
 import re
 import sys
@@ -28,6 +27,10 @@ from typing import List
 
 errors = []
 
+MY_DIR_PATH = os.path.dirname(__file__)
+SOURCE_DIR_PATH = os.path.abspath(os.path.join(MY_DIR_PATH, os.pardir, os.pardir, os.pardir))
+sys.path.insert(0, SOURCE_DIR_PATH)
+
 
 def _check_list_sorted(the_list: List[str], message: str) -> None:
     sorted_list = sorted(the_list)
@@ -122,9 +125,7 @@ def check_extras_require(setup_context: str) -> None:
     Test for an order of dependencies in function do_setup section
     extras_require in setup.py
     """
-    pattern_extras_requires = re.compile(
-        r'EXTRAS_REQUIREMENTS: Dict\[str, Iterable\[str\]] = {(.*?)}', re.DOTALL
-    )
+    pattern_extras_requires = re.compile(r'EXTRAS_REQUIREMENTS: Dict\[str, List\[str\]] = {(.*?)}', re.DOTALL)
     extras_requires = pattern_extras_requires.findall(setup_context)[0]
 
     pattern_dependent = re.compile('\'(.*?)\'')
@@ -137,16 +138,50 @@ def check_provider_requirements(setup_context: str) -> None:
     Test for an order of dependencies in function do_setup section
     providers_require in setup.py
     """
-    pattern_extras_requires = re.compile(
+    pattern_extras_providers_packages = re.compile(
         r'PROVIDERS_REQUIREMENTS: Dict\[str, Iterable\[str\]\] = {(.*?)}', re.DOTALL
     )
-    extras_requires = pattern_extras_requires.findall(setup_context)[0]
+    extras_requires = pattern_extras_providers_packages.findall(setup_context)[0]
 
     pattern_dependent = re.compile('"(.*?)"')
     src = pattern_dependent.findall(extras_requires)
     _check_list_sorted(src, "Order of dependencies in: providers_require")
 
 
+def check_extras_provider_packages(setup_context: str) -> None:
+    """
+    Test for an order of dependencies in function do_setup section
+    providers_require in setup.py
+    """
+    pattern_extras_requires = re.compile(
+        r'EXTRAS_PROVIDERS_PACKAGES: Dict\[str, Iterable\[str\]\] = {(.*?)}', re.DOTALL
+    )
+    extras_requires = pattern_extras_requires.findall(setup_context)[0]
+
+    pattern_dependent = re.compile('"(.*?)":')
+    src = pattern_dependent.findall(extras_requires)
+    _check_list_sorted(src, "Order of dependencies in: extras_provider_packages")
+
+
+def checks_extra_with_providers_exist() -> None:
+
+    from setup import EXTRAS_REQUIREMENTS, EXTRAS_PROVIDERS_PACKAGES  # noqa # isort:skip
+
+    message = 'Check if all extras have providers defined in: EXTRAS_PROVIDERS_PACKAGES'
+    local_error = False
+    for key in EXTRAS_REQUIREMENTS.keys():  # noqa
+        if key not in EXTRAS_PROVIDERS_PACKAGES.keys():  # noqa
+            if not local_error:
+                local_error = True
+                print(f"Extra {key} NOK")
+            errors.append(
+                f"ERROR in {message}. The {key} extras is missing there."
+                " If you do not want to install any providers with this extra set it to []"
+            )
+    if not local_error:
+        print(f"{message} is ok")
+
+
 if __name__ == '__main__':
     setup_context_main = setup()
     check_main_dependent_group(setup_context_main)
@@ -155,6 +190,8 @@ if __name__ == '__main__':
     check_install_and_setup_requires(setup_context_main)
     check_extras_require(setup_context_main)
     check_provider_requirements(setup_context_main)
+    check_extras_provider_packages(setup_context_main)
+    checks_extra_with_providers_exist()
 
     print()
     print()
diff --git a/scripts/in_container/run_prepare_provider_packages.sh b/scripts/in_container/run_prepare_provider_packages.sh
index f10f29b..75c1d09 100755
--- a/scripts/in_container/run_prepare_provider_packages.sh
+++ b/scripts/in_container/run_prepare_provider_packages.sh
@@ -24,7 +24,7 @@ LIST_OF_DIRS_FILE=$(mktemp)
 
 cd "${AIRFLOW_SOURCES}/airflow/providers" || exit 1
 
-find . -type d | sed 's/.\///; s/\//\./g' | grep -E 'hooks|operators|sensors|secrets' \
+find . -type d | sed 's/.\///; s/\//\./g' | grep -E 'hooks|operators|sensors|secrets|utils' \
     > "${LIST_OF_DIRS_FILE}"
 
 cd "${AIRFLOW_SOURCES}/provider_packages" || exit 1
diff --git a/setup.py b/setup.py
index cb5a3ef..5720cde 100644
--- a/setup.py
+++ b/setup.py
@@ -352,7 +352,7 @@ postgres = [
     'psycopg2-binary>=2.7.4',
 ]
 presto = ['presto-python-client>=0.7.0,<0.8']
-qds = [
+qubole = [
     'qds-sdk>=1.10.4',
 ]
 rabbitmq = [
@@ -540,11 +540,12 @@ PROVIDERS_REQUIREMENTS: Dict[str, Iterable[str]] = {
     "plexus": plexus,
     "postgres": postgres,
     "presto": presto,
-    "qubole": qds,
+    "qubole": qubole,
     "redis": redis,
     "salesforce": salesforce,
     "samba": samba,
     "segment": segment,
+    "sendgrid": sendgrid,
     "sftp": ssh,
     "singularity": singularity,
     "slack": slack,
@@ -556,7 +557,7 @@ PROVIDERS_REQUIREMENTS: Dict[str, Iterable[str]] = {
     "zendesk": zendesk,
 }
 
-EXTRAS_REQUIREMENTS: Dict[str, Iterable[str]] = {
+EXTRAS_REQUIREMENTS: Dict[str, List[str]] = {
     'all_dbs': all_dbs,
     'amazon': amazon,
     'apache.atlas': atlas,
@@ -619,7 +620,8 @@ EXTRAS_REQUIREMENTS: Dict[str, Iterable[str]] = {
     'plexus': plexus,
     'postgres': postgres,
     'presto': presto,
-    'qds': qds,
+    'qds': qubole,  # TODO: remove this in Airflow 2.1
+    'qubole': qubole,
     'rabbitmq': rabbitmq,
     'redis': redis,
     'salesforce': salesforce,
@@ -641,6 +643,111 @@ EXTRAS_REQUIREMENTS: Dict[str, Iterable[str]] = {
     'yandexcloud': yandexcloud,
 }
 
+EXTRAS_PROVIDERS_PACKAGES: Dict[str, Iterable[str]] = {
+    'all': list(PROVIDERS_REQUIREMENTS.keys()),
+    # this is not 100% accurate with devel_ci definition, but we really want to have all providers
+    # when devel_ci extra is installed!
+    'devel_ci': list(PROVIDERS_REQUIREMENTS.keys()),
+    'all_dbs': [
+        "apache.cassandra",
+        "apache.druid",
+        "apache.hdfs",
+        "apache.hive",
+        "apache.pinot",
+        "cloudant",
+        "exasol",
+        "mongo",
+        "microsoft.mssql",
+        "mysql",
+        "postgres",
+        "presto",
+        "vertica",
+    ],
+    'amazon': ["amazon"],
+    'apache.atlas': [],
+    'apache.beam': [],
+    "apache.cassandra": ["apache.cassandra"],
+    "apache.druid": ["apache.druid"],
+    "apache.hdfs": ["apache.hdfs"],
+    "apache.hive": ["apache.hive"],
+    "apache.kylin": ["apache.kylin"],
+    "apache.pinot": ["apache.pinot"],
+    "apache.presto": ["apache.presto"],
+    "apache.spark": ["apache.spark"],
+    "apache.webhdfs": ["apache.hdfs"],
+    'async': [],
+    'atlas': [],  # TODO: remove this in Airflow 2.1
+    'aws': ["amazon"],  # TODO: remove this in Airflow 2.1
+    'azure': ["microsoft.azure"],  # TODO: remove this in Airflow 2.1
+    'cassandra': ["apache.cassandra"],  # TODO: remove this in Airflow 2.1
+    'celery': ["celery"],
+    'cgroups': [],
+    'cloudant': ["cloudant"],
+    'cncf.kubernetes': ["cncf.kubernetes"],
+    'dask': ["dask"],
+    'databricks': ["databricks"],
+    'datadog': ["datadog"],
+    'devel': ["cncf.kubernetes", "mysql"],
+    'devel_hadoop': ["apache.hdfs", "apache.hive", "presto"],
+    'doc': [],
+    'docker': ["docker"],
+    'druid': ["apache.druid"],  # TODO: remove this in Airflow 2.1
+    'elasticsearch': ["elasticsearch"],
+    'exasol': ["exasol"],
+    'facebook': ["facebook"],
+    'gcp': ["google"],  # TODO: remove this in Airflow 2.1
+    'gcp_api': ["google"],  # TODO: remove this in Airflow 2.1
+    'github_enterprise': [],
+    'google': ["google"],
+    'google_auth': [],
+    'grpc': ["grpc"],
+    'hashicorp': ["hashicorp"],
+    'hdfs': ["apache.hdfs"],  # TODO: remove this in Airflow 2.1
+    'hive': ["apache.hive"],  # TODO: remove this in Airflow 2.1
+    'jdbc': ["jdbc"],
+    'jira': ["jira"],
+    'kerberos': [],
+    'kubernetes': ["cncf.kubernetes"],  # TODO: remove this in Airflow 2.1
+    'ldap': [],
+    "microsoft.azure": ["microsoft.azure"],
+    "microsoft.mssql": ["microsoft.mssql"],
+    "microsoft.winrm": ["microsoft.winrm"],
+    'mongo': ["mongo"],
+    'mssql': ["microsoft.mssql"],  # TODO: remove this in Airflow 2.1
+    'mysql': ["microsoft.mssql"],
+    'odbc': ["odbc"],
+    'oracle': ["oracle"],
+    'pagerduty': ["pagerduty"],
+    'papermill': ["papermill"],
+    'password': [],
+    'pinot': ["apache.pinot"],  # TODO: remove this in Airflow 2.1
+    'plexus': ["plexus"],
+    'postgres': ["postgres"],
+    'presto': ["presto"],
+    'qds': ["qubole"],  # TODO: remove this in Airflow 2.1
+    'qubole': ["qubole"],
+    'rabbitmq': ["rabbitmq"],
+    'redis': ["redis"],
+    'salesforce': ["salesforce"],
+    'samba': ["samba"],
+    'segment': ["segment"],
+    'sendgrid': ["sendgrid"],
+    'sentry': ["sentry"],
+    'singularity': ["singularity"],
+    'slack': ["slack"],
+    'snowflake': ["snowflake"],
+    'spark': ["spark"],
+    'ssh': ["ssh"],
+    'statsd': ["statsd"],
+    'tableau': ["tableau"],
+    'vertica': ["vertica"],
+    'virtualenv': ["virtualenv"],
+    'webhdfs': ["apache.hdfs"],  # TODO: remove this in Airflow 2.1
+    'winrm': ["microsoft.winrm"],  # TODO: remove this in Airflow 2.1
+    'yandexcloud': ["yandexcloud"],
+}
+
+
 # Make devel_all contain all providers + extras + unique
 devel_all = list(
     set(
@@ -759,6 +866,17 @@ INSTALL_REQUIREMENTS = [
 ]
 
 
+def get_provider_package_from_package_id(package_id: str):
+    """
+    Builds the name of provider package out of the package id provided.
+
+    :param package_id: id of the package (like amazon or microsoft.azure)
+    :return: full name of package in PyPI
+    """
+    package_suffix = package_id.replace(".", "-")
+    return f"apache-airflow-providers-{package_suffix}"
+
+
 def do_setup():
     """Perform the Airflow package setup."""
     install_providers_from_sources = os.getenv('INSTALL_PROVIDERS_FROM_SOURCES')