Posted to commits@airflow.apache.org by po...@apache.org on 2021/06/22 19:25:18 UTC

[airflow] 40/47: Prepare for Python 3.9 support (#16536)

This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch v2-1-test
in repository https://gitbox.apache.org/repos/asf/airflow.git

commit f5d73cf406945d7680e810af3c30af0dc80cba0c
Author: Jarek Potiuk <ja...@potiuk.com>
AuthorDate: Sat Jun 19 22:10:17 2021 +0200

    Prepare for Python 3.9 support (#16536)
    
    This is the first step towards adding Python 3.9 support to Airflow.
    The Hive provider has to be excluded on this Python version because it
    requires the sasl library, which does not yet support Python 3.9.
    
    Until https://github.com/dropbox/PyHive/issues/380 is solved we will
    exclude the hive provider on Python 3.9 (a condensed sketch of this
    exclusion pattern follows the file list below). Adding full support and
    exclusion handling will be the next step, but this change needs to be
    merged first so that the image can be built from `main`.
    
    The Dockerfiles have been updated to remove some obsolete limits, and
    the dask executor tests are now skipped conditionally when the
    distributed framework cannot be imported.
    
    (cherry picked from commit 2d85a95e18afc8e4a0ac14968ede63df07a8f8f4)
---
 Dockerfile                            |  4 ++--
 Dockerfile.ci                         |  4 ++--
 setup.py                              | 25 +++++++++++++++++++++----
 tests/executors/test_dask_executor.py |  9 ++++++++-
 4 files changed, 33 insertions(+), 9 deletions(-)
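
A condensed, self-contained sketch of the Python-version gate introduced in setup.py below (the trimmed PROVIDERS_REQUIREMENTS mapping here is hypothetical; the real setup.py lists every provider):

    import sys

    PY39 = sys.version_info >= (3, 9)

    # Hypothetical, trimmed-down provider mapping; setup.py defines the full one.
    PROVIDERS_REQUIREMENTS = {
        'apache.hive': ['hmsclient>=0.1.0', 'pyhive[hive]>=0.6.0;python_version<"3.9"'],
        'http': ['requests>=2.20.0'],
    }


    def get_excluded_providers():
        # apache.hive is dropped on Python 3.9 until PyHive/sasl support it.
        return ['apache.hive'] if PY39 else []


    def get_all_provider_packages():
        excluded_providers = get_excluded_providers()
        return " ".join(
            f"apache-airflow-providers-{package.replace('.', '-')}"
            for package in PROVIDERS_REQUIREMENTS
            if package not in excluded_providers
        )


    print(get_all_provider_packages())  # omits apache.hive on Python 3.9+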

diff --git a/Dockerfile b/Dockerfile
index 8b36e00..39a13dc 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -232,11 +232,11 @@ ARG INSTALL_FROM_DOCKER_CONTEXT_FILES=""
 ARG INSTALL_FROM_PYPI="true"
 # Those are additional constraints that are needed for some extras but we do not want to
 # Force them on the main Airflow package.
-# * chardet<4 - required to keep snowflake happy
+# * chardet<4 and certifi<2021.0.0 required to keep snowflake happy
 # * urllib3 - required to keep boto3 happy
 # * pyjwt<2.0.0: flask-jwt-extended requires it
 # * dill<0.3.3 required by apache-beam
-ARG EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS="chardet<4 urllib3<1.26 pyjwt<2.0.0 dill<0.3.3"
+ARG EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS="chardet<4 urllib3<1.26 pyjwt<2.0.0 dill<0.3.3 certifi<2021.0.0"
 ARG CONTINUE_ON_PIP_CHECK_FAILURE="false"
 
 
diff --git a/Dockerfile.ci b/Dockerfile.ci
index 14fe753..29438c7 100644
--- a/Dockerfile.ci
+++ b/Dockerfile.ci
@@ -251,13 +251,13 @@ ENV AIRFLOW_REPO=${AIRFLOW_REPO}\
 
 # Those are additional constraints that are needed for some extras but we do not want to
 # force them on the main Airflow package. Those limitations are:
-# * chardet<4: required by snowflake provider
+# * chardet<4 and certifi<2021.0.0: required by snowflake provider
 # * lazy-object-proxy<1.5.0: required by astroid
 # * pyOpenSSL: required by snowflake provider https://github.com/snowflakedb/snowflake-connector-python/blob/v2.3.6/setup.py#L201
 # * urllib3<1.26: Required to keep boto3 happy
 # * pyjwt<2.0.0: flask-jwt-extended requires it
 # * dill<0.3.3 required by apache-beam
-ARG EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS="chardet<4 lazy-object-proxy<1.5.0 pyOpenSSL<20.0.0 urllib3<1.26 pyjwt<2.0.0 dill<0.3.3"
+ARG EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS="chardet<4 lazy-object-proxy<1.5.0 pyOpenSSL<20.0.0 urllib3<1.26 pyjwt<2.0.0 dill<0.3.3 certifi<2021.0.0"
 ARG UPGRADE_TO_NEWER_DEPENDENCIES="false"
 ENV EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS} \
     UPGRADE_TO_NEWER_DEPENDENCIES=${UPGRADE_TO_NEWER_DEPENDENCIES}
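
The EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS value in both Dockerfiles is a plain space-separated list of pip requirement specifiers. Exactly how the build consumes it is not shown in this diff; the snippet below is only an illustrative sketch of splitting such a string and handing it to pip:

    import subprocess
    import sys

    # Space-separated pip specifiers, mirroring the ARG value above.
    additional_requirements = (
        "chardet<4 urllib3<1.26 pyjwt<2.0.0 dill<0.3.3 certifi<2021.0.0"
    )

    # Illustrative only: the real Dockerfiles drive pip from their own shell
    # scripts rather than from a Python helper like this one.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", *additional_requirements.split()],
        check=True,
    )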
diff --git a/setup.py b/setup.py
index af6b63e..de1c405 100644
--- a/setup.py
+++ b/setup.py
@@ -20,6 +20,7 @@ import glob
 import logging
 import os
 import subprocess
+import sys
 import unittest
 from copy import deepcopy
 from distutils import log
@@ -36,6 +37,7 @@ from setuptools.command.install import install as install_orig
 # And it is particularly useful when you add a new provider and there is no
 # PyPI version to install the provider package from
 INSTALL_PROVIDERS_FROM_SOURCES = 'INSTALL_PROVIDERS_FROM_SOURCES'
+PY39 = sys.version_info >= (3, 9)
 
 logger = logging.getLogger(__name__)
 
@@ -241,7 +243,7 @@ cloudant = [
 dask = [
     'cloudpickle>=1.4.1, <1.5.0',
     'dask<2021.3.1;python_version<"3.7"',  # dask stopped supporting python 3.6 in 2021.3.1 version
-    'dask>=2.9.0;python_version>="3.7"',
+    'dask>=2.9.0, <2021.6.1;python_version>="3.7"',  # dask 2021.6.1 does not work with `distributed`
     'distributed>=2.11.1, <2.20',
 ]
 databricks = [
@@ -340,7 +342,7 @@ hdfs = [
 ]
 hive = [
     'hmsclient>=0.1.0',
-    'pyhive[hive]>=0.6.0',
+    'pyhive[hive]>=0.6.0;python_version<"3.9"',
     'thrift>=0.9.2',
 ]
 http = [
@@ -642,7 +644,6 @@ CORE_EXTRAS_REQUIREMENTS: Dict[str, List[str]] = {
     'virtualenv': virtualenv,
 }
 
-
 EXTRAS_REQUIREMENTS: Dict[str, List[str]] = deepcopy(CORE_EXTRAS_REQUIREMENTS)
 
 
@@ -847,9 +848,25 @@ def get_provider_package_from_package_id(package_id: str):
     return f"apache-airflow-providers-{package_suffix}"
 
 
+def get_excluded_providers():
+    """
+    Returns packages excluded for the current python version.
+
+    Currently the only excluded provider is apache hive for Python 3.9.
+    Until https://github.com/dropbox/PyHive/issues/380 is fixed.
+
+    """
+    return ['apache.hive'] if PY39 else []
+
+
 def get_all_provider_packages():
     """Returns all provider packages configured in setup.py"""
-    return " ".join([get_provider_package_from_package_id(package) for package in PROVIDERS_REQUIREMENTS])
+    excluded_providers = get_excluded_providers()
+    return " ".join(
+        get_provider_package_from_package_id(package)
+        for package in PROVIDERS_REQUIREMENTS
+        if package not in excluded_providers
+    )
 
 
 class AirflowDistribution(Distribution):
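
The ;python_version<"3.9" suffix added to the pyhive specifier above is a PEP 508 environment marker that pip evaluates at install time, so the dependency is skipped entirely on Python 3.9. A quick way to see how such a marker resolves on the current interpreter (this uses the packaging library, which is assumed to be installed and is not part of this commit):

    from packaging.requirements import Requirement

    req = Requirement('pyhive[hive]>=0.6.0;python_version<"3.9"')
    # Marker.evaluate() checks the marker against the running interpreter.
    print(req.marker.evaluate())  # False on Python 3.9+, True on older versions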
diff --git a/tests/executors/test_dask_executor.py b/tests/executors/test_dask_executor.py
index d23f94b..5239f41 100644
--- a/tests/executors/test_dask_executor.py
+++ b/tests/executors/test_dask_executor.py
@@ -19,6 +19,8 @@ import unittest
 from datetime import timedelta
 from unittest import mock
 
+import pytest
+
 from airflow.jobs.backfill_job import BackfillJob
 from airflow.models import DagBag
 from airflow.utils import timezone
@@ -32,8 +34,10 @@ try:
     from distributed.utils_test import cluster as dask_testing_cluster, get_cert, tls_security
 
     from airflow.executors.dask_executor import DaskExecutor
+
+    skip_tls_tests = False
 except ImportError:
-    pass
+    skip_tls_tests = True
 
 DEFAULT_DATE = timezone.datetime(2017, 1, 1)
 
@@ -98,6 +102,9 @@ class TestDaskExecutor(TestBaseDask):
         self.cluster.close(timeout=5)
 
 
+@pytest.mark.skipif(
+    skip_tls_tests, reason="The tests are skipped because distributed framework could not be imported"
+)
 class TestDaskExecutorTLS(TestBaseDask):
     def setUp(self):
         self.dagbag = DagBag(include_examples=True)
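
The guarded-import/skip pattern used in the test change above, condensed into a standalone example (the module and test names here are illustrative, not Airflow's):

    import pytest

    try:
        # Optional dependency: only the guarded tests below need it.
        from distributed import Client  # noqa: F401

        skip_tls_tests = False
    except ImportError:
        skip_tls_tests = True


    @pytest.mark.skipif(
        skip_tls_tests, reason="distributed framework could not be imported"
    )
    class TestRequiresDistributed:
        def test_placeholder(self):
            assert True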