Posted to commits@airflow.apache.org by po...@apache.org on 2021/01/16 11:53:14 UTC

[airflow] branch master updated: Switches to latest version of snowflake connector (#13654)

This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/master by this push:
     new 6e90dfc  Switches to latest version of snowflake connector (#13654)
6e90dfc is described below

commit 6e90dfc38b1bf222f47acc2beb1a6c7ceccdc8dc
Author: Jarek Potiuk <ja...@polidea.com>
AuthorDate: Sat Jan 16 12:52:56 2021 +0100

    Switches to latest version of snowflake connector (#13654)
    
    This should allow us to release a new version of the snowflake
    provider that no longer interferes with other providers via
    monkeypatching of SSL classes.
    
    Fixes #12881
---
 Dockerfile                                         | 18 +++++-----
 Dockerfile.ci                                      | 26 +++++++--------
 IMAGES.rst                                         |  3 --
 scripts/in_container/_in_container_utils.sh        | 14 ++++++++
 scripts/in_container/run_ci_tests.sh               |  2 ++
 .../run_install_and_test_provider_packages.sh      |  9 ++---
 .../in_container/run_prepare_provider_readme.sh    |  6 ++--
 setup.py                                           | 38 +++-------------------
 tests/providers/presto/hooks/test_presto.py        | 17 ----------
 9 files changed, 45 insertions(+), 88 deletions(-)
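
The net effect of the version bump is that the provider now relies on a connector release that vendors
its own copies of urllib3 and requests rather than patching the global ones. A minimal sketch, assuming
a plain pip environment, of installing the connector with the new lower bound from setup.py and
confirming which version resolves:

    # Install with the same lower bound that setup.py now declares, then check the resolved version.
    pip install 'snowflake-connector-python>=2.3.8' 'snowflake-sqlalchemy>=1.1.0'
    pip show snowflake-connector-python | grep '^Version'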

diff --git a/Dockerfile b/Dockerfile
index 50d5895..496fa20 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -192,20 +192,23 @@ ENV AIRFLOW_PRE_CACHED_PIP_PACKAGES=${AIRFLOW_PRE_CACHED_PIP_PACKAGES}
 ARG INSTALL_PROVIDERS_FROM_SOURCES="false"
 ENV INSTALL_PROVIDERS_FROM_SOURCES=${INSTALL_PROVIDERS_FROM_SOURCES}
 
-# Increase the value here to force reinstalling Apache Airflow pip dependencies
-ARG PIP_DEPENDENCIES_EPOCH_NUMBER="1"
-ENV PIP_DEPENDENCIES_EPOCH_NUMBER=${PIP_DEPENDENCIES_EPOCH_NUMBER}
-
 # Only copy install_airflow_from_latest_master.sh to not invalidate cache on other script changes
 COPY scripts/docker/install_airflow_from_latest_master.sh /scripts/docker/install_airflow_from_latest_master.sh
 # fix permission issue in Azure DevOps when running the script
 RUN chmod a+x /scripts/docker/install_airflow_from_latest_master.sh
 
+# By default we do not upgrade to latest dependencies
+ARG UPGRADE_TO_NEWER_DEPENDENCIES="false"
+ENV UPGRADE_TO_NEWER_DEPENDENCIES=${UPGRADE_TO_NEWER_DEPENDENCIES}
+
 # In case of Production build image segment we want to pre-install master version of airflow
 # dependencies from GitHub so that we do not have to always reinstall it from the scratch.
 # The Airflow (and providers in case INSTALL_PROVIDERS_FROM_SOURCES is "false")
 # are uninstalled, only dependencies remain
-RUN if [[ ${AIRFLOW_PRE_CACHED_PIP_PACKAGES} == "true" ]]; then \
+# The cache is only used when "upgrade to newer dependencies" is not set, so that removed
+# dependencies are automatically accounted for (we do not install them in the first place)
+RUN if [[ ${AIRFLOW_PRE_CACHED_PIP_PACKAGES} == "true" && \
+          ${UPGRADE_TO_NEWER_DEPENDENCIES} == "false" ]]; then \
         /scripts/docker/install_airflow_from_latest_master.sh; \
     fi
 
@@ -255,13 +258,10 @@ ENV INSTALL_FROM_DOCKER_CONTEXT_FILES=${INSTALL_FROM_DOCKER_CONTEXT_FILES}
 ARG INSTALL_FROM_PYPI="true"
 ENV INSTALL_FROM_PYPI=${INSTALL_FROM_PYPI}
 
-# By default we do not upgrade to latest dependencies
-ARG UPGRADE_TO_NEWER_DEPENDENCIES="false"
-ENV UPGRADE_TO_NEWER_DEPENDENCIES=${UPGRADE_TO_NEWER_DEPENDENCIES}
-
 # Those are additional constraints that are needed for some extras but we do not want to
 # Force them on the main Airflow package.
 # * urllib3 - required to keep boto3 happy
+# * chardet<4 - required to keep snowflake happy
 ARG EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS="urllib3<1.26 chardet<4"
 
 WORKDIR /opt/airflow
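
With the argument moved up and the new condition added, the pre-cached master dependency layer is
skipped whenever the image is built with an eager dependency upgrade. A minimal sketch of passing the
build arg (the arg name comes from the Dockerfile above; the image tag is illustrative):

    # Build the production image while upgrading to newer dependencies;
    # this now also bypasses the pre-cached master dependency install.
    docker build . -f Dockerfile \
        --tag my-airflow:upgraded-deps \
        --build-arg UPGRADE_TO_NEWER_DEPENDENCIES="true"
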
diff --git a/Dockerfile.ci b/Dockerfile.ci
index 672ab67..c0ad778 100644
--- a/Dockerfile.ci
+++ b/Dockerfile.ci
@@ -286,22 +286,23 @@ RUN echo "Pip no cache dir: ${PIP_NO_CACHE_DIR}"
 
 RUN pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"
 
-# Increase the value here to force reinstalling Apache Airflow pip dependencies
-ARG PIP_DEPENDENCIES_EPOCH_NUMBER="5"
-ENV PIP_DEPENDENCIES_EPOCH_NUMBER=${PIP_DEPENDENCIES_EPOCH_NUMBER}
-
 # Only copy install_airflow_from_latest_master.sh to not invalidate cache on other script changes
 COPY scripts/docker/install_airflow_from_latest_master.sh /scripts/docker/install_airflow_from_latest_master.sh
 # fix permission issue in Azure DevOps when running the script
 RUN chmod a+x /scripts/docker/install_airflow_from_latest_master.sh
 
+ARG UPGRADE_TO_NEWER_DEPENDENCIES="false"
+ENV UPGRADE_TO_NEWER_DEPENDENCIES=${UPGRADE_TO_NEWER_DEPENDENCIES}
+
 # In case of CI builds we want to pre-install master version of airflow dependencies so that
 # We do not have to always reinstall it from the scratch.
-# This can be reinstalled from latest master by increasing PIP_DEPENDENCIES_EPOCH_NUMBER.
 # And is automatically reinstalled from the scratch every time patch release of python gets released
 # The Airflow (and providers in case INSTALL_PROVIDERS_FROM_SOURCES is "false")
-# are uninstalled, only dependencies remain
-RUN if [[ ${AIRFLOW_PRE_CACHED_PIP_PACKAGES} == "true" ]]; then \
+# are uninstalled, only dependencies remain.
+# The cache is only used when "upgrade to newer dependencies" is not set, so that removed
+# dependencies are automatically accounted for (we do not install them in the first place)
+RUN if [[ ${AIRFLOW_PRE_CACHED_PIP_PACKAGES} == "true" && \
+          ${UPGRADE_TO_NEWER_DEPENDENCIES} == "false" ]]; then \
         /scripts/docker/install_airflow_from_latest_master.sh; \
     fi
 
@@ -331,18 +332,13 @@ COPY setup.cfg ${AIRFLOW_SOURCES}/setup.cfg
 
 COPY airflow/__init__.py ${AIRFLOW_SOURCES}/airflow/__init__.py
 
-ARG UPGRADE_TO_NEWER_DEPENDENCIES="false"
-ENV UPGRADE_TO_NEWER_DEPENDENCIES=${UPGRADE_TO_NEWER_DEPENDENCIES}
-
 # Those are additional constraints that are needed for some extras but we do not want to
 # Force them on the main Airflow package. Those limitations are:
-# * chardet,<4: required by snowflake provider https://github.com/snowflakedb/snowflake-connector-python/blob/v2.3.6/setup.py#L201
+# * chardet,<4: required by snowflake provider
 # * lazy-object-proxy<1.5.0: required by astroid
 # * pyOpenSSL: Imposed by snowflake provider https://github.com/snowflakedb/snowflake-connector-python/blob/v2.3.6/setup.py#L201
-# * requests>=2.20.0,<2.24.0: required by snowflake provider https://github.com/snowflakedb/snowflake-connector-python/blob/v2.3.6/setup.py#L201
-# * urllib3<1.26: Required to keep boto3 and snowflake happy
-
-ARG EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS="chardet<4 lazy-object-proxy<1.5.0 pyOpenSSL<20.0.0 requests>=2.20.0,<2.24.0 urllib3<1.26"
+# * urllib3<1.26: Required to keep boto3 happy
+ARG EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS="chardet<4 lazy-object-proxy<1.5.0 pyOpenSSL<20.0.0 urllib3<1.26"
 
 ARG CONTINUE_ON_PIP_CHECK_FAILURE="false"
 
diff --git a/IMAGES.rst b/IMAGES.rst
index cd72a06..2de6e99 100644
--- a/IMAGES.rst
+++ b/IMAGES.rst
@@ -401,9 +401,6 @@ The following build arguments (``--build-arg`` in docker build command) can be u
 +------------------------------------------+------------------------------------------+------------------------------------------+
 | ``AIRFLOW_SOURCES``                      | ``/opt/airflow``                         | Mounted sources of Airflow               |
 +------------------------------------------+------------------------------------------+------------------------------------------+
-| ``PIP_DEPENDENCIES_EPOCH_NUMBER``        | ``3``                                    | increasing that number will reinstall    |
-|                                          |                                          | all PIP dependencies                     |
-+------------------------------------------+------------------------------------------+------------------------------------------+
 | ``CASS_DRIVER_NO_CYTHON``                | ``1``                                    | if set to 1 no CYTHON compilation is     |
 |                                          |                                          | done for cassandra driver (much faster)  |
 +------------------------------------------+------------------------------------------+------------------------------------------+
diff --git a/scripts/in_container/_in_container_utils.sh b/scripts/in_container/_in_container_utils.sh
index cd3dbf5..a811e21 100644
--- a/scripts/in_container/_in_container_utils.sh
+++ b/scripts/in_container/_in_container_utils.sh
@@ -302,8 +302,22 @@ function install_airflow_from_sdist() {
     pip install "${airflow_package}${1}" >"${OUTPUT_PRINTED_ONLY_ON_ERROR}" 2>&1
 }
 
+function reinstall_azure_storage_blob() {
+    group_start "Reinstalls azure-storage-blob (temporary workaround)"
+    # Reinstall azure-storage-blob here until https://github.com/apache/airflow/pull/12188 is solved
+    # azure-storage-blob needs to be reinstalled to overwrite the azure-storage-blob installed by an old
+    # version of the `azure-storage` library
+    echo
+    echo "Reinstalling azure-storage-blob"
+    echo
+    pip install azure-storage-blob --no-deps --force-reinstall
+    group_end
+}
+
 function install_remaining_dependencies() {
+    group_start "Installs all remaining dependencies that are not installed by '${AIRFLOW_EXTRAS}' "
     pip install apache-beam[gcp] >"${OUTPUT_PRINTED_ONLY_ON_ERROR}" 2>&1
+    group_end
 }
 
 function uninstall_airflow() {
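
The two helpers above are meant to be called from the in-container entry scripts, as the later hunks do.
A minimal sketch of the calling pattern (the sourcing line mirrors run_ci_tests.sh below):

    # Sketch: an in-container script sourcing the shared utils and using the new helpers.
    # shellcheck source=scripts/in_container/_in_container_script_init.sh
    . "$( dirname "${BASH_SOURCE[0]}" )/_in_container_script_init.sh"

    install_remaining_dependencies   # installs the remaining apache-beam[gcp] dependencies
    reinstall_azure_storage_blob     # forces a clean azure-storage-blob until apache/airflow#12188 is fixed
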
diff --git a/scripts/in_container/run_ci_tests.sh b/scripts/in_container/run_ci_tests.sh
index c5348f7..94a7879 100755
--- a/scripts/in_container/run_ci_tests.sh
+++ b/scripts/in_container/run_ci_tests.sh
@@ -18,6 +18,8 @@
 # shellcheck source=scripts/in_container/_in_container_script_init.sh
 . "$( dirname "${BASH_SOURCE[0]}" )/_in_container_script_init.sh"
 
+reinstall_azure_storage_blob
+
 echo
 echo "Starting the tests with those pytest arguments:" "${@}"
 echo
diff --git a/scripts/in_container/run_install_and_test_provider_packages.sh b/scripts/in_container/run_install_and_test_provider_packages.sh
index ef2227c..609d004 100755
--- a/scripts/in_container/run_install_and_test_provider_packages.sh
+++ b/scripts/in_container/run_install_and_test_provider_packages.sh
@@ -75,12 +75,6 @@ function install_airflow_as_specified() {
     group_end
 }
 
-function install_deps() {
-    group_start "Installs all remaining dependencies that are not installed by '${AIRFLOW_EXTRAS}' "
-    install_remaining_dependencies
-    group_end
-}
-
 function install_provider_packages() {
     group_start "Install provider packages"
     if [[ ${PACKAGE_FORMAT} == "wheel" ]]; then
@@ -202,7 +196,8 @@ function discover_all_field_behaviours() {
 setup_provider_packages
 verify_parameters
 install_airflow_as_specified
-install_deps
+install_remaining_dependencies
+reinstall_azure_storage_blob
 install_provider_packages
 import_all_provider_classes
 
diff --git a/scripts/in_container/run_prepare_provider_readme.sh b/scripts/in_container/run_prepare_provider_readme.sh
index d98de31..69c42fe 100755
--- a/scripts/in_container/run_prepare_provider_readme.sh
+++ b/scripts/in_container/run_prepare_provider_readme.sh
@@ -30,10 +30,8 @@ verify_suffix_versions_for_package_preparation
 pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"
 
 # TODO: remove it when devel_all == devel_ci
-echo
-echo "Installing remaining packages from 'all' extras"
-echo
-pip install ".[devel_all]"
+install_remaining_dependencies
+reinstall_azure_storage_blob
 
 cd "${AIRFLOW_SOURCES}/provider_packages" || exit 1
 
diff --git a/setup.py b/setup.py
index 46a2128..c571068 100644
--- a/setup.py
+++ b/setup.py
@@ -429,14 +429,10 @@ snowflake = [
     # once it is merged, we can move those two back to `azure` extra.
     'azure-storage-blob',
     'azure-storage-common',
-    # snowflake is not compatible with latest version.
-    # This library monkey patches the requests library, so SSL is broken globally.
-    # See: https://github.com/snowflakedb/snowflake-connector-python/issues/324
-    'requests<2.24.0',
-    # Newest version drop support for old version of azure-storage-blob
-    # Until #12188 is solved at least we need to limit maximum version.
-    # https://github.com/apache/airflow/pull/12188
-    'snowflake-connector-python>=1.5.2,<=2.3.6',
+    # Snowflake connector >= 2.3.8 is needed because it vendors the urllib3 and requests libraries that
+    # it monkey-patches. In earlier versions of the library, monkeypatching the global libraries by
+    # snowflake caused other providers to fail (Google, Amazon etc.)
+    'snowflake-connector-python>=2.3.8',
     'snowflake-sqlalchemy>=1.1.0',
 ]
 spark = [
@@ -473,11 +469,6 @@ zendesk = [
 ]
 # End dependencies group
 
-############################################################################################################
-# IMPORTANT NOTE!!!!!!!!!!!!!!!
-# IF you are removing dependencies from this list, please make sure that you also increase
-# PIP_DEPENDENCIES_EPOCH_NUMBER in the Dockerfile.ci and Dockerfile
-############################################################################################################
 devel = [
     'beautifulsoup4~=4.7.1',
     'black',
@@ -522,26 +513,9 @@ devel = [
     'yamllint',
 ]
 
-############################################################################################################
-# IMPORTANT NOTE!!!!!!!!!!!!!!!
-# If you are removing dependencies from the above list, please make sure that you also increase
-# PIP_DEPENDENCIES_EPOCH_NUMBER in the Dockerfile.ci and Dockerfile
-############################################################################################################
-
 devel_minreq = cgroups + devel + doc + kubernetes + mysql + password
 devel_hadoop = devel_minreq + hdfs + hive + kerberos + presto + webhdfs
 
-############################################################################################################
-# IMPORTANT NOTE!!!!!!!!!!!!!!!
-# If you have a 'pip check' problem with dependencies, it might be because some dependency has been
-# installed via 'install_requires' in setup.cfg in higher version than required in one of the options below.
-# For example pip check was failing with requests=2.25.1 installed even if in some dependencies below
-# < 2.24.0 was specified for it. Solution in such case is to add such limiting requirement to
-# install_requires in setup.cfg (we've added requests<2.24.0 there to limit requests library).
-# This should be done with appropriate comment explaining why the requirement was added.
-############################################################################################################
-
-
 # Dict of all providers which are part of the Apache Airflow repository together with their requirements
 PROVIDERS_REQUIREMENTS: Dict[str, List[str]] = {
     'amazon': amazon,
@@ -642,7 +616,6 @@ def add_extras_for_all_providers() -> None:
 add_extras_for_all_providers()
 
 #############################################################################################################
-#############################################################################################################
 #  The whole section can be removed in Airflow 3.0 as those old aliases are deprecated in 2.* series
 #############################################################################################################
 
@@ -750,10 +723,9 @@ PACKAGES_EXCLUDED_FOR_ALL.extend(
 
 # Those packages are excluded because they break tests (downgrading mock and few other requirements)
 # and they are not needed to run our test suite. This can be removed as soon as we get non-conflicting
-# requirements for the apache-beam as well. This waits for azure + snowflake fixes:
+# requirements for the apache-beam as well. This waits for azure fixes:
 #
 # * Azure: https://github.com/apache/airflow/issues/11968
-# * Snowflake: https://github.com/apache/airflow/issues/12881
 #
 PACKAGES_EXCLUDED_FOR_CI = [
     'apache-beam',
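
Since snowflake no longer needs to be excluded for CI, one way to sanity-check that the new pin resolves
cleanly is to install the extra from local sources and run pip's resolver check (a hedged sketch; the
extra name matches the PROVIDERS_REQUIREMENTS key above):

    # Install the snowflake extra from the local sources and verify there are no conflicting dependencies.
    pip install -e '.[snowflake]'
    pip check
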
diff --git a/tests/providers/presto/hooks/test_presto.py b/tests/providers/presto/hooks/test_presto.py
index a729d6f..02d2a62 100644
--- a/tests/providers/presto/hooks/test_presto.py
+++ b/tests/providers/presto/hooks/test_presto.py
@@ -217,23 +217,6 @@ class TestPrestoHookIntegration(unittest.TestCase):
         records = hook.get_records(sql)
         self.assertEqual([['Customer#000000001'], ['Customer#000000002'], ['Customer#000000003']], records)
 
-    @pytest.mark.xfail(
-        condition=True,
-        reason="""
-This test will fail when full suite of tests are run, because of Snowflake monkeypatching urllib3
-library as described in https://github.com/snowflakedb/snowflake-connector-python/issues/324
-the offending code is here:
-https://github.com/snowflakedb/snowflake-connector-python/blob/133d6215f7920d304c5f2d466bae38127c1b836d/src/snowflake/connector/network.py#L89-L92
-
-This test however runs fine when run in total isolation. We could move it to Heisentests, but then we would
-have to enable integrations there and make sure no snowflake gets imported.
-
-In the future Snowflake plans to get rid of the MonkeyPatching.
-
-Issue to keep track of it: https://github.com/apache/airflow/issues/12881
-
-""",
-    )
     @pytest.mark.integration("presto")
     @pytest.mark.integration("kerberos")
     def test_should_record_records_with_kerberos_auth(self):
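
With the xfail marker removed, this kerberos-authenticated Presto test is expected to pass even when
snowflake is installed, since the connector no longer patches the global urllib3. A sketch of running
just this test with the integrations it needs, assuming the repository's pytest plugin exposes the
--integration flag used by these markers (Breeze normally provides the backing services):

    # Run the previously-xfailed test in isolation with its required integrations enabled.
    pytest tests/providers/presto/hooks/test_presto.py \
        -k test_should_record_records_with_kerberos_auth \
        --integration presto --integration kerberos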