You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by po...@apache.org on 2021/09/21 09:28:09 UTC

[airflow] branch main updated: Production-level support for MSSQL (#18382)

This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new ab7acfd  Production-level support for MSSQL (#18382)
ab7acfd is described below

commit ab7acfd7df25cd1a4c739bf99186267356f1eda6
Author: Jarek Potiuk <ja...@potiuk.com>
AuthorDate: Tue Sep 21 11:27:47 2021 +0200

    Production-level support for MSSQL (#18382)
    
    MSSQL has been somewhat experimental in the `main` branch, but as
    we near the 2.2.0 release, the image should support MSSQL at the
    same level as it supports other databases.
    
    This PR adds proper support for both PROD and CI images.
---
 Dockerfile                                         | 15 +++++++--
 Dockerfile.ci                                      | 15 ++-------
 breeze                                             |  5 +++
 docs/apache-airflow/howto/set-up-database.rst      | 15 ++++++---
 docs/docker-stack/build-arg-ref.rst                |  2 ++
 docs/docker-stack/build.rst                        |  4 ++-
 .../restricted/restricted_environments.sh          |  1 +
 scripts/ci/libraries/_build_images.sh              |  2 ++
 scripts/ci/libraries/_initialization.sh            |  2 ++
 scripts/docker/common.sh                           |  1 +
 scripts/docker/install_mssql.sh                    | 36 ++++++++++++++++++++++
 scripts/docker/install_mysql.sh                    |  9 +-----
 scripts/in_container/prod/entrypoint_prod.sh       |  2 ++
 13 files changed, 81 insertions(+), 28 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 0bc435c..e1c7c0d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -134,6 +134,7 @@ RUN mkdir -pv /usr/share/man/man1 \
     && rm -rf /var/lib/apt/lists/*
 
 ARG INSTALL_MYSQL_CLIENT="true"
+ARG INSTALL_MSSQL_CLIENT="true"
 ARG AIRFLOW_REPO=apache/airflow
 ARG AIRFLOW_BRANCH=main
 ARG AIRFLOW_EXTRAS
@@ -174,6 +175,7 @@ ARG AIRFLOW_SOURCES_FROM="empty"
 ARG AIRFLOW_SOURCES_TO="/empty"
 
 ENV INSTALL_MYSQL_CLIENT=${INSTALL_MYSQL_CLIENT} \
+    INSTALL_MSSQL_CLIENT=${INSTALL_MSSQL_CLIENT} \
     AIRFLOW_REPO=${AIRFLOW_REPO} \
     AIRFLOW_BRANCH=${AIRFLOW_BRANCH} \
     AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS}${ADDITIONAL_AIRFLOW_EXTRAS:+,}${ADDITIONAL_AIRFLOW_EXTRAS} \
@@ -192,7 +194,9 @@ ENV INSTALL_MYSQL_CLIENT=${INSTALL_MYSQL_CLIENT} \
     UPGRADE_TO_NEWER_DEPENDENCIES=${UPGRADE_TO_NEWER_DEPENDENCIES}
 
 COPY scripts/docker/*.sh /scripts/docker/
-RUN bash ./scripts/docker/install_mysql.sh dev
+RUN bash ./scripts/docker/install_mysql.sh dev \
+    && bash ./scripts/docker/install_mssql.sh
+ENV PATH=${PATH}:/opt/mssql-tools/bin
 
 COPY docker-context-files /docker-context-files
 
@@ -374,6 +378,7 @@ ARG RUNTIME_APT_COMMAND="echo"
 ARG ADDITIONAL_RUNTIME_APT_COMMAND=""
 ARG ADDITIONAL_RUNTIME_APT_ENV=""
 ARG INSTALL_MYSQL_CLIENT="true"
+ARG INSTALL_MSSQL_CLIENT="true"
 ARG AIRFLOW_USER_HOME_DIR=/home/airflow
 ARG AIRFLOW_HOME
 # Having the variable in final image allows to disable providers manager warnings when
@@ -391,6 +396,7 @@ ENV RUNTIME_APT_DEPS=${RUNTIME_APT_DEPS} \
     RUNTIME_APT_COMMAND=${RUNTIME_APT_COMMAND} \
     ADDITIONAL_RUNTIME_APT_COMMAND=${ADDITIONAL_RUNTIME_APT_COMMAND} \
     INSTALL_MYSQL_CLIENT=${INSTALL_MYSQL_CLIENT} \
+    INSTALL_MSSQL_CLIENT=${INSTALL_MSSQL_CLIENT} \
     AIRFLOW_UID=${AIRFLOW_UID} AIRFLOW_GID=${AIRFLOW_GID} \
     AIRFLOW__CORE__LOAD_EXAMPLES="false" \
     AIRFLOW_USER_HOME_DIR=${AIRFLOW_USER_HOME_DIR} \
@@ -418,12 +424,15 @@ RUN mkdir -pv /usr/share/man/man1 \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
 
-# Only copy install_mysql and install_pip_version.sh. We do not need any other scripts in the final image.
-COPY scripts/docker/install_mysql.sh scripts/docker/install_pip_version.sh /scripts/docker/
+# Only copy install_m(y/s)sql and install_pip_version.sh. We do not need any other scripts in the final image.
+COPY scripts/docker/install_mysql.sh /scripts/docker/install_mssql.sh scripts/docker/install_pip_version.sh \
+   /scripts/docker/
 
 # fix permission issue in Azure DevOps when running the scripts
 RUN chmod a+x /scripts/docker/install_mysql.sh && \
     /scripts/docker/install_mysql.sh prod && \
+    chmod a+x /scripts/docker/install_mssql.sh && \
+    /scripts/docker/install_mssql.sh && \
     addgroup --gid "${AIRFLOW_GID}" "airflow" && \
     adduser --quiet "airflow" --uid "${AIRFLOW_UID}" \
         --gid "${AIRFLOW_GID}" \
diff --git a/Dockerfile.ci b/Dockerfile.ci
index b96ab5d..c19e560 100644
--- a/Dockerfile.ci
+++ b/Dockerfile.ci
@@ -104,6 +104,7 @@ RUN mkdir -pv /usr/share/man/man1 \
 
 COPY scripts/docker/*.sh /scripts/docker/
 RUN bash /scripts/docker/install_mysql.sh dev \
+    && bash /scripts/docker/install_mssql.sh \
     && adduser airflow \
     && echo "airflow:airflow" | chpasswd \
     && echo "airflow ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/airflow \
@@ -178,17 +179,6 @@ RUN mkdir -pv /usr/share/man/man1 \
     && apt-get autoremove -yqq --purge \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/* \
-    && curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - \
-    && curl https://packages.microsoft.com/config/debian/9/prod.list > /etc/apt/sources.list.d/mssql-release.list \
-    && apt-get update -yqq \
-    && apt-get upgrade -yqq \
-    && ACCEPT_EULA=Y apt-get -yqq install -y --no-install-recommends \
-      gcc \
-      unixodbc-dev  \
-      g++ \
-      msodbcsql17 \
-      mssql-tools \
-    && rm -rf /var/lib/apt/lists/* \
     && curl https://download.docker.com/linux/static/stable/x86_64/docker-${DOCKER_CLI_VERSION}.tgz \
     |  tar -C /usr/bin --strip-components=1 -xvzf - docker/docker
 
@@ -262,12 +252,13 @@ ENV AIRFLOW_REPO=${AIRFLOW_REPO}\
     INSTALL_FROM_PYPI=${INSTALL_FROM_PYPI} \
     AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \
 # In the CI image we always:
-# * install MySQL
+# * install MySQL, MsSQL
 # * install airflow from current sources, not from PyPI package
 # * install airflow without `--user` flag
 # * install airflow in editable mode
 # * install always current version of airflow
     INSTALL_MYSQL_CLIENT="true" \
+    INSTALL_MSSQL_CLIENT="true" \
     AIRFLOW_INSTALLATION_METHOD="." \
     AIRFLOW_INSTALL_USER_FLAG="" \
     AIRFLOW_INSTALL_EDITABLE_FLAG="--editable" \
diff --git a/breeze b/breeze
index e11f437..9a818de 100755
--- a/breeze
+++ b/breeze
@@ -1094,6 +1094,11 @@ function breeze::parse_arguments() {
             echo "Install MySQL client: ${INSTALL_MYSQL_CLIENT}"
             shift
             ;;
+        --disable-mssql-client-installation)
+            export INSTALL_MSSQL_CLIENT="false"
+            echo "Install MsSQL client: ${INSTALL_MSSQL_CLIENT}"
+            shift
+            ;;
         --constraints-location)
             export AIRFLOW_CONSTRAINTS_LOCATION="${2}"
             echo "Constraints location: ${AIRFLOW_CONSTRAINTS_LOCATION}"
diff --git a/docs/apache-airflow/howto/set-up-database.rst b/docs/apache-airflow/howto/set-up-database.rst
index 075e6c82..a77ff74 100644
--- a/docs/apache-airflow/howto/set-up-database.rst
+++ b/docs/apache-airflow/howto/set-up-database.rst
@@ -32,10 +32,10 @@ By default, Airflow uses **SQLite**, which is intended for development purposes
 
 Airflow supports the following database engine versions, so make sure which version you have. Old versions may not support all SQL statements.
 
-  * PostgreSQL:  9.6, 10, 11, 12, 13
-  * MySQL: 5.7, 8
-  * MsSQL: 2017, 2019
-  * SQLite: 3.15.0+
+* PostgreSQL:  9.6, 10, 11, 12, 13
+* MySQL: 5.7, 8
+* MsSQL: 2017, 2019
+* SQLite: 3.15.0+
 
 If you plan on running more than one scheduler, you have to meet additional requirements.
 For details, see :ref:`Scheduler HA Database Requirements <scheduler:ha:db_requirements>`.
@@ -261,6 +261,13 @@ You can read more about transaction isolation and snapshot features at
    GRANT ALL PRIVILEGES ON DATABASE airflow TO airflow_user;
 
 
+We recommend using the ``mssql+pyodbc`` driver and specifying it in your SqlAlchemy connection string.
+
+.. code-block:: text
+
+    mssql+pyodbc://<user>:<password>@<host>
+
+
 Other configuration options
 ---------------------------
 
diff --git a/docs/docker-stack/build-arg-ref.rst b/docs/docker-stack/build-arg-ref.rst
index 420ea94..d652ac3 100644
--- a/docs/docker-stack/build-arg-ref.rst
+++ b/docs/docker-stack/build-arg-ref.rst
@@ -124,6 +124,8 @@ for examples of using those arguments.
 |                                          |                                          | The mysql extra is removed from extras   |
 |                                          |                                          | if the client is not installed.          |
 +------------------------------------------+------------------------------------------+------------------------------------------+
+| ``INSTALL_MSSQL_CLIENT``                 | ``true``                                 | Whether MsSQL client should be installed |
++------------------------------------------+------------------------------------------+------------------------------------------+
 
 Installing Airflow using different methods
 ..........................................
diff --git a/docs/docker-stack/build.rst b/docs/docker-stack/build.rst
index d8dbb36..a4507e3 100644
--- a/docs/docker-stack/build.rst
+++ b/docs/docker-stack/build.rst
@@ -607,7 +607,9 @@ where you can build the image using the packages downloaded by passing those bui
 * ``AIRFLOW_PRE_CACHED_PIP_PACKAGES="false"``  - to not pre-cache packages from PyPI when building image
 * ``AIRFLOW_CONSTRAINTS_LOCATION=/docker-context-files/YOUR_CONSTRAINT_FILE.txt`` - to downloaded constraint files
 * (Optional) ``INSTALL_MYSQL_CLIENT="false"`` if you do not want to install ``MySQL``
-  client from the Oracle repositories. In this case also make sure that your
+  client from the Oracle repositories.
+* (Optional) ``INSTALL_MSSQL_CLIENT="false"`` if you do not want to install ``MsSQL``
+  client from the Microsoft repositories.
 
 Note that the solution we have for installing python packages from local packages only solves the problem
 of "air-gapped" python installation. The Docker image also downloads ``apt`` dependencies and ``node-modules``.
diff --git a/docs/docker-stack/docker-examples/restricted/restricted_environments.sh b/docs/docker-stack/docker-examples/restricted/restricted_environments.sh
index 4c96fb7..adb5091 100755
--- a/docs/docker-stack/docker-examples/restricted/restricted_environments.sh
+++ b/docs/docker-stack/docker-examples/restricted/restricted_environments.sh
@@ -41,6 +41,7 @@ docker build . \
     --build-arg AIRFLOW_INSTALLATION_METHOD="apache-airflow" \
     --build-arg AIRFLOW_VERSION="2.2.0.dev0" \
     --build-arg INSTALL_MYSQL_CLIENT="false" \
+    --build-arg INSTALL_MSSQL_CLIENT="false" \
     --build-arg AIRFLOW_PRE_CACHED_PIP_PACKAGES="false" \
     --build-arg INSTALL_FROM_DOCKER_CONTEXT_FILES="true" \
     --build-arg AIRFLOW_CONSTRAINTS_LOCATION="/docker-context-files/constraints-3.7.txt" \
diff --git a/scripts/ci/libraries/_build_images.sh b/scripts/ci/libraries/_build_images.sh
index 270d71b..157fb64 100644
--- a/scripts/ci/libraries/_build_images.sh
+++ b/scripts/ci/libraries/_build_images.sh
@@ -810,6 +810,7 @@ function build_images::build_prod_images() {
         "${EXTRA_DOCKER_PROD_BUILD_FLAGS[@]}" \
         --build-arg PYTHON_BASE_IMAGE="${AIRFLOW_PYTHON_BASE_IMAGE}" \
         --build-arg INSTALL_MYSQL_CLIENT="${INSTALL_MYSQL_CLIENT}" \
+        --build-arg INSTALL_MSSQL_CLIENT="${INSTALL_MSSQL_CLIENT}" \
         --build-arg AIRFLOW_VERSION="${AIRFLOW_VERSION}" \
         --build-arg AIRFLOW_BRANCH="${AIRFLOW_BRANCH_FOR_PYPI_PRELOADING}" \
         --build-arg AIRFLOW_EXTRAS="${AIRFLOW_EXTRAS}" \
@@ -845,6 +846,7 @@ function build_images::build_prod_images() {
         "${EXTRA_DOCKER_PROD_BUILD_FLAGS[@]}" \
         --build-arg PYTHON_BASE_IMAGE="${AIRFLOW_PYTHON_BASE_IMAGE}" \
         --build-arg INSTALL_MYSQL_CLIENT="${INSTALL_MYSQL_CLIENT}" \
+        --build-arg INSTALL_MSSQL_CLIENT="${INSTALL_MSSQL_CLIENT}" \
         --build-arg ADDITIONAL_AIRFLOW_EXTRAS="${ADDITIONAL_AIRFLOW_EXTRAS}" \
         --build-arg ADDITIONAL_PYTHON_DEPS="${ADDITIONAL_PYTHON_DEPS}" \
         --build-arg INSTALL_PROVIDERS_FROM_SOURCES="${INSTALL_PROVIDERS_FROM_SOURCES}" \
diff --git a/scripts/ci/libraries/_initialization.sh b/scripts/ci/libraries/_initialization.sh
index f9ad3c5..3a8aa3f 100644
--- a/scripts/ci/libraries/_initialization.sh
+++ b/scripts/ci/libraries/_initialization.sh
@@ -395,6 +395,8 @@ function initialization::initialize_image_build_variables() {
     export AIRFLOW_PRE_CACHED_PIP_PACKAGES="${AIRFLOW_PRE_CACHED_PIP_PACKAGES:="true"}"
     # by default install mysql client
     export INSTALL_MYSQL_CLIENT=${INSTALL_MYSQL_CLIENT:="true"}
+    # by default install mssql client
+    export INSTALL_MSSQL_CLIENT=${INSTALL_MSSQL_CLIENT:="true"}
     # additional tag for the image
     export IMAGE_TAG=${IMAGE_TAG:=""}
 
diff --git a/scripts/docker/common.sh b/scripts/docker/common.sh
index 5736ccb..d11715e 100755
--- a/scripts/docker/common.sh
+++ b/scripts/docker/common.sh
@@ -18,6 +18,7 @@
 set -euo pipefail
 
 test -v INSTALL_MYSQL_CLIENT
+test -v INSTALL_MSSQL_CLIENT
 test -v AIRFLOW_INSTALL_USER_FLAG
 test -v AIRFLOW_REPO
 test -v AIRFLOW_BRANCH
diff --git a/scripts/docker/install_mssql.sh b/scripts/docker/install_mssql.sh
new file mode 100755
index 0000000..b5f8b51
--- /dev/null
+++ b/scripts/docker/install_mssql.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+set -exuo pipefail
+function install_mssql_client() {
+    echo
+    echo Installing mssql client
+    echo
+    curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add -
+    curl https://packages.microsoft.com/config/debian/10/prod.list > /etc/apt/sources.list.d/mssql-release.list
+    apt-get update -yqq
+    apt-get upgrade -yqq
+    ACCEPT_EULA=Y apt-get -yqq install -y --no-install-recommends msodbcsql17 mssql-tools
+    rm -rf /var/lib/apt/lists/*
+    apt-get autoremove -yqq --purge
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+}
+
+# Install MsSQL client from Microsoft repositories
+if [[ ${INSTALL_MSSQL_CLIENT:="true"} == "true" ]]; then
+    install_mssql_client "${@}"
+fi
diff --git a/scripts/docker/install_mysql.sh b/scripts/docker/install_mysql.sh
index 534ed98..7983eb0 100755
--- a/scripts/docker/install_mysql.sh
+++ b/scripts/docker/install_mysql.sh
@@ -15,9 +15,7 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-set -euo pipefail
-
-set -x
+set -exuo pipefail
 declare -a packages
 
 MYSQL_VERSION="8.0"
@@ -63,11 +61,6 @@ install_mysql_client() {
     apt-get clean && rm -rf /var/lib/apt/lists/*
 }
 
-
-
-# Install MySQL Client during the container build
-set -euo pipefail
-
 # Install MySQL client from Oracle repositories (Debian installs mariadb)
 # But only if it is not disabled
 if [[ ${INSTALL_MYSQL_CLIENT:="true"} == "true" ]]; then
diff --git a/scripts/in_container/prod/entrypoint_prod.sh b/scripts/in_container/prod/entrypoint_prod.sh
index bcf57c0..bb0d6e6 100755
--- a/scripts/in_container/prod/entrypoint_prod.sh
+++ b/scripts/in_container/prod/entrypoint_prod.sh
@@ -99,6 +99,8 @@ function wait_for_connection {
             detected_port=5432
         elif [[ ${BACKEND} == "mysql"* ]]; then
             detected_port=3306
+        elif [[ ${BACKEND} == "mssql"* ]]; then
+            detected_port=1433
         elif [[ ${BACKEND} == "redis"* ]]; then
             detected_port=6379
         elif [[ ${BACKEND} == "amqp"* ]]; then