You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by po...@apache.org on 2021/09/20 09:17:10 UTC

[airflow] branch main updated: Fix random deadlocks in MSSQL database (#18362)

This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new f76eaec  Fix random deadlocks in MSSQL database (#18362)
f76eaec is described below

commit f76eaec65e834f9c8c3e4e0bec14bb63e76679b8
Author: Jarek Potiuk <ja...@potiuk.com>
AuthorDate: Mon Sep 20 11:16:44 2021 +0200

    Fix random deadlocks in MSSQL database (#18362)
    
    Default isolation level for the MSSQL database is READ_COMMITTED
    but its implementation in MSSQL might cause random deadlocks for
    select queries - because by default MSSQL uses shared locks
    for READ_COMMITTED transaction isolation that might cause
    deadlocks with the UPDATE statements which create exclusive
    locks.
    
    This change switches the MSSQL DB used during tests to have
    READ_COMMITTED_SNAPSHOTS enabled and it adds check at the Airflow
    startup to make sure that if the MSSQL database is used, the
    READ_COMMITTED_SNAPSHOTS is set and documentation is added to
    describe it.
    
    It also allows to revert removal of 2017 version of mssql which
    was done in the faith that removal of 2017 will remove the
    flaky random test failures.
    
    Also Fixes: #17018
---
 BREEZE.rst                                         |  4 +--
 README.md                                          |  2 +-
 airflow/settings.py                                | 30 +++++++++++++++++++++-
 breeze-complete                                    |  2 +-
 docs/apache-airflow/howto/set-up-database.rst      | 24 ++++++++++++++++-
 scripts/ci/docker-compose/backend-mssql.yml        | 17 ++++++++++--
 .../ci/docker-compose/mssql_create_airflow_db.sql  | 21 +++++++++++++++
 scripts/ci/libraries/_initialization.sh            |  2 +-
 tests/core/test_sqlalchemy_config.py               |  2 +-
 9 files changed, 94 insertions(+), 10 deletions(-)

diff --git a/BREEZE.rst b/BREEZE.rst
index 891454d..7408982 100644
--- a/BREEZE.rst
+++ b/BREEZE.rst
@@ -1834,7 +1834,7 @@ This is the current syntax for  `./breeze <./breeze>`_:
   --mssql-version MSSQL_VERSION
           MSSql version used. One of:
 
-                 2019-latest
+                 2017-latest 2019-latest
 
   -v, --verbose
           Show verbose information about executed docker, kind, kubectl, helm commands. Useful for
@@ -2308,7 +2308,7 @@ This is the current syntax for  `./breeze <./breeze>`_:
   --mssql-version MSSQL_VERSION
           MSSql version used. One of:
 
-                 2019-latest
+                 2017-latest 2019-latest
 
   ****************************************************************************************************
    Enable production image
diff --git a/README.md b/README.md
index cb18e88..872431c 100644
--- a/README.md
+++ b/README.md
@@ -89,7 +89,7 @@ Apache Airflow is tested with:
 | PostgreSQL           | 9.6, 10, 11, 12, 13       | 9.6, 10, 11, 12, 13      |
 | MySQL                | 5.7, 8                    | 5.7, 8                   |
 | SQLite               | 3.15.0+                   | 3.15.0+                  |
-| MSSQL(Experimental)  | 2019                      |                          |
+| MSSQL(Experimental)  | 2017, 2019                |                          |
 
 **Note**: MySQL 5.x versions are unable to or have limitations with
 running multiple schedulers -- please see the [Scheduler docs](https://airflow.apache.org/docs/apache-airflow/stable/scheduler.html).
diff --git a/airflow/settings.py b/airflow/settings.py
index 9a3ce28..bdc3ec0 100644
--- a/airflow/settings.py
+++ b/airflow/settings.py
@@ -25,6 +25,7 @@ import warnings
 from typing import Optional
 
 import pendulum
+import sqlalchemy
 from sqlalchemy import create_engine, exc
 from sqlalchemy.engine import Engine
 from sqlalchemy.orm import scoped_session, sessionmaker
@@ -234,6 +235,27 @@ def configure_orm(disable_connection_pool=False):
             expire_on_commit=False,
         )
     )
+    if engine.dialect.name == 'mssql':
+        session = Session()
+        try:
+            result = session.execute(
+                sqlalchemy.text(
+                    'SELECT is_read_committed_snapshot_on FROM sys.databases WHERE name=:database_name'
+                ),
+                params={"database_name": engine.url.database},
+            )
+            data = result.fetchone()[0]
+            if data != 1:
+                log.critical("MSSQL database MUST have READ_COMMITTED_SNAPSHOT enabled.")
+                log.critical(f"The database {engine.url.database} has it disabled.")
+                log.critical("This will cause random deadlocks, Refusing to start.")
+                log.critical(
+                    "See https://airflow.apache.org/docs/apache-airflow/stable/howto/"
+                    "set-up-database.html#setting-up-a-mssql-database"
+                )
+                raise Exception("MSSQL database MUST have READ_COMMITTED_SNAPSHOT enabled.")
+        finally:
+            session.close()
 
 
 def prepare_engine_args(disable_connection_pool=False):
@@ -292,7 +314,13 @@ def prepare_engine_args(disable_connection_pool=False):
     # 'READ COMMITTED' is the default value for PostgreSQL.
     # More information here:
     # https://dev.mysql.com/doc/refman/8.0/en/innodb-transaction-isolation-levels.html"
-    if SQL_ALCHEMY_CONN.startswith('mysql'):
+
+    # Similarly MSSQL default isolation level should be set to READ COMMITTED.
+    # We also make sure that READ_COMMITTED_SNAPSHOT option is on, in order to avoid deadlocks when
+    # Select queries are running. This is by default enforced during init/upgrade. More information:
+    # https://docs.microsoft.com/en-us/sql/t-sql/statements/set-transaction-isolation-level-transact-sql
+
+    if SQL_ALCHEMY_CONN.startswith(('mysql', 'mssql')):
         engine_args['isolation_level'] = 'READ COMMITTED'
 
     return engine_args
diff --git a/breeze-complete b/breeze-complete
index fb0eb96..b7e00b7 100644
--- a/breeze-complete
+++ b/breeze-complete
@@ -32,7 +32,7 @@ _breeze_allowed_kubernetes_versions="v1.20.2 v1.19.7 v1.18.15"
 _breeze_allowed_helm_versions="v3.6.3"
 _breeze_allowed_kind_versions="v0.11.1"
 _breeze_allowed_mysql_versions="5.7 8"
-_breeze_allowed_mssql_versions="2019-latest"
+_breeze_allowed_mssql_versions="2017-latest 2019-latest"
 _breeze_allowed_postgres_versions="9.6 10 11 12 13"
 _breeze_allowed_kind_operations="start stop restart status deploy test shell k9s"
 _breeze_allowed_executors="KubernetesExecutor CeleryExecutor LocalExecutor CeleryKubernetesExecutor"
diff --git a/docs/apache-airflow/howto/set-up-database.rst b/docs/apache-airflow/howto/set-up-database.rst
index 518cb53..b781277 100644
--- a/docs/apache-airflow/howto/set-up-database.rst
+++ b/docs/apache-airflow/howto/set-up-database.rst
@@ -27,13 +27,14 @@ The document below describes the database engine configurations, the necessary c
 Choosing database backend
 -------------------------
 
-If you want to take a real test drive of Airflow, you should consider setting up a database backend to **MySQL** and **PostgresSQL**.
+If you want to take a real test drive of Airflow, you should consider setting up a database backend to **MySQL**, **PostgresSQL** , **MsSQL**.
 By default, Airflow uses **SQLite**, which is intended for development purposes only.
 
 Airflow supports the following database engine versions, so make sure which version you have. Old versions may not support all SQL statements.
 
   * PostgreSQL:  9.6, 10, 11, 12, 13
   * MySQL: 5.7, 8
+  * MsSQL: 2017, 2019
   * SQLite: 3.15.0+
 
 If you plan on running more than one scheduler, you have to meet additional requirements.
@@ -226,6 +227,27 @@ For more information regarding setup of the PostgresSQL connection, see `Postgre
 
      hba
 
+Setting up a MsSQL Database
+---------------------------
+
+You need to create a database and a database user that Airflow will use to access this database.
+In the example below, a database ``airflow_db`` and user  with username ``airflow_user`` with password ``airflow_pass`` will be created.
+Note, that in case of MsSQL, Airflow uses ``READ COMMITTED`` transaction isolation and it must have
+``READ_COMMITTED_SNAPSHOT`` feature enabled, otherwise read transactions might generate deadlocks
+(especially in case of backfill). Airflow will refuse to use database that has the feature turned off.
+You can read more about transaction isolation and snapshot features at
+`Transaction isolation level <https://docs.microsoft.com/en-us/sql/t-sql/statements/set-transaction-isolation-level-transact-sql>`_
+
+.. code-block:: sql
+
+   CREATE DATABASE airflow;
+   ALTER DATABASE airflow SET READ_COMMITTED_SNAPSHOT ON;
+   CREATE LOGIN airflow_user WITH PASSWORD='airflow_pass123%';
+   USE airflow;
+   CREATE USER airflow_user FROM LOGIN airflow_user;
+   GRANT ALL PRIVILEGES ON DATABASE airflow TO airflow_user;
+
+
 Other configuration options
 ---------------------------
 
diff --git a/scripts/ci/docker-compose/backend-mssql.yml b/scripts/ci/docker-compose/backend-mssql.yml
index 06648f4..71880ed 100644
--- a/scripts/ci/docker-compose/backend-mssql.yml
+++ b/scripts/ci/docker-compose/backend-mssql.yml
@@ -20,12 +20,14 @@ services:
   airflow:
     environment:
       - BACKEND=mssql
-      - AIRFLOW__CORE__SQL_ALCHEMY_CONN=mssql+pyodbc://sa:Airflow123@mssql:1433/master?driver=ODBC+Driver+17+for+SQL+Server
-      - AIRFLOW__CELERY__RESULT_BACKEND=db+mssql+pyodbc://sa:Airflow123@mssql:1433/master?driver=ODBC+Driver+17+for+SQL+Server
+      - AIRFLOW__CORE__SQL_ALCHEMY_CONN=mssql+pyodbc://sa:Airflow123@mssql:1433/airflow?driver=ODBC+Driver+17+for+SQL+Server
+      - AIRFLOW__CELERY__RESULT_BACKEND=db+mssql+pyodbc://sa:Airflow123@mssql:1433/airflow?driver=ODBC+Driver+17+for+SQL+Server
       - AIRFLOW__CORE__EXECUTOR=LocalExecutor
     depends_on:
       mssql:
         condition: service_healthy
+      mssqlsetup:
+        condition: service_completed_successfully
   mssql:
     image: mcr.microsoft.com/mssql/server:${MSSQL_VERSION}
     environment:
@@ -38,3 +40,14 @@ services:
       timeout: 10s
       retries: 10
     restart: always
+  mssqlsetup:
+    image: mcr.microsoft.com/mssql/server:${MSSQL_VERSION}
+    depends_on:
+      mssql:
+        condition: service_healthy
+    entrypoint:
+      - bash
+      - -c
+      - opt/mssql-tools/bin/sqlcmd -S mssql -U sa -P Airflow123 -i /mssql_create_airflow_db.sql || true
+    volumes:
+      - ./mssql_create_airflow_db.sql:/mssql_create_airflow_db.sql:ro
diff --git a/scripts/ci/docker-compose/mssql_create_airflow_db.sql b/scripts/ci/docker-compose/mssql_create_airflow_db.sql
new file mode 100644
index 0000000..39ebe48
--- /dev/null
+++ b/scripts/ci/docker-compose/mssql_create_airflow_db.sql
@@ -0,0 +1,21 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied.  See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+CREATE DATABASE airflow;
+ALTER DATABASE airflow SET READ_COMMITTED_SNAPSHOT ON;
diff --git a/scripts/ci/libraries/_initialization.sh b/scripts/ci/libraries/_initialization.sh
index 569ac20..f9ad3c5 100644
--- a/scripts/ci/libraries/_initialization.sh
+++ b/scripts/ci/libraries/_initialization.sh
@@ -123,7 +123,7 @@ function initialization::initialize_base_variables() {
     export CURRENT_MYSQL_VERSIONS
 
     # Currently supported versions of MSSQL
-    CURRENT_MSSQL_VERSIONS+=("2019-latest")
+    CURRENT_MSSQL_VERSIONS+=("2017-latest" "2019-latest")
     export CURRENT_MSSQL_VERSIONS
 
     BACKEND=${BACKEND:="sqlite"}
diff --git a/tests/core/test_sqlalchemy_config.py b/tests/core/test_sqlalchemy_config.py
index de62141..39b45ec 100644
--- a/tests/core/test_sqlalchemy_config.py
+++ b/tests/core/test_sqlalchemy_config.py
@@ -77,7 +77,7 @@ class TestSqlAlchemySettings(unittest.TestCase):
         with conf_vars(config):
             settings.configure_orm()
             engine_args = {}
-            if settings.SQL_ALCHEMY_CONN.startswith('mysql'):
+            if settings.SQL_ALCHEMY_CONN.startswith(('mysql', 'mssql')):
                 engine_args['isolation_level'] = 'READ COMMITTED'
             mock_create_engine.assert_called_once_with(
                 settings.SQL_ALCHEMY_CONN,