You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by bo...@apache.org on 2018/01/27 08:01:25 UTC

incubator-airflow git commit: [AIRFLOW-1895] Fix primary key integrity for mysql

Repository: incubator-airflow
Updated Branches:
  refs/heads/master 0565bdc4e -> a1d555177


[AIRFLOW-1895] Fix primary key integrity for mysql

sla_miss and task_instances cannot have NULL
execution_dates. The timezone
 migration scripts forgot to set this properly. In
addition to make sure
MySQL does not set "ON UPDATE CURRENT_TIMESTAMP"
or MariaDB "DEFAULT
0000-00-00 00:00:00" we now check if
explicit_defaults_for_timestamp is turned
on and otherwise fail an database upgrade.

Closes #2969, #2857

Closes #2979 from bolkedebruin/AIRFLOW-1895


Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/a1d55517
Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/a1d55517
Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/a1d55517

Branch: refs/heads/master
Commit: a1d5551777739a166da8c770262f90ed47802857
Parents: 0565bdc
Author: Bolke de Bruin <bo...@xs4all.nl>
Authored: Sat Jan 27 09:01:10 2018 +0100
Committer: Bolke de Bruin <bo...@xs4all.nl>
Committed: Sat Jan 27 09:01:10 2018 +0100

----------------------------------------------------------------------
 .travis.yml                                         |  8 +++++---
 UPDATING.md                                         |  7 ++++++-
 .../0e2a74e0fc9f_add_time_zone_awareness.py         | 10 ++++++++--
 docs/configuration.rst                              |  4 ++++
 scripts/ci/my.cnf                                   | 16 ++++++++++++++++
 5 files changed, 39 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/a1d55517/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index dec9181..f437c08 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -72,13 +72,13 @@ matrix:
       env: TOX_ENV=py35-backend_postgres
     - python: "2.7"
       env: TOX_ENV=flake8
-    - python: "3.5"  
+    - python: "3.5"
       env: TOX_ENV=py27-backend_postgres KUBERNETES_VERSION=v1.7.0
     - python: "3.5"
-      env: TOX_ENV=py27-backend_postgres KUBERNETES_VERSION=v1.8.0 
+      env: TOX_ENV=py27-backend_postgres KUBERNETES_VERSION=v1.8.0
   allow_failures:
     - env: TOX_ENV=py27-backend_postgres KUBERNETES_VERSION=v1.7.0
-    - env: TOX_ENV=py27-backend_postgres KUBERNETES_VERSION=v1.8.0  
+    - env: TOX_ENV=py27-backend_postgres KUBERNETES_VERSION=v1.8.0
 cache:
   directories:
     - $HOME/.wheelhouse/
@@ -94,7 +94,9 @@ install:
   - pip install tox
   - pip install codecov
 before_script:
+  - cat "$TRAVIS_BUILD_DIR/scripts/ci/my.cnf" | sudo tee -a /etc/mysql/my.cnf
   - mysql -e 'drop database if exists airflow; create database airflow' -u root
+  - sudo service mysql restart
   - psql -c 'create database airflow;' -U postgres
   - export PATH=${PATH}:/tmp/hive/bin
 script:

http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/a1d55517/UPDATING.md
----------------------------------------------------------------------
diff --git a/UPDATING.md b/UPDATING.md
index 7a801e5..e03243f 100644
--- a/UPDATING.md
+++ b/UPDATING.md
@@ -3,7 +3,12 @@
 This file documents any backwards-incompatible changes in Airflow and
 assists people when migrating to a new version.
 
-## Airflow 1.9.1
+## Airflow Master
+
+### MySQL setting required
+
+We now rely on more strict ANSI SQL settings for MySQL in order to have sane defaults. Make sure
+to have specified `explicit_defaults_for_timestamp=1` in your my.cnf under `[mysqld]`
 
 ### Celery config
 

http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/a1d55517/airflow/migrations/versions/0e2a74e0fc9f_add_time_zone_awareness.py
----------------------------------------------------------------------
diff --git a/airflow/migrations/versions/0e2a74e0fc9f_add_time_zone_awareness.py b/airflow/migrations/versions/0e2a74e0fc9f_add_time_zone_awareness.py
index bb65c1c..72b2163 100644
--- a/airflow/migrations/versions/0e2a74e0fc9f_add_time_zone_awareness.py
+++ b/airflow/migrations/versions/0e2a74e0fc9f_add_time_zone_awareness.py
@@ -1,3 +1,4 @@
+# flake8: noqa
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -34,6 +35,11 @@ def upgrade():
     conn = op.get_bind()
     if conn.dialect.name == 'mysql':
         conn.execute("SET time_zone = '+00:00'")
+        cur = conn.execute("SELECT @@explicit_defaults_for_timestamp")
+        res = cur.fetchall()
+        if res[0][0] == 0:
+            raise Exception("Global variable explicit_defaults_for_timestamp needs to be on (1) for mysql")
+
         op.alter_column(table_name='chart', column_name='last_modified', type_=mysql.TIMESTAMP(fsp=6))
 
         op.alter_column(table_name='dag', column_name='last_scheduler_run', type_=mysql.TIMESTAMP(fsp=6))
@@ -58,14 +64,14 @@ def upgrade():
         op.alter_column(table_name='log', column_name='dttm', type_=mysql.TIMESTAMP(fsp=6))
         op.alter_column(table_name='log', column_name='execution_date', type_=mysql.TIMESTAMP(fsp=6))
 
-        op.alter_column(table_name='sla_miss', column_name='execution_date', type_=mysql.TIMESTAMP(fsp=6))
+        op.alter_column(table_name='sla_miss', column_name='execution_date', type_=mysql.TIMESTAMP(fsp=6), nullable=False)
         op.alter_column(table_name='sla_miss', column_name='timestamp', type_=mysql.TIMESTAMP(fsp=6))
 
         op.alter_column(table_name='task_fail', column_name='execution_date', type_=mysql.TIMESTAMP(fsp=6))
         op.alter_column(table_name='task_fail', column_name='start_date', type_=mysql.TIMESTAMP(fsp=6))
         op.alter_column(table_name='task_fail', column_name='end_date', type_=mysql.TIMESTAMP(fsp=6))
 
-        op.alter_column(table_name='task_instance', column_name='execution_date', type_=mysql.TIMESTAMP(fsp=6))
+        op.alter_column(table_name='task_instance', column_name='execution_date', type_=mysql.TIMESTAMP(fsp=6), nullable=False)
         op.alter_column(table_name='task_instance', column_name='start_date', type_=mysql.TIMESTAMP(fsp=6))
         op.alter_column(table_name='task_instance', column_name='end_date', type_=mysql.TIMESTAMP(fsp=6))
         op.alter_column(table_name='task_instance', column_name='queued_dttm', type_=mysql.TIMESTAMP(fsp=6))

http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/a1d55517/docs/configuration.rst
----------------------------------------------------------------------
diff --git a/docs/configuration.rst b/docs/configuration.rst
index 61f5511..f5e81d6 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -51,6 +51,10 @@ As Airflow was built to interact with its metadata using the great SqlAlchemy
 library, you should be able to use any database backend supported as a
 SqlAlchemy backend. We recommend using **MySQL** or **Postgres**.
 
+.. note:: We rely on more strict ANSI SQL settings for MySQL in order to have 
+   sane defaults. Make sure to have specified `explicit_defaults_for_timestamp=1` 
+   in your my.cnf under `[mysqld]`
+
 .. note:: If you decide to use **Postgres**, we recommend using the ``psycopg2``
    driver and specifying it in your SqlAlchemy connection string.
    Also note that since SqlAlchemy does not expose a way to target a

http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/a1d55517/scripts/ci/my.cnf
----------------------------------------------------------------------
diff --git a/scripts/ci/my.cnf b/scripts/ci/my.cnf
new file mode 100644
index 0000000..c8e0602
--- /dev/null
+++ b/scripts/ci/my.cnf
@@ -0,0 +1,16 @@
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+[mysqld]
+explicit_defaults_for_timestamp=1