You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by as...@apache.org on 2019/11/27 19:10:14 UTC
[airflow-site] 01/30: Initial commit
This is an automated email from the ASF dual-hosted git repository.
ash pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/airflow-site.git
commit 9e19165cac0531b3f00dff4281c9910620b824c6
Author: Maxime Beauchemin <ma...@gmail.com>
AuthorDate: Fri Jun 3 11:44:05 2016 -0700
Initial commit
---
_images/adhoc.png | Bin 0 -> 182773 bytes
_images/airflow.gif | Bin 0 -> 622963 bytes
_images/apache.jpg | Bin 0 -> 43364 bytes
_images/branch_bad.png | Bin 0 -> 8825 bytes
_images/branch_good.png | Bin 0 -> 12035 bytes
_images/chart.png | Bin 0 -> 169382 bytes
_images/chart_form.png | Bin 0 -> 203224 bytes
_images/code.png | Bin 0 -> 218215 bytes
_images/connections.png | Bin 0 -> 93057 bytes
_images/context.png | Bin 0 -> 238360 bytes
_images/dags.png | Bin 0 -> 115068 bytes
_images/duration.png | Bin 0 -> 254497 bytes
_images/gantt.png | Bin 0 -> 213845 bytes
_images/graph.png | Bin 0 -> 333294 bytes
_images/incubator.jpg | Bin 0 -> 91227 bytes
_images/pin_large.png | Bin 0 -> 358276 bytes
_images/subdag_after.png | Bin 0 -> 30245 bytes
_images/subdag_before.png | Bin 0 -> 70382 bytes
_images/subdag_zoom.png | Bin 0 -> 150185 bytes
_images/tree.png | Bin 0 -> 163147 bytes
_images/variable_hidden.png | Bin 0 -> 154299 bytes
_modules/S3_hook.html | 604 ++
.../contrib/operators/hipchat_operator.html | 330 +
_modules/airflow/executors/celery_executor.html | 298 +
_modules/airflow/executors/local_executor.html | 276 +
.../airflow/executors/sequential_executor.html | 238 +
_modules/airflow/macros.html | 255 +
_modules/airflow/macros/hive.html | 298 +
_modules/airflow/models.html | 3802 +++++++
_modules/airflow/operators/docker_operator.html | 383 +
_modules/airflow/operators/sensors.html | 721 ++
_modules/bash_operator.html | 290 +
_modules/cloudant_hook.html | 274 +
_modules/dagrun_operator.html | 260 +
_modules/dbapi_hook.html | 426 +
_modules/druid_hook.html | 369 +
_modules/dummy_operator.html | 219 +
_modules/email_operator.html | 240 +
_modules/ftp_hook.html | 427 +
_modules/gcs_hook.html | 296 +
_modules/generic_transfer.html | 264 +
_modules/hive_hooks.html | 743 ++
_modules/hive_operator.html | 272 +
_modules/hive_to_druid.html | 316 +
_modules/hive_to_mysql.html | 294 +
_modules/hive_to_samba_operator.html | 246 +
_modules/http_hook.html | 310 +
_modules/http_operator.html | 265 +
_modules/index.html | 245 +
_modules/mssql_hook.html | 228 +
_modules/mssql_operator.html | 234 +
_modules/mssql_to_hive.html | 312 +
_modules/mysql_hook.html | 267 +
_modules/mysql_operator.html | 240 +
_modules/mysql_to_hive.html | 316 +
_modules/postgres_hook.html | 236 +
_modules/postgres_operator.html | 239 +
_modules/presto_check_operator.html | 303 +
_modules/presto_hook.html | 298 +
_modules/python_operator.html | 338 +
_modules/s3_to_hive_operator.html | 353 +
_modules/sensors.html | 721 ++
_modules/slack_operator.html | 304 +
_modules/sqlite_hook.html | 222 +
_modules/ssh_execute_operator.html | 343 +
_modules/ssh_hook.html | 353 +
_modules/vertica_hook.html | 247 +
_modules/vertica_operator.html | 233 +
_modules/vertica_to_hive.html | 316 +
_modules/webhdfs_hook.html | 287 +
_sources/cli.txt | 11 +
_sources/code.txt | 243 +
_sources/concepts.txt | 758 ++
_sources/configuration.txt | 230 +
_sources/faq.txt | 100 +
_sources/index.txt | 75 +
_sources/installation.txt | 90 +
_sources/license.txt | 211 +
_sources/plugins.txt | 139 +
_sources/profiling.txt | 39 +
_sources/project.txt | 58 +
_sources/scheduler.txt | 101 +
_sources/security.txt | 249 +
_sources/start.txt | 49 +
_sources/tutorial.txt | 429 +
_sources/ui.txt | 102 +
_static/ajax-loader.gif | Bin 0 -> 673 bytes
_static/apache.jpg | Bin 0 -> 43364 bytes
_static/basic.css | 608 ++
_static/comment-bright.png | Bin 0 -> 3500 bytes
_static/comment-close.png | Bin 0 -> 3578 bytes
_static/comment.png | Bin 0 -> 3445 bytes
_static/css/badge_only.css | 2 +
_static/css/theme.css | 5 +
_static/doctools.js | 287 +
_static/down-pressed.png | Bin 0 -> 347 bytes
_static/down.png | Bin 0 -> 347 bytes
_static/file.png | Bin 0 -> 358 bytes
_static/fonts/Inconsolata-Bold.ttf | Bin 0 -> 66352 bytes
_static/fonts/Inconsolata-Regular.ttf | Bin 0 -> 84548 bytes
_static/fonts/Lato-Bold.ttf | Bin 0 -> 121788 bytes
_static/fonts/Lato-Regular.ttf | Bin 0 -> 120196 bytes
_static/fonts/RobotoSlab-Bold.ttf | Bin 0 -> 170616 bytes
_static/fonts/RobotoSlab-Regular.ttf | Bin 0 -> 169064 bytes
_static/fonts/fontawesome-webfont.eot | Bin 0 -> 56006 bytes
_static/fonts/fontawesome-webfont.svg | 520 +
_static/fonts/fontawesome-webfont.ttf | Bin 0 -> 112160 bytes
_static/fonts/fontawesome-webfont.woff | Bin 0 -> 65452 bytes
_static/incubator.jpg | Bin 0 -> 91227 bytes
_static/jquery-1.11.1.js | 10308 +++++++++++++++++++
_static/jquery.js | 4 +
_static/js/modernizr.min.js | 4 +
_static/js/theme.js | 153 +
_static/minus.png | Bin 0 -> 173 bytes
_static/plus.png | Bin 0 -> 173 bytes
_static/pygments.css | 65 +
_static/searchtools.js | 651 ++
_static/underscore-1.3.1.js | 999 ++
_static/underscore.js | 31 +
_static/up-pressed.png | Bin 0 -> 345 bytes
_static/up.png | Bin 0 -> 345 bytes
_static/websupport.js | 808 ++
cli.html | 1035 ++
code.html | 3517 +++++++
concepts.html | 897 ++
configuration.html | 419 +
faq.html | 293 +
genindex.html | 1258 +++
index.html | 417 +
installation.html | 358 +
license.html | 418 +
objects.inv | Bin 0 -> 2103 bytes
plugins.html | 343 +
profiling.html | 250 +
project.html | 268 +
py-modindex.html | 262 +
scheduler.html | 328 +
search.html | 214 +
searchindex.js | 1 +
security.html | 436 +
start.html | 256 +
tutorial.html | 622 ++
ui.html | 296 +
143 files changed, 48568 insertions(+)
diff --git a/_images/adhoc.png b/_images/adhoc.png
new file mode 100644
index 0000000..77ea780
Binary files /dev/null and b/_images/adhoc.png differ
diff --git a/_images/airflow.gif b/_images/airflow.gif
new file mode 100644
index 0000000..1889b86
Binary files /dev/null and b/_images/airflow.gif differ
diff --git a/_images/apache.jpg b/_images/apache.jpg
new file mode 100644
index 0000000..312251f
Binary files /dev/null and b/_images/apache.jpg differ
diff --git a/_images/branch_bad.png b/_images/branch_bad.png
new file mode 100644
index 0000000..586844f
Binary files /dev/null and b/_images/branch_bad.png differ
diff --git a/_images/branch_good.png b/_images/branch_good.png
new file mode 100644
index 0000000..fbd4650
Binary files /dev/null and b/_images/branch_good.png differ
diff --git a/_images/chart.png b/_images/chart.png
new file mode 100644
index 0000000..bfca26b
Binary files /dev/null and b/_images/chart.png differ
diff --git a/_images/chart_form.png b/_images/chart_form.png
new file mode 100644
index 0000000..f73daf5
Binary files /dev/null and b/_images/chart_form.png differ
diff --git a/_images/code.png b/_images/code.png
new file mode 100644
index 0000000..ac49291
Binary files /dev/null and b/_images/code.png differ
diff --git a/_images/connections.png b/_images/connections.png
new file mode 100644
index 0000000..d07a130
Binary files /dev/null and b/_images/connections.png differ
diff --git a/_images/context.png b/_images/context.png
new file mode 100644
index 0000000..de75e48
Binary files /dev/null and b/_images/context.png differ
diff --git a/_images/dags.png b/_images/dags.png
new file mode 100644
index 0000000..a551f02
Binary files /dev/null and b/_images/dags.png differ
diff --git a/_images/duration.png b/_images/duration.png
new file mode 100644
index 0000000..18d723c
Binary files /dev/null and b/_images/duration.png differ
diff --git a/_images/gantt.png b/_images/gantt.png
new file mode 100644
index 0000000..c462adb
Binary files /dev/null and b/_images/gantt.png differ
diff --git a/_images/graph.png b/_images/graph.png
new file mode 100644
index 0000000..cbc58e6
Binary files /dev/null and b/_images/graph.png differ
diff --git a/_images/incubator.jpg b/_images/incubator.jpg
new file mode 100644
index 0000000..6f34a85
Binary files /dev/null and b/_images/incubator.jpg differ
diff --git a/_images/pin_large.png b/_images/pin_large.png
new file mode 100644
index 0000000..986c88b
Binary files /dev/null and b/_images/pin_large.png differ
diff --git a/_images/subdag_after.png b/_images/subdag_after.png
new file mode 100644
index 0000000..166a6de
Binary files /dev/null and b/_images/subdag_after.png differ
diff --git a/_images/subdag_before.png b/_images/subdag_before.png
new file mode 100644
index 0000000..ebc3e58
Binary files /dev/null and b/_images/subdag_before.png differ
diff --git a/_images/subdag_zoom.png b/_images/subdag_zoom.png
new file mode 100644
index 0000000..08fcf5c
Binary files /dev/null and b/_images/subdag_zoom.png differ
diff --git a/_images/tree.png b/_images/tree.png
new file mode 100644
index 0000000..f3796b0
Binary files /dev/null and b/_images/tree.png differ
diff --git a/_images/variable_hidden.png b/_images/variable_hidden.png
new file mode 100644
index 0000000..e081ca3
Binary files /dev/null and b/_images/variable_hidden.png differ
diff --git a/_modules/S3_hook.html b/_modules/S3_hook.html
new file mode 100644
index 0000000..e18ec5f
--- /dev/null
+++ b/_modules/S3_hook.html
@@ -0,0 +1,604 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>S3_hook — Airflow Documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="top" title="Airflow Documentation" href="../index.html"/>
+ <link rel="up" title="Module code" href="index.html"/>
+
+
+ <script src="../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../index.html" class="icon icon-home"> Airflow
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../project.html">Project</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../license.html">License</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../start.html">Quick Start</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../installation.html">Installation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../tutorial.html">Tutorial</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../configuration.html">Configuration</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../ui.html">UI / Screenshots</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../concepts.html">Concepts</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../profiling.html">Data Profiling</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../cli.html">Command Line Interface</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../scheduler.html">Scheduling & Triggers</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../plugins.html">Plugins</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../security.html">Security</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../faq.html">FAQ</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../code.html">API Reference</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../index.html">Airflow</a>
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+ <ul class="wy-breadcrumbs">
+ <li><a href="../index.html">Docs</a> »</li>
+
+ <li><a href="index.html">Module code</a> »</li>
+
+ <li>S3_hook</li>
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+ </ul>
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for S3_hook</h1><div class="highlight"><pre>
+<span></span><span class="c1"># -*- coding: utf-8 -*-</span>
+<span class="c1">#</span>
+<span class="c1"># Licensed under the Apache License, Version 2.0 (the "License");</span>
+<span class="c1"># you may not use this file except in compliance with the License.</span>
+<span class="c1"># You may obtain a copy of the License at</span>
+<span class="c1">#</span>
+<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
+<span class="c1">#</span>
+<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
+<span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span>
+<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
+<span class="c1"># See the License for the specific language governing permissions and</span>
+<span class="c1"># limitations under the License.</span>
+
+<span class="kn">from</span> <span class="nn">future</span> <span class="kn">import</span> <span class="n">standard_library</span>
+<span class="n">standard_library</span><span class="o">.</span><span class="n">install_aliases</span><span class="p">()</span>
+<span class="kn">import</span> <span class="nn">logging</span>
+<span class="kn">import</span> <span class="nn">re</span>
+<span class="kn">import</span> <span class="nn">fnmatch</span>
+<span class="kn">import</span> <span class="nn">configparser</span>
+<span class="kn">import</span> <span class="nn">math</span>
+<span class="kn">import</span> <span class="nn">os</span>
+<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlparse</span>
+<span class="kn">import</span> <span class="nn">warnings</span>
+
+<span class="kn">import</span> <span class="nn">boto</span>
+<span class="kn">from</span> <span class="nn">boto.s3.connection</span> <span class="kn">import</span> <span class="n">S3Connection</span>
+<span class="kn">from</span> <span class="nn">boto.sts</span> <span class="kn">import</span> <span class="n">STSConnection</span>
+<span class="n">boto</span><span class="o">.</span><span class="n">set_stream_logger</span><span class="p">(</span><span class="s1">'boto'</span><span class="p">)</span>
+<span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">(</span><span class="s2">"boto"</span><span class="p">)</span><span class="o">.</span><span class="n">setLevel</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">INFO</span><span class="p">)</span>
+
+<span class="kn">from</span> <span class="nn">airflow.exceptions</span> <span class="kn">import</span> <span class="n">AirflowException</span>
+<span class="kn">from</span> <span class="nn">airflow.hooks.base_hook</span> <span class="kn">import</span> <span class="n">BaseHook</span>
+
+
+<span class="k">def</span> <span class="nf">_parse_s3_config</span><span class="p">(</span><span class="n">config_file_name</span><span class="p">,</span> <span class="n">config_format</span><span class="o">=</span><span class="s1">'boto'</span><span class="p">,</span> <span class="n">profile</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Parses a config file for s3 credentials. Can currently</span>
+<span class="sd"> parse boto, s3cmd.conf and AWS SDK config formats</span>
+
+<span class="sd"> :param config_file_name: path to the config file</span>
+<span class="sd"> :type config_file_name: str</span>
+<span class="sd"> :param config_format: config type. One of "boto", "s3cmd" or "aws".</span>
+<span class="sd"> Defaults to "boto"</span>
+<span class="sd"> :type config_format: str</span>
+<span class="sd"> :param profile: profile name in AWS type config file</span>
+<span class="sd"> :type profile: str</span>
+<span class="sd"> """</span>
+ <span class="n">Config</span> <span class="o">=</span> <span class="n">configparser</span><span class="o">.</span><span class="n">ConfigParser</span><span class="p">()</span>
+ <span class="k">if</span> <span class="n">Config</span><span class="o">.</span><span class="n">read</span><span class="p">(</span><span class="n">config_file_name</span><span class="p">):</span> <span class="c1"># pragma: no cover</span>
+ <span class="n">sections</span> <span class="o">=</span> <span class="n">Config</span><span class="o">.</span><span class="n">sections</span><span class="p">()</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span><span class="s2">"Couldn't read {0}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">config_file_name</span><span class="p">))</span>
+ <span class="c1"># Setting option names depending on file format</span>
+ <span class="k">if</span> <span class="n">config_format</span> <span class="ow">is</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">config_format</span> <span class="o">=</span> <span class="s1">'boto'</span>
+ <span class="n">conf_format</span> <span class="o">=</span> <span class="n">config_format</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
+ <span class="k">if</span> <span class="n">conf_format</span> <span class="o">==</span> <span class="s1">'boto'</span><span class="p">:</span> <span class="c1"># pragma: no cover</span>
+ <span class="k">if</span> <span class="n">profile</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">None</span> <span class="ow">and</span> <span class="s1">'profile '</span> <span class="o">+</span> <span class="n">profile</span> <span class="ow">in</span> <span class="n">sections</span><span class="p">:</span>
+ <span class="n">cred_section</span> <span class="o">=</span> <span class="s1">'profile '</span> <span class="o">+</span> <span class="n">profile</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">cred_section</span> <span class="o">=</span> <span class="s1">'Credentials'</span>
+ <span class="k">elif</span> <span class="n">conf_format</span> <span class="o">==</span> <span class="s1">'aws'</span> <span class="ow">and</span> <span class="n">profile</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">cred_section</span> <span class="o">=</span> <span class="n">profile</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">cred_section</span> <span class="o">=</span> <span class="s1">'default'</span>
+ <span class="c1"># Option names</span>
+ <span class="k">if</span> <span class="n">conf_format</span> <span class="ow">in</span> <span class="p">(</span><span class="s1">'boto'</span><span class="p">,</span> <span class="s1">'aws'</span><span class="p">):</span> <span class="c1"># pragma: no cover</span>
+ <span class="n">key_id_option</span> <span class="o">=</span> <span class="s1">'aws_access_key_id'</span>
+ <span class="n">secret_key_option</span> <span class="o">=</span> <span class="s1">'aws_secret_access_key'</span>
+ <span class="c1"># security_token_option = 'aws_security_token'</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">key_id_option</span> <span class="o">=</span> <span class="s1">'access_key'</span>
+ <span class="n">secret_key_option</span> <span class="o">=</span> <span class="s1">'secret_key'</span>
+ <span class="c1"># Actual Parsing</span>
+ <span class="k">if</span> <span class="n">cred_section</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">sections</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span><span class="s2">"This config file format is not recognized"</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">access_key</span> <span class="o">=</span> <span class="n">Config</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">cred_section</span><span class="p">,</span> <span class="n">key_id_option</span><span class="p">)</span>
+ <span class="n">secret_key</span> <span class="o">=</span> <span class="n">Config</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">cred_section</span><span class="p">,</span> <span class="n">secret_key_option</span><span class="p">)</span>
+ <span class="n">calling_format</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="k">if</span> <span class="n">Config</span><span class="o">.</span><span class="n">has_option</span><span class="p">(</span><span class="n">cred_section</span><span class="p">,</span> <span class="s1">'calling_format'</span><span class="p">):</span>
+ <span class="n">calling_format</span> <span class="o">=</span> <span class="n">Config</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">cred_section</span><span class="p">,</span> <span class="s1">'calling_format'</span><span class="p">)</span>
+ <span class="k">except</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">"Option Error in parsing s3 config file"</span><span class="p">)</span>
+ <span class="k">raise</span>
+ <span class="k">return</span> <span class="p">(</span><span class="n">access_key</span><span class="p">,</span> <span class="n">secret_key</span><span class="p">,</span> <span class="n">calling_format</span><span class="p">)</span>
+
+
+<div class="viewcode-block" id="S3Hook"><a class="viewcode-back" href="../code.html#airflow.hooks.S3Hook">[docs]</a><span class="k">class</span> <span class="nc">S3Hook</span><span class="p">(</span><span class="n">BaseHook</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Interact with S3. This class is a wrapper around the boto library.</span>
+<span class="sd"> """</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span>
+ <span class="n">s3_conn_id</span><span class="o">=</span><span class="s1">'s3_default'</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">s3_conn_id</span> <span class="o">=</span> <span class="n">s3_conn_id</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">s3_conn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_connection</span><span class="p">(</span><span class="n">s3_conn_id</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">s3_conn</span><span class="o">.</span><span class="n">extra_dejson</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">profile</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'profile'</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">calling_format</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_creds_in_conn</span> <span class="o">=</span> <span class="s1">'aws_secret_access_key'</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_creds_in_config_file</span> <span class="o">=</span> <span class="s1">'s3_config_file'</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_default_to_boto</span> <span class="o">=</span> <span class="bp">False</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_creds_in_conn</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_a_key</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span><span class="p">[</span><span class="s1">'aws_access_key_id'</span><span class="p">]</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_s_key</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span><span class="p">[</span><span class="s1">'aws_secret_access_key'</span><span class="p">]</span>
+ <span class="k">if</span> <span class="s1">'calling_format'</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">calling_format</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span><span class="p">[</span><span class="s1">'calling_format'</span><span class="p">]</span>
+ <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">_creds_in_config_file</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">s3_config_file</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span><span class="p">[</span><span class="s1">'s3_config_file'</span><span class="p">]</span>
+ <span class="c1"># The format can be None and will default to boto in the parser</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">s3_config_format</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'s3_config_format'</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_default_to_boto</span> <span class="o">=</span> <span class="bp">True</span>
+ <span class="c1"># STS support for cross account resource access</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_sts_conn_required</span> <span class="o">=</span> <span class="p">(</span><span class="s1">'aws_account_id'</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span> <span class="ow">or</span>
+ <span class="s1">'role_arn'</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span><span class="p">)</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_sts_conn_required</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">role_arn</span> <span class="o">=</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'role_arn'</span><span class="p">)</span> <span class="ow">or</span>
+ <span class="s2">"arn:aws:iam::"</span> <span class="o">+</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span><span class="p">[</span><span class="s1">'aws_account_id'</span><span class="p">]</span> <span class="o">+</span>
+ <span class="s2">":role/"</span> <span class="o">+</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span><span class="p">[</span><span class="s1">'aws_iam_role'</span><span class="p">])</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">connection</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
+
+ <span class="k">def</span> <span class="nf">__getstate__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="n">pickled_dict</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">__dict__</span><span class="p">)</span>
+ <span class="k">del</span> <span class="n">pickled_dict</span><span class="p">[</span><span class="s1">'connection'</span><span class="p">]</span>
+ <span class="k">return</span> <span class="n">pickled_dict</span>
+
+ <span class="k">def</span> <span class="nf">__setstate__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">d</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">__dict__</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">d</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">__dict__</span><span class="p">[</span><span class="s1">'connection'</span><span class="p">]</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
+
+ <span class="k">def</span> <span class="nf">_parse_s3_url</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">s3url</span><span class="p">):</span>
+ <span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span>
+ <span class="s1">'Please note: S3Hook._parse_s3_url() is now '</span>
+ <span class="s1">'S3Hook.parse_s3_url() (no leading underscore).'</span><span class="p">,</span>
+ <span class="ne">DeprecationWarning</span><span class="p">)</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_s3_url</span><span class="p">(</span><span class="n">s3url</span><span class="p">)</span>
+
+ <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">parse_s3_url</span><span class="p">(</span><span class="n">s3url</span><span class="p">):</span>
+ <span class="n">parsed_url</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">s3url</span><span class="p">)</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">netloc</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span><span class="s1">'Please provide a bucket_name'</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">bucket_name</span> <span class="o">=</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">netloc</span>
+ <span class="n">key</span> <span class="o">=</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">strip</span><span class="p">(</span><span class="s1">'/'</span><span class="p">)</span>
+ <span class="k">return</span> <span class="p">(</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">key</span><span class="p">)</span>
+
+<div class="viewcode-block" id="S3Hook.get_conn"><a class="viewcode-back" href="../code.html#airflow.hooks.S3Hook.get_conn">[docs]</a> <span class="k">def</span> <span class="nf">get_conn</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns the boto S3Connection object.</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_default_to_boto</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">S3Connection</span><span class="p">(</span><span class="n">profile_name</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">profile</span><span class="p">)</span>
+ <span class="n">a_key</span> <span class="o">=</span> <span class="n">s_key</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_creds_in_config_file</span><span class="p">:</span>
+ <span class="n">a_key</span><span class="p">,</span> <span class="n">s_key</span><span class="p">,</span> <span class="n">calling_format</span> <span class="o">=</span> <span class="n">_parse_s3_config</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">s3_config_file</span><span class="p">,</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">s3_config_format</span><span class="p">,</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">profile</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">_creds_in_conn</span><span class="p">:</span>
+ <span class="n">a_key</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_a_key</span>
+ <span class="n">s_key</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_s_key</span>
+ <span class="n">calling_format</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">calling_format</span>
+
+ <span class="k">if</span> <span class="n">calling_format</span> <span class="ow">is</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">calling_format</span> <span class="o">=</span> <span class="s1">'boto.s3.connection.SubdomainCallingFormat'</span>
+
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_sts_conn_required</span><span class="p">:</span>
+ <span class="n">sts_connection</span> <span class="o">=</span> <span class="n">STSConnection</span><span class="p">(</span><span class="n">aws_access_key_id</span><span class="o">=</span><span class="n">a_key</span><span class="p">,</span>
+ <span class="n">aws_secret_access_key</span><span class="o">=</span><span class="n">s_key</span><span class="p">,</span>
+ <span class="n">profile_name</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">profile</span><span class="p">)</span>
+ <span class="n">assumed_role_object</span> <span class="o">=</span> <span class="n">sts_connection</span><span class="o">.</span><span class="n">assume_role</span><span class="p">(</span>
+ <span class="n">role_arn</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">role_arn</span><span class="p">,</span>
+ <span class="n">role_session_name</span><span class="o">=</span><span class="s2">"Airflow_"</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">s3_conn_id</span>
+ <span class="p">)</span>
+ <span class="n">creds</span> <span class="o">=</span> <span class="n">assumed_role_object</span><span class="o">.</span><span class="n">credentials</span>
+ <span class="n">connection</span> <span class="o">=</span> <span class="n">S3Connection</span><span class="p">(</span>
+ <span class="n">aws_access_key_id</span><span class="o">=</span><span class="n">creds</span><span class="o">.</span><span class="n">access_key</span><span class="p">,</span>
+ <span class="n">aws_secret_access_key</span><span class="o">=</span><span class="n">creds</span><span class="o">.</span><span class="n">secret_key</span><span class="p">,</span>
+ <span class="n">calling_format</span><span class="o">=</span><span class="n">calling_format</span><span class="p">,</span>
+ <span class="n">security_token</span><span class="o">=</span><span class="n">creds</span><span class="o">.</span><span class="n">session_token</span>
+ <span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">connection</span> <span class="o">=</span> <span class="n">S3Connection</span><span class="p">(</span><span class="n">aws_access_key_id</span><span class="o">=</span><span class="n">a_key</span><span class="p">,</span>
+ <span class="n">aws_secret_access_key</span><span class="o">=</span><span class="n">s_key</span><span class="p">,</span>
+ <span class="n">calling_format</span><span class="o">=</span><span class="n">calling_format</span><span class="p">,</span>
+ <span class="n">profile_name</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">profile</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">connection</span></div>
+
+<div class="viewcode-block" id="S3Hook.check_for_bucket"><a class="viewcode-back" href="../code.html#airflow.hooks.S3Hook.check_for_bucket">[docs]</a> <span class="k">def</span> <span class="nf">check_for_bucket</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket_name</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Check if bucket_name exists.</span>
+
+<span class="sd"> :param bucket_name: the name of the bucket</span>
+<span class="sd"> :type bucket_name: str</span>
+<span class="sd"> """</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">connection</span><span class="o">.</span><span class="n">lookup</span><span class="p">(</span><span class="n">bucket_name</span><span class="p">)</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">None</span></div>
+
+<div class="viewcode-block" id="S3Hook.get_bucket"><a class="viewcode-back" href="../code.html#airflow.hooks.S3Hook.get_bucket">[docs]</a> <span class="k">def</span> <span class="nf">get_bucket</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket_name</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns a boto.s3.bucket.Bucket object</span>
+
+<span class="sd"> :param bucket_name: the name of the bucket</span>
+<span class="sd"> :type bucket_name: str</span>
+<span class="sd"> """</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">connection</span><span class="o">.</span><span class="n">get_bucket</span><span class="p">(</span><span class="n">bucket_name</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="S3Hook.list_keys"><a class="viewcode-back" href="../code.html#airflow.hooks.S3Hook.list_keys">[docs]</a> <span class="k">def</span> <span class="nf">list_keys</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket_name</span><span class="p">,</span> <span class="n">prefix</span><span class="o">=</span><span class="s1">''</span><span class="p">,</span> <span class="n">delimiter</span><span class= [...]
+ <span class="sd">"""</span>
+<span class="sd"> Lists keys in a bucket under prefix and not containing delimiter</span>
+
+<span class="sd"> :param bucket_name: the name of the bucket</span>
+<span class="sd"> :type bucket_name: str</span>
+<span class="sd"> :param prefix: a key prefix</span>
+<span class="sd"> :type prefix: str</span>
+<span class="sd"> :param delimiter: the delimiter marks key hierarchy.</span>
+<span class="sd"> :type delimiter: str</span>
+<span class="sd"> """</span>
+ <span class="n">b</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_bucket</span><span class="p">(</span><span class="n">bucket_name</span><span class="p">)</span>
+ <span class="n">keylist</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">b</span><span class="o">.</span><span class="n">list</span><span class="p">(</span><span class="n">prefix</span><span class="o">=</span><span class="n">prefix</span><span class="p">,</span> <span class="n">delimiter</span><span class="o">=</span><span class="n">delimiter</span><span class="p">))</span>
+ <span class="k">return</span> <span class="p">[</span><span class="n">k</span><span class="o">.</span><span class="n">name</span> <span class="k">for</span> <span class="n">k</span> <span class="ow">in</span> <span class="n">keylist</span><span class="p">]</span> <span class="k">if</span> <span class="n">keylist</span> <span class="o">!=</span> <span class="p">[]</span> <span class="k">else</span> <span class="bp">None</span></div>
+
+<div class="viewcode-block" id="S3Hook.list_prefixes"><a class="viewcode-back" href="../code.html#airflow.hooks.S3Hook.list_prefixes">[docs]</a> <span class="k">def</span> <span class="nf">list_prefixes</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket_name</span><span class="p">,</span> <span class="n">prefix</span><span class="o">=</span><span class="s1">''</span><span class="p">,</span> <span class="n">delimiter</span> [...]
+ <span class="sd">"""</span>
+<span class="sd"> Lists prefixes in a bucket under prefix</span>
+
+<span class="sd"> :param bucket_name: the name of the bucket</span>
+<span class="sd"> :type bucket_name: str</span>
+<span class="sd"> :param prefix: a key prefix</span>
+<span class="sd"> :type prefix: str</span>
+<span class="sd"> :param delimiter: the delimiter marks key hierarchy.</span>
+<span class="sd"> :type delimiter: str</span>
+<span class="sd"> """</span>
+ <span class="n">b</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_bucket</span><span class="p">(</span><span class="n">bucket_name</span><span class="p">)</span>
+ <span class="n">plist</span> <span class="o">=</span> <span class="n">b</span><span class="o">.</span><span class="n">list</span><span class="p">(</span><span class="n">prefix</span><span class="o">=</span><span class="n">prefix</span><span class="p">,</span> <span class="n">delimiter</span><span class="o">=</span><span class="n">delimiter</span><span class="p">)</span>
+ <span class="n">prefix_names</span> <span class="o">=</span> <span class="p">[</span><span class="n">p</span><span class="o">.</span><span class="n">name</span> <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">plist</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">p</span><span class="p">,</span> <span class="n">boto</span><span class="o">.</span><span class="n">s3</span><span class="o">.</span><span class="n">prefix</span><span class="o">.</span><span class="n">Prefix</span><span class="p">)]</span>
+ <span class="k">return</span> <span class="n">prefix_names</span> <span class="k">if</span> <span class="n">prefix_names</span> <span class="o">!=</span> <span class="p">[]</span> <span class="k">else</span> <span class="bp">None</span></div>
+
+<div class="viewcode-block" id="S3Hook.check_for_key"><a class="viewcode-back" href="../code.html#airflow.hooks.S3Hook.check_for_key">[docs]</a> <span class="k">def</span> <span class="nf">check_for_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">,</span> <span class="n">bucket_name</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Checks that a key exists in a bucket</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">bucket_name</span><span class="p">:</span>
+ <span class="p">(</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">key</span><span class="p">)</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_s3_url</span><span class="p">(</span><span class="n">key</span><span class="p">)</span>
+ <span class="n">bucket</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_bucket</span><span class="p">(</span><span class="n">bucket_name</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">bucket</span><span class="o">.</span><span class="n">get_key</span><span class="p">(</span><span class="n">key</span><span class="p">)</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">None</span></div>
+
+<div class="viewcode-block" id="S3Hook.get_key"><a class="viewcode-back" href="../code.html#airflow.hooks.S3Hook.get_key">[docs]</a> <span class="k">def</span> <span class="nf">get_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">,</span> <span class="n">bucket_name</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns a boto.s3.key.Key object</span>
+
+<span class="sd"> :param key: the path to the key</span>
+<span class="sd"> :type key: str</span>
+<span class="sd"> :param bucket_name: the name of the bucket</span>
+<span class="sd"> :type bucket_name: str</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">bucket_name</span><span class="p">:</span>
+ <span class="p">(</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">key</span><span class="p">)</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_s3_url</span><span class="p">(</span><span class="n">key</span><span class="p">)</span>
+ <span class="n">bucket</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_bucket</span><span class="p">(</span><span class="n">bucket_name</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">bucket</span><span class="o">.</span><span class="n">get_key</span><span class="p">(</span><span class="n">key</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="S3Hook.check_for_wildcard_key"><a class="viewcode-back" href="../code.html#airflow.hooks.S3Hook.check_for_wildcard_key">[docs]</a> <span class="k">def</span> <span class="nf">check_for_wildcard_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
+ <span class="n">wildcard_key</span><span class="p">,</span> <span class="n">bucket_name</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">delimiter</span><span class="o">=</span><span class="s1">''</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Checks that a key matching a wildcard expression exists in a bucket</span>
+<span class="sd"> """</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_wildcard_key</span><span class="p">(</span><span class="n">wildcard_key</span><span class="o">=</span><span class="n">wildcard_key</span><span class="p">,</span>
+ <span class="n">bucket_name</span><span class="o">=</span><span class="n">bucket_name</span><span class="p">,</span>
+ <span class="n">delimiter</span><span class="o">=</span><span class="n">delimiter</span><span class="p">)</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">None</span></div>
+
+<div class="viewcode-block" id="S3Hook.get_wildcard_key"><a class="viewcode-back" href="../code.html#airflow.hooks.S3Hook.get_wildcard_key">[docs]</a> <span class="k">def</span> <span class="nf">get_wildcard_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">wildcard_key</span><span class="p">,</span> <span class="n">bucket_name</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">delimit [...]
+ <span class="sd">"""</span>
+<span class="sd"> Returns a boto.s3.key.Key object matching the wildcard expression</span>
+
+<span class="sd"> :param wildcard_key: the path to the key, which may contain wildcards</span>
+<span class="sd"> :type wildcard_key: str</span>
+<span class="sd"> :param bucket_name: the name of the bucket</span>
+<span class="sd"> :type bucket_name: str</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">bucket_name</span><span class="p">:</span>
+ <span class="p">(</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">wildcard_key</span><span class="p">)</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_s3_url</span><span class="p">(</span><span class="n">wildcard_key</span><span class="p">)</span>
+ <span class="n">bucket</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_bucket</span><span class="p">(</span><span class="n">bucket_name</span><span class="p">)</span>
+ <span class="n">prefix</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">r'[*]'</span><span class="p">,</span> <span class="n">wildcard_key</span><span class="p">,</span> <span class="mi">1</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
+ <span class="n">klist</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">list_keys</span><span class="p">(</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">prefix</span><span class="o">=</span><span class="n">prefix</span><span class="p">,</span> <span class="n">delimiter</span><span class="o">=</span><span class="n">delimiter</span><span class="p">)</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">klist</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">None</span>
+ <span class="n">key_matches</span> <span class="o">=</span> <span class="p">[</span><span class="n">k</span> <span class="k">for</span> <span class="n">k</span> <span class="ow">in</span> <span class="n">klist</span> <span class="k">if</span> <span class="n">fnmatch</span><span class="o">.</span><span class="n">fnmatch</span><span class="p">(</span><span class="n">k</span><span class="p">,</span> <span class="n">wildcard_key</span><span class="p">)]</span>
+ <span class="k">return</span> <span class="n">bucket</span><span class="o">.</span><span class="n">get_key</span><span class="p">(</span><span class="n">key_matches</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> <span class="k">if</span> <span class="n">key_matches</span> <span class="k">else</span> <span class="bp">None</span></div>
+
+<div class="viewcode-block" id="S3Hook.check_for_prefix"><a class="viewcode-back" href="../code.html#airflow.hooks.S3Hook.check_for_prefix">[docs]</a> <span class="k">def</span> <span class="nf">check_for_prefix</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket_name</span><span class="p">,</span> <span class="n">prefix</span><span class="p">,</span> <span class="n">delimiter</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Checks that a prefix exists in a bucket</span>
+<span class="sd"> """</span>
+ <span class="n">prefix</span> <span class="o">=</span> <span class="n">prefix</span> <span class="o">+</span> <span class="n">delimiter</span> <span class="k">if</span> <span class="n">prefix</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">!=</span> <span class="n">delimiter</span> <span class="k">else</span> <span class="n">prefix</span>
+ <span class="n">prefix_split</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">r'(\w+[{d}])$'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">d</span><span class="o">=</span><span class="n">delimiter</span><span class="p">),</span> <span class="n">prefix</span><span class="p">,</span> <span class="mi">1</span><span class="p [...]
+ <span class="n">previous_level</span> <span class="o">=</span> <span class="n">prefix_split</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
+ <span class="n">plist</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">list_prefixes</span><span class="p">(</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">previous_level</span><span class="p">,</span> <span class="n">delimiter</span><span class="p">)</span>
+ <span class="k">return</span> <span class="bp">False</span> <span class="k">if</span> <span class="n">plist</span> <span class="ow">is</span> <span class="bp">None</span> <span class="k">else</span> <span class="n">prefix</span> <span class="ow">in</span> <span class="n">plist</span></div>
+
+<div class="viewcode-block" id="S3Hook.load_file"><a class="viewcode-back" href="../code.html#airflow.hooks.S3Hook.load_file">[docs]</a> <span class="k">def</span> <span class="nf">load_file</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span>
+ <span class="n">filename</span><span class="p">,</span>
+ <span class="n">key</span><span class="p">,</span>
+ <span class="n">bucket_name</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">replace</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">multipart_bytes</span><span class="o">=</span><span class="mi">5</span> <span class="o">*</span> <span class="p">(</span><span class="mi">1024</span> <span class="o">**</span> <span class="mi">3</span><span class="p">)):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Loads a local file to S3</span>
+
+<span class="sd"> :param filename: name of the file to load.</span>
+<span class="sd"> :type filename: str</span>
+<span class="sd"> :param key: S3 key that will point to the file</span>
+<span class="sd"> :type key: str</span>
+<span class="sd"> :param bucket_name: Name of the bucket in which to store the file</span>
+<span class="sd"> :type bucket_name: str</span>
+<span class="sd"> :param replace: A flag to decide whether or not to overwrite the key</span>
+<span class="sd"> if it already exists. If replace is False and the key exists, an</span>
+<span class="sd"> error will be raised.</span>
+<span class="sd"> :type replace: bool</span>
+<span class="sd"> :param multipart_bytes: If provided, the file is uploaded in parts of</span>
+<span class="sd"> this size (minimum 5242880). The default value is 5GB, since S3</span>
+<span class="sd"> cannot accept non-multipart uploads for files larger than 5GB. If</span>
+<span class="sd"> the file is smaller than the specified limit, the option will be</span>
+<span class="sd"> ignored.</span>
+<span class="sd"> :type multipart_bytes: int</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">bucket_name</span><span class="p">:</span>
+ <span class="p">(</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">key</span><span class="p">)</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_s3_url</span><span class="p">(</span><span class="n">key</span><span class="p">)</span>
+ <span class="n">bucket</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_bucket</span><span class="p">(</span><span class="n">bucket_name</span><span class="p">)</span>
+ <span class="n">key_obj</span> <span class="o">=</span> <span class="n">bucket</span><span class="o">.</span><span class="n">get_key</span><span class="p">(</span><span class="n">key</span><span class="p">)</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">replace</span> <span class="ow">and</span> <span class="n">key_obj</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"The key {key} already exists."</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
+ <span class="o">**</span><span class="nb">locals</span><span class="p">()))</span>
+
+ <span class="n">key_size</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">getsize</span><span class="p">(</span><span class="n">filename</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">multipart_bytes</span> <span class="ow">and</span> <span class="n">key_size</span> <span class="o">>=</span> <span class="n">multipart_bytes</span><span class="p">:</span>
+ <span class="c1"># multipart upload</span>
+ <span class="kn">from</span> <span class="nn">filechunkio</span> <span class="kn">import</span> <span class="n">FileChunkIO</span>
+ <span class="n">mp</span> <span class="o">=</span> <span class="n">bucket</span><span class="o">.</span><span class="n">initiate_multipart_upload</span><span class="p">(</span><span class="n">key_name</span><span class="o">=</span><span class="n">key</span><span class="p">)</span>
+ <span class="n">total_chunks</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">math</span><span class="o">.</span><span class="n">ceil</span><span class="p">(</span><span class="n">key_size</span> <span class="o">/</span> <span class="n">multipart_bytes</span><span class="p">))</span>
+ <span class="n">sent_bytes</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="k">for</span> <span class="n">chunk</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">total_chunks</span><span class="p">):</span>
+ <span class="n">offset</span> <span class="o">=</span> <span class="n">chunk</span> <span class="o">*</span> <span class="n">multipart_bytes</span>
+ <span class="nb">bytes</span> <span class="o">=</span> <span class="nb">min</span><span class="p">(</span><span class="n">multipart_bytes</span><span class="p">,</span> <span class="n">key_size</span> <span class="o">-</span> <span class="n">offset</span><span class="p">)</span>
+ <span class="k">with</span> <span class="n">FileChunkIO</span><span class="p">(</span>
+ <span class="n">filename</span><span class="p">,</span> <span class="s1">'r'</span><span class="p">,</span> <span class="n">offset</span><span class="o">=</span><span class="n">offset</span><span class="p">,</span> <span class="nb">bytes</span><span class="o">=</span><span class="nb">bytes</span><span class="p">)</span> <span class="k">as</span> <span class="n">fp</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Sending chunk {c} of {tc}...'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
+ <span class="n">c</span><span class="o">=</span><span class="n">chunk</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span> <span class="n">tc</span><span class="o">=</span><span class="n">total_chunks</span><span class="p">))</span>
+ <span class="n">mp</span><span class="o">.</span><span class="n">upload_part_from_file</span><span class="p">(</span><span class="n">fp</span><span class="p">,</span> <span class="n">part_num</span><span class="o">=</span><span class="n">chunk</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span>
+ <span class="k">except</span><span class="p">:</span>
+ <span class="n">mp</span><span class="o">.</span><span class="n">cancel_upload</span><span class="p">()</span>
+ <span class="k">raise</span>
+ <span class="n">mp</span><span class="o">.</span><span class="n">complete_upload</span><span class="p">()</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="c1"># regular upload</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">key_obj</span><span class="p">:</span>
+ <span class="n">key_obj</span> <span class="o">=</span> <span class="n">bucket</span><span class="o">.</span><span class="n">new_key</span><span class="p">(</span><span class="n">key_name</span><span class="o">=</span><span class="n">key</span><span class="p">)</span>
+ <span class="n">key_size</span> <span class="o">=</span> <span class="n">key_obj</span><span class="o">.</span><span class="n">set_contents_from_filename</span><span class="p">(</span><span class="n">filename</span><span class="p">,</span>
+ <span class="n">replace</span><span class="o">=</span><span class="n">replace</span><span class="p">)</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"The key {key} now contains"</span>
+ <span class="s2">" {key_size} bytes"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">()))</span></div>
+
+<div class="viewcode-block" id="S3Hook.load_string"><a class="viewcode-back" href="../code.html#airflow.hooks.S3Hook.load_string">[docs]</a> <span class="k">def</span> <span class="nf">load_string</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">string_data</span><span class="p">,</span>
+ <span class="n">key</span><span class="p">,</span> <span class="n">bucket_name</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">replace</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">encrypt</span><span class="o">=</span><span class="bp">False</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Loads a string to S3</span>
+
+<span class="sd"> This is provided as a convenience to drop a string in S3. It uses the</span>
+<span class="sd"> boto infrastructure to ship the data to s3. It is currently using only</span>
+<span class="sd"> a single part upload, and should not be used to move large files.</span>
+
+<span class="sd"> :param string_data: string to set as content for the key.</span>
+<span class="sd"> :type string_data: str</span>
+<span class="sd"> :param key: S3 key under which the string will be stored</span>
+<span class="sd"> :type key: str</span>
+<span class="sd"> :param bucket_name: Name of the bucket in which to store the string</span>
+<span class="sd"> :type bucket_name: str</span>
+<span class="sd"> :param replace: A flag to decide whether or not to overwrite the key</span>
+<span class="sd"> if it already exists</span>
+<span class="sd"> :type replace: bool</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">bucket_name</span><span class="p">:</span>
+ <span class="p">(</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">key</span><span class="p">)</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_s3_url</span><span class="p">(</span><span class="n">key</span><span class="p">)</span>
+ <span class="n">bucket</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_bucket</span><span class="p">(</span><span class="n">bucket_name</span><span class="p">)</span>
+ <span class="n">key_obj</span> <span class="o">=</span> <span class="n">bucket</span><span class="o">.</span><span class="n">get_key</span><span class="p">(</span><span class="n">key</span><span class="p">)</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">replace</span> <span class="ow">and</span> <span class="n">key_obj</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"The key {key} already exists."</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
+ <span class="o">**</span><span class="nb">locals</span><span class="p">()))</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">key_obj</span><span class="p">:</span>
+ <span class="n">key_obj</span> <span class="o">=</span> <span class="n">bucket</span><span class="o">.</span><span class="n">new_key</span><span class="p">(</span><span class="n">key_name</span><span class="o">=</span><span class="n">key</span><span class="p">)</span>
+ <span class="n">key_size</span> <span class="o">=</span> <span class="n">key_obj</span><span class="o">.</span><span class="n">set_contents_from_string</span><span class="p">(</span><span class="n">string_data</span><span class="p">,</span>
+ <span class="n">replace</span><span class="o">=</span><span class="n">replace</span><span class="p">,</span>
+ <span class="n">encrypt_key</span><span class="o">=</span><span class="n">encrypt</span><span class="p">)</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"The key {key} now contains"</span>
+ <span class="s2">" {key_size} bytes"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">()))</span></div></div>
+</pre></div>
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright 2014, Maxime Beauchemin, Airbnb.
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/_modules/airflow/contrib/operators/hipchat_operator.html b/_modules/airflow/contrib/operators/hipchat_operator.html
new file mode 100644
index 0000000..370d812
--- /dev/null
+++ b/_modules/airflow/contrib/operators/hipchat_operator.html
@@ -0,0 +1,330 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>airflow.contrib.operators.hipchat_operator — Airflow Documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../../../../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="top" title="Airflow Documentation" href="../../../../index.html"/>
+ <link rel="up" title="Module code" href="../../../index.html"/>
+
+
+ <script src="../../../../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../../../../index.html" class="icon icon-home"> Airflow
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../../../../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../../project.html">Project</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../license.html">License</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../start.html">Quick Start</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../installation.html">Installation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../tutorial.html">Tutorial</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../configuration.html">Configuration</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../ui.html">UI / Screenshots</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../concepts.html">Concepts</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../profiling.html">Data Profiling</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../cli.html">Command Line Interface</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../scheduler.html">Scheduling & Triggers</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../plugins.html">Plugins</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../security.html">Security</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../faq.html">FAQ</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../../code.html">API Reference</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../../../../index.html">Airflow</a>
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+ <ul class="wy-breadcrumbs">
+ <li><a href="../../../../index.html">Docs</a> »</li>
+
+ <li><a href="../../../index.html">Module code</a> »</li>
+
+ <li>airflow.contrib.operators.hipchat_operator</li>
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+ </ul>
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for airflow.contrib.operators.hipchat_operator</h1><div class="highlight"><pre>
+<span></span><span class="c1"># -*- coding: utf-8 -*-</span>
+<span class="c1">#</span>
+<span class="c1"># Licensed under the Apache License, Version 2.0 (the "License");</span>
+<span class="c1"># you may not use this file except in compliance with the License.</span>
+<span class="c1"># You may obtain a copy of the License at</span>
+<span class="c1">#</span>
+<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
+<span class="c1">#</span>
+<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
+<span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span>
+<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
+<span class="c1"># See the License for the specific language governing permissions and</span>
+<span class="c1"># limitations under the License.</span>
+
+<span class="kn">from</span> <span class="nn">airflow.utils.decorators</span> <span class="kn">import</span> <span class="n">apply_defaults</span>
+<span class="kn">from</span> <span class="nn">airflow.models</span> <span class="kn">import</span> <span class="n">BaseOperator</span>
+<span class="kn">from</span> <span class="nn">airflow.exceptions</span> <span class="kn">import</span> <span class="n">AirflowException</span>
+<span class="kn">import</span> <span class="nn">logging</span>
+<span class="kn">import</span> <span class="nn">requests</span>
+<span class="kn">import</span> <span class="nn">json</span>
+
+
+<div class="viewcode-block" id="HipChatAPIOperator"><a class="viewcode-back" href="../../../../code.html#airflow.contrib.operators.hipchat_operator.HipChatAPIOperator">[docs]</a><span class="k">class</span> <span class="nc">HipChatAPIOperator</span><span class="p">(</span><span class="n">BaseOperator</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Base HipChat Operator.</span>
+<span class="sd"> All derived HipChat operators are modeled on HipChat's official REST API documentation</span>
+<span class="sd"> at https://www.hipchat.com/docs/apiv2. Before using any HipChat API operators you need</span>
+<span class="sd"> to get an authentication token at https://www.hipchat.com/docs/apiv2/auth.</span>
+<span class="sd"> In the future additional HipChat operators will be derived from this class as well.</span>
+
+<span class="sd"> :param token: HipChat REST API authentication token</span>
+<span class="sd"> :type token: str</span>
+<span class="sd"> :param base_url: HipChat REST API base url.</span>
+<span class="sd"> :type base_url: str</span>
+<span class="sd"> """</span>
+ <span class="nd">@apply_defaults</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
+ <span class="n">token</span><span class="p">,</span>
+ <span class="n">base_url</span><span class="o">=</span><span class="s1">'https://api.hipchat.com/v2'</span><span class="p">,</span>
+ <span class="o">*</span><span class="n">args</span><span class="p">,</span>
+ <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="nb">super</span><span class="p">(</span><span class="n">HipChatAPIOperator</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">token</span> <span class="o">=</span> <span class="n">token</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">base_url</span> <span class="o">=</span> <span class="n">base_url</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">method</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">url</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">body</span> <span class="o">=</span> <span class="bp">None</span>
+
+ <span class="k">def</span> <span class="nf">prepare_request</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Used by the execute function. Set the request method, url, and body of HipChat's</span>
+<span class="sd"> REST API call.</span>
+
+<span class="sd"> Override in child class. Each HipChatAPI child operator is responsible for implementing</span>
+<span class="sd"> a prepare_request method that sets self.method, self.url, and self.body.</span>
+<span class="sd"> """</span>
+ <span class="k">pass</span>
+
+ <span class="k">def</span> <span class="nf">execute</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">prepare_request</span><span class="p">()</span>
+
+ <span class="n">response</span> <span class="o">=</span> <span class="n">requests</span><span class="o">.</span><span class="n">request</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">method</span><span class="p">,</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">url</span><span class="p">,</span>
+ <span class="n">headers</span><span class="o">=</span><span class="p">{</span>
+ <span class="s1">'Content-Type'</span><span class="p">:</span> <span class="s1">'application/json'</span><span class="p">,</span>
+ <span class="s1">'Authorization'</span><span class="p">:</span> <span class="s1">'Bearer </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">token</span><span class="p">},</span>
+ <span class="n">data</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">body</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">response</span><span class="o">.</span><span class="n">status_code</span> <span class="o">>=</span> <span class="mi">400</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s1">'HipChat API call failed: </span><span class="si">%s</span><span class="s1"> </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span>
+ <span class="n">response</span><span class="o">.</span><span class="n">status_code</span><span class="p">,</span> <span class="n">response</span><span class="o">.</span><span class="n">reason</span><span class="p">)</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span><span class="s1">'HipChat API call failed: </span><span class="si">%s</span><span class="s1"> </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span>
+ <span class="p">(</span><span class="n">response</span><span class="o">.</span><span class="n">status_code</span><span class="p">,</span> <span class="n">response</span><span class="o">.</span><span class="n">reason</span><span class="p">))</span></div>
+
+
+<div class="viewcode-block" id="HipChatAPISendRoomNotificationOperator"><a class="viewcode-back" href="../../../../code.html#airflow.contrib.operators.hipchat_operator.HipChatAPISendRoomNotificationOperator">[docs]</a><span class="k">class</span> <span class="nc">HipChatAPISendRoomNotificationOperator</span><span class="p">(</span><span class="n">HipChatAPIOperator</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Send notification to a specific HipChat room.</span>
+<span class="sd"> More info: https://www.hipchat.com/docs/apiv2/method/send_room_notification</span>
+
+<span class="sd"> :param room_id: Room in which to send notification on HipChat</span>
+<span class="sd"> :type room_id: str</span>
+<span class="sd"> :param message: The message body</span>
+<span class="sd"> :type message: str</span>
+<span class="sd"> :param frm: Label to be shown in addition to sender's name</span>
+<span class="sd"> :type frm: str</span>
+<span class="sd"> :param message_format: How the notification is rendered: html or text</span>
+<span class="sd"> :type message_format: str</span>
+<span class="sd"> :param color: Background color of the msg: yellow, green, red, purple, gray, or random</span>
+<span class="sd"> :type color: str</span>
+<span class="sd"> :param attach_to: The message id to attach this notification to</span>
+<span class="sd"> :type attach_to: str</span>
+<span class="sd"> :param notify: Whether this message should trigger a user notification</span>
+<span class="sd"> :type notify: bool</span>
+<span class="sd"> :param card: HipChat-defined card object</span>
+<span class="sd"> :type card: dict</span>
+<span class="sd"> """</span>
+ <span class="n">template_fields</span> <span class="o">=</span> <span class="p">(</span><span class="s1">'token'</span><span class="p">,</span> <span class="s1">'room_id'</span><span class="p">,</span> <span class="s1">'message'</span><span class="p">)</span>
+ <span class="n">ui_color</span> <span class="o">=</span> <span class="s1">'#2980b9'</span>
+
+ <span class="nd">@apply_defaults</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">room_id</span><span class="p">,</span> <span class="n">message</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="nb">super</span><span class="p">(</span><span class="n">HipChatAPISendRoomNotificationOperator</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">room_id</span> <span class="o">=</span> <span class="n">room_id</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">message</span> <span class="o">=</span> <span class="n">message</span>
+ <span class="n">default_options</span> <span class="o">=</span> <span class="p">{</span>
+ <span class="s1">'message_format'</span><span class="p">:</span> <span class="s1">'html'</span><span class="p">,</span>
+ <span class="s1">'color'</span><span class="p">:</span> <span class="s1">'yellow'</span><span class="p">,</span>
+ <span class="s1">'frm'</span><span class="p">:</span> <span class="s1">'airflow'</span><span class="p">,</span>
+ <span class="s1">'attach_to'</span><span class="p">:</span> <span class="bp">None</span><span class="p">,</span>
+ <span class="s1">'notify'</span><span class="p">:</span> <span class="bp">False</span><span class="p">,</span>
+ <span class="s1">'card'</span><span class="p">:</span> <span class="bp">None</span>
+ <span class="p">}</span>
+ <span class="k">for</span> <span class="p">(</span><span class="n">prop</span><span class="p">,</span> <span class="n">default</span><span class="p">)</span> <span class="ow">in</span> <span class="n">default_options</span><span class="o">.</span><span class="n">iteritems</span><span class="p">():</span>
+ <span class="nb">setattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">prop</span><span class="p">,</span> <span class="n">kwargs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">prop</span><span class="p">,</span> <span class="n">default</span><span class="p">))</span>
+
+ <span class="k">def</span> <span class="nf">prepare_request</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="n">params</span> <span class="o">=</span> <span class="p">{</span>
+ <span class="s1">'message'</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">message</span><span class="p">,</span>
+ <span class="s1">'message_format'</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">message_format</span><span class="p">,</span>
+ <span class="s1">'color'</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">color</span><span class="p">,</span>
+ <span class="s1">'from'</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">frm</span><span class="p">,</span>
+ <span class="s1">'attach_to'</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">attach_to</span><span class="p">,</span>
+ <span class="s1">'notify'</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">notify</span><span class="p">,</span>
+ <span class="s1">'card'</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">card</span>
+ <span class="p">}</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">method</span> <span class="o">=</span> <span class="s1">'POST'</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">url</span> <span class="o">=</span> <span class="s1">'</span><span class="si">%s</span><span class="s1">/room/</span><span class="si">%s</span><span class="s1">/notification'</span> <span class="o">%</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">base_url</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">r [...]
+ <span class="bp">self</span><span class="o">.</span><span class="n">body</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="nb">dict</span><span class="p">(</span>
+ <span class="p">(</span><span class="n">k</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s1">'utf-8'</span><span class="p">),</span> <span class="n">v</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s1">'utf-8'</span><span class="p">))</span> <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</sp [...]
+</pre></div>
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright 2014, Maxime Beauchemin, Airbnb.
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../../../../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../../../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../../../../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/_modules/airflow/executors/celery_executor.html b/_modules/airflow/executors/celery_executor.html
new file mode 100644
index 0000000..4f91594
--- /dev/null
+++ b/_modules/airflow/executors/celery_executor.html
@@ -0,0 +1,298 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>airflow.executors.celery_executor — Airflow Documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="top" title="Airflow Documentation" href="../../../index.html"/>
+ <link rel="up" title="Module code" href="../../index.html"/>
+
+
+ <script src="../../../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../../../index.html" class="icon icon-home"> Airflow
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../project.html">Project</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../license.html">License</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../start.html">Quick Start</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../installation.html">Installation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../tutorial.html">Tutorial</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../configuration.html">Configuration</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../ui.html">UI / Screenshots</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../concepts.html">Concepts</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../profiling.html">Data Profiling</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../cli.html">Command Line Interface</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../scheduler.html">Scheduling & Triggers</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../plugins.html">Plugins</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../security.html">Security</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../faq.html">FAQ</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../code.html">API Reference</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../../../index.html">Airflow</a>
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+ <ul class="wy-breadcrumbs">
+ <li><a href="../../../index.html">Docs</a> »</li>
+
+ <li><a href="../../index.html">Module code</a> »</li>
+
+ <li>airflow.executors.celery_executor</li>
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+ </ul>
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for airflow.executors.celery_executor</h1><div class="highlight"><pre>
+<span></span><span class="kn">from</span> <span class="nn">builtins</span> <span class="kn">import</span> <span class="nb">object</span>
+<span class="kn">import</span> <span class="nn">logging</span>
+<span class="kn">import</span> <span class="nn">subprocess</span>
+<span class="kn">import</span> <span class="nn">time</span>
+
+<span class="kn">from</span> <span class="nn">celery</span> <span class="kn">import</span> <span class="n">Celery</span>
+<span class="kn">from</span> <span class="nn">celery</span> <span class="kn">import</span> <span class="n">states</span> <span class="k">as</span> <span class="n">celery_states</span>
+
+<span class="kn">from</span> <span class="nn">airflow.exceptions</span> <span class="kn">import</span> <span class="n">AirflowException</span>
+<span class="kn">from</span> <span class="nn">airflow.executors.base_executor</span> <span class="kn">import</span> <span class="n">BaseExecutor</span>
+<span class="kn">from</span> <span class="nn">airflow</span> <span class="kn">import</span> <span class="n">configuration</span>
+
+<span class="n">PARALLELISM</span> <span class="o">=</span> <span class="n">configuration</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'core'</span><span class="p">,</span> <span class="s1">'PARALLELISM'</span><span class="p">)</span>
+
+<span class="sd">'''</span>
+<span class="sd">To start the celery worker, run the command:</span>
+<span class="sd">airflow worker</span>
+<span class="sd">'''</span>
+
+<span class="n">DEFAULT_QUEUE</span> <span class="o">=</span> <span class="n">configuration</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'celery'</span><span class="p">,</span> <span class="s1">'DEFAULT_QUEUE'</span><span class="p">)</span>
+
+
+<span class="k">class</span> <span class="nc">CeleryConfig</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+ <span class="n">CELERY_ACCEPT_CONTENT</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'json'</span><span class="p">,</span> <span class="s1">'pickle'</span><span class="p">]</span>
+ <span class="n">CELERYD_PREFETCH_MULTIPLIER</span> <span class="o">=</span> <span class="mi">1</span>
+ <span class="n">CELERY_ACKS_LATE</span> <span class="o">=</span> <span class="bp">True</span>
+ <span class="n">BROKER_URL</span> <span class="o">=</span> <span class="n">configuration</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'celery'</span><span class="p">,</span> <span class="s1">'BROKER_URL'</span><span class="p">)</span>
+ <span class="n">CELERY_RESULT_BACKEND</span> <span class="o">=</span> <span class="n">configuration</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'celery'</span><span class="p">,</span> <span class="s1">'CELERY_RESULT_BACKEND'</span><span class="p">)</span>
+ <span class="n">CELERYD_CONCURRENCY</span> <span class="o">=</span> <span class="n">configuration</span><span class="o">.</span><span class="n">getint</span><span class="p">(</span><span class="s1">'celery'</span><span class="p">,</span> <span class="s1">'CELERYD_CONCURRENCY'</span><span class="p">)</span>
+ <span class="n">CELERY_DEFAULT_QUEUE</span> <span class="o">=</span> <span class="n">DEFAULT_QUEUE</span>
+ <span class="n">CELERY_DEFAULT_EXCHANGE</span> <span class="o">=</span> <span class="n">DEFAULT_QUEUE</span>
+
+<span class="n">app</span> <span class="o">=</span> <span class="n">Celery</span><span class="p">(</span>
+ <span class="n">configuration</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'celery'</span><span class="p">,</span> <span class="s1">'CELERY_APP_NAME'</span><span class="p">),</span>
+ <span class="n">config_source</span><span class="o">=</span><span class="n">CeleryConfig</span><span class="p">)</span>
+
+
+<span class="nd">@app.task</span>
+<span class="k">def</span> <span class="nf">execute_command</span><span class="p">(</span><span class="n">command</span><span class="p">):</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"Executing command in Celery "</span> <span class="o">+</span> <span class="n">command</span><span class="p">)</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">subprocess</span><span class="o">.</span><span class="n">check_call</span><span class="p">(</span><span class="n">command</span><span class="p">,</span> <span class="n">shell</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="k">except</span> <span class="n">subprocess</span><span class="o">.</span><span class="n">CalledProcessError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="n">e</span><span class="p">)</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span><span class="s1">'Celery command failed'</span><span class="p">)</span>
+
+
+<div class="viewcode-block" id="CeleryExecutor"><a class="viewcode-back" href="../../../code.html#airflow.executors.CeleryExecutor">[docs]</a><span class="k">class</span> <span class="nc">CeleryExecutor</span><span class="p">(</span><span class="n">BaseExecutor</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> CeleryExecutor is recommended for production use of Airflow. It allows</span>
+<span class="sd"> distributing the execution of task instances to multiple worker nodes.</span>
+
+<span class="sd"> Celery is a simple, flexible and reliable distributed system to process</span>
+<span class="sd"> vast amounts of messages, while providing operations with the tools</span>
+<span class="sd"> required to maintain such a system.</span>
+<span class="sd"> """</span>
+
+ <span class="k">def</span> <span class="nf">start</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">tasks</span> <span class="o">=</span> <span class="p">{}</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">last_state</span> <span class="o">=</span> <span class="p">{}</span>
+
+ <span class="k">def</span> <span class="nf">execute_async</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">,</span> <span class="n">command</span><span class="p">,</span> <span class="n">queue</span><span class="o">=</span><span class="n">DEFAULT_QUEUE</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span> <span class="s2">"[celery] queuing {key} through celery, "</span>
+ <span class="s2">"queue={queue}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">()))</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">tasks</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">execute_command</span><span class="o">.</span><span class="n">apply_async</span><span class="p">(</span>
+ <span class="n">args</span><span class="o">=</span><span class="p">[</span><span class="n">command</span><span class="p">],</span> <span class="n">queue</span><span class="o">=</span><span class="n">queue</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">last_state</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">celery_states</span><span class="o">.</span><span class="n">PENDING</span>
+
+ <span class="k">def</span> <span class="nf">sync</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span>
+ <span class="s2">"Inquiring about {} celery task(s)"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tasks</span><span class="p">)))</span>
+ <span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">async</span> <span class="ow">in</span> <span class="nb">list</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tasks</span><span class="o">.</span><span class="n">items</span><span class="p">()):</span>
+ <span class="n">state</span> <span class="o">=</span> <span class="n">async</span><span class="o">.</span><span class="n">state</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">last_state</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">!=</span> <span class="n">state</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">state</span> <span class="o">==</span> <span class="n">celery_states</span><span class="o">.</span><span class="n">SUCCESS</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">success</span><span class="p">(</span><span class="n">key</span><span class="p">)</span>
+ <span class="k">del</span> <span class="bp">self</span><span class="o">.</span><span class="n">tasks</span><span class="p">[</span><span class="n">key</span><span class="p">]</span>
+ <span class="k">del</span> <span class="bp">self</span><span class="o">.</span><span class="n">last_state</span><span class="p">[</span><span class="n">key</span><span class="p">]</span>
+ <span class="k">elif</span> <span class="n">state</span> <span class="o">==</span> <span class="n">celery_states</span><span class="o">.</span><span class="n">FAILURE</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">fail</span><span class="p">(</span><span class="n">key</span><span class="p">)</span>
+ <span class="k">del</span> <span class="bp">self</span><span class="o">.</span><span class="n">tasks</span><span class="p">[</span><span class="n">key</span><span class="p">]</span>
+ <span class="k">del</span> <span class="bp">self</span><span class="o">.</span><span class="n">last_state</span><span class="p">[</span><span class="n">key</span><span class="p">]</span>
+ <span class="k">elif</span> <span class="n">state</span> <span class="o">==</span> <span class="n">celery_states</span><span class="o">.</span><span class="n">REVOKED</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">fail</span><span class="p">(</span><span class="n">key</span><span class="p">)</span>
+ <span class="k">del</span> <span class="bp">self</span><span class="o">.</span><span class="n">tasks</span><span class="p">[</span><span class="n">key</span><span class="p">]</span>
+ <span class="k">del</span> <span class="bp">self</span><span class="o">.</span><span class="n">last_state</span><span class="p">[</span><span class="n">key</span><span class="p">]</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"Unexpected state: "</span> <span class="o">+</span> <span class="n">async</span><span class="o">.</span><span class="n">state</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">last_state</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">async</span><span class="o">.</span><span class="n">state</span>
+
+ <span class="k">def</span> <span class="nf">end</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">synchronous</span><span class="o">=</span><span class="bp">False</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">synchronous</span><span class="p">:</span>
+ <span class="k">while</span> <span class="nb">any</span><span class="p">([</span>
+ <span class="n">async</span><span class="o">.</span><span class="n">state</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">celery_states</span><span class="o">.</span><span class="n">READY_STATES</span>
+ <span class="k">for</span> <span class="n">async</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">tasks</span><span class="o">.</span><span class="n">values</span><span class="p">()]):</span>
+ <span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="mi">5</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">sync</span><span class="p">()</span></div>
+</pre></div>
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright 2014, Maxime Beauchemin, Airbnb.
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../../../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../../../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/_modules/airflow/executors/local_executor.html b/_modules/airflow/executors/local_executor.html
new file mode 100644
index 0000000..efd28ef
--- /dev/null
+++ b/_modules/airflow/executors/local_executor.html
@@ -0,0 +1,276 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>airflow.executors.local_executor — Airflow Documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="top" title="Airflow Documentation" href="../../../index.html"/>
+ <link rel="up" title="Module code" href="../../index.html"/>
+
+
+ <script src="../../../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../../../index.html" class="icon icon-home"> Airflow
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../project.html">Project</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../license.html">License</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../start.html">Quick Start</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../installation.html">Installation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../tutorial.html">Tutorial</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../configuration.html">Configuration</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../ui.html">UI / Screenshots</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../concepts.html">Concepts</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../profiling.html">Data Profiling</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../cli.html">Command Line Interface</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../scheduler.html">Scheduling & Triggers</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../plugins.html">Plugins</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../security.html">Security</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../faq.html">FAQ</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../code.html">API Reference</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../../../index.html">Airflow</a>
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+ <ul class="wy-breadcrumbs">
+ <li><a href="../../../index.html">Docs</a> »</li>
+
+ <li><a href="../../index.html">Module code</a> »</li>
+
+ <li>airflow.executors.local_executor</li>
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+ </ul>
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for airflow.executors.local_executor</h1><div class="highlight"><pre>
+<span></span><span class="kn">import</span> <span class="nn">multiprocessing</span>
+<span class="kn">import</span> <span class="nn">subprocess</span>
+<span class="kn">import</span> <span class="nn">time</span>
+
+<span class="kn">from</span> <span class="nn">builtins</span> <span class="kn">import</span> <span class="nb">range</span>
+
+<span class="kn">from</span> <span class="nn">airflow</span> <span class="kn">import</span> <span class="n">configuration</span>
+<span class="kn">from</span> <span class="nn">airflow.executors.base_executor</span> <span class="kn">import</span> <span class="n">BaseExecutor</span>
+<span class="kn">from</span> <span class="nn">airflow.utils.state</span> <span class="kn">import</span> <span class="n">State</span>
+<span class="kn">from</span> <span class="nn">airflow.utils.logging</span> <span class="kn">import</span> <span class="n">LoggingMixin</span>
+
+<span class="n">PARALLELISM</span> <span class="o">=</span> <span class="n">configuration</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'core'</span><span class="p">,</span> <span class="s1">'PARALLELISM'</span><span class="p">)</span>
+
+
+<span class="k">class</span> <span class="nc">LocalWorker</span><span class="p">(</span><span class="n">multiprocessing</span><span class="o">.</span><span class="n">Process</span><span class="p">,</span> <span class="n">LoggingMixin</span><span class="p">):</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">task_queue</span><span class="p">,</span> <span class="n">result_queue</span><span class="p">):</span>
+ <span class="n">multiprocessing</span><span class="o">.</span><span class="n">Process</span><span class="o">.</span><span class="n">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">task_queue</span> <span class="o">=</span> <span class="n">task_queue</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">result_queue</span> <span class="o">=</span> <span class="n">result_queue</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">daemon</span> <span class="o">=</span> <span class="bp">True</span>
+
+ <span class="k">def</span> <span class="nf">run</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">while</span> <span class="bp">True</span><span class="p">:</span>
+ <span class="n">key</span><span class="p">,</span> <span class="n">command</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">task_queue</span><span class="o">.</span><span class="n">get</span><span class="p">()</span>
+ <span class="k">if</span> <span class="n">key</span> <span class="ow">is</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="c1"># Received poison pill, no more tasks to run</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">task_queue</span><span class="o">.</span><span class="n">task_done</span><span class="p">()</span>
+ <span class="k">break</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"{} running {}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">__class__</span><span class="o">.</span><span class="n">__name__</span><span class="p">,</span> <span class="n">command</span><span class="p">))</span>
+ <span class="n">command</span> <span class="o">=</span> <span class="s2">"exec bash -c '{0}'"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">command</span><span class="p">)</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">subprocess</span><span class="o">.</span><span class="n">check_call</span><span class="p">(</span><span class="n">command</span><span class="p">,</span> <span class="n">shell</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="n">state</span> <span class="o">=</span> <span class="n">State</span><span class="o">.</span><span class="n">SUCCESS</span>
+ <span class="k">except</span> <span class="n">subprocess</span><span class="o">.</span><span class="n">CalledProcessError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
+ <span class="n">state</span> <span class="o">=</span> <span class="n">State</span><span class="o">.</span><span class="n">FAILED</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s2">"failed to execute task {}:"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">)))</span>
+ <span class="c1"># raise e</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">result_queue</span><span class="o">.</span><span class="n">put</span><span class="p">((</span><span class="n">key</span><span class="p">,</span> <span class="n">state</span><span class="p">))</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">task_queue</span><span class="o">.</span><span class="n">task_done</span><span class="p">()</span>
+ <span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
+
+
+<div class="viewcode-block" id="LocalExecutor"><a class="viewcode-back" href="../../../code.html#airflow.executors.LocalExecutor">[docs]</a><span class="k">class</span> <span class="nc">LocalExecutor</span><span class="p">(</span><span class="n">BaseExecutor</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> LocalExecutor executes tasks locally in parallel. It uses the</span>
+<span class="sd"> multiprocessing Python library and queues to parallelize the execution</span>
+<span class="sd"> of tasks.</span>
+<span class="sd"> """</span>
+
+ <span class="k">def</span> <span class="nf">start</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">queue</span> <span class="o">=</span> <span class="n">multiprocessing</span><span class="o">.</span><span class="n">JoinableQueue</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">result_queue</span> <span class="o">=</span> <span class="n">multiprocessing</span><span class="o">.</span><span class="n">Queue</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">workers</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="n">LocalWorker</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">queue</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">result_queue</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">parallelism</span><span class="p">)</span>
+ <span class="p">]</span>
+
+ <span class="k">for</span> <span class="n">w</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">workers</span><span class="p">:</span>
+ <span class="n">w</span><span class="o">.</span><span class="n">start</span><span class="p">()</span>
+
+ <span class="k">def</span> <span class="nf">execute_async</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">,</span> <span class="n">command</span><span class="p">,</span> <span class="n">queue</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">queue</span><span class="o">.</span><span class="n">put</span><span class="p">((</span><span class="n">key</span><span class="p">,</span> <span class="n">command</span><span class="p">))</span>
+
+ <span class="k">def</span> <span class="nf">sync</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">while</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">result_queue</span><span class="o">.</span><span class="n">empty</span><span class="p">():</span>
+ <span class="n">results</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">result_queue</span><span class="o">.</span><span class="n">get</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">change_state</span><span class="p">(</span><span class="o">*</span><span class="n">results</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">end</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="c1"># Sending poison pill to all worker</span>
+ <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">queue</span><span class="o">.</span><span class="n">put</span><span class="p">((</span><span class="bp">None</span><span class="p">,</span> <span class="bp">None</span><span class="p">))</span> <span class="k">for</span> <span class="n">w</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">workers</span><span class="p">]</span>
+ <span class="c1"># Wait for commands to finish</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">queue</span><span class="o">.</span><span class="n">join</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">sync</span><span class="p">()</span></div>
+</pre></div>
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright 2014, Maxime Beauchemin, Airbnb.
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../../../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../../../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/_modules/airflow/executors/sequential_executor.html b/_modules/airflow/executors/sequential_executor.html
new file mode 100644
index 0000000..2d90305
--- /dev/null
+++ b/_modules/airflow/executors/sequential_executor.html
@@ -0,0 +1,238 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>airflow.executors.sequential_executor — Airflow Documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="top" title="Airflow Documentation" href="../../../index.html"/>
+ <link rel="up" title="Module code" href="../../index.html"/>
+
+
+ <script src="../../../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../../../index.html" class="icon icon-home"> Airflow
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../project.html">Project</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../license.html">License</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../start.html">Quick Start</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../installation.html">Installation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../tutorial.html">Tutorial</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../configuration.html">Configuration</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../ui.html">UI / Screenshots</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../concepts.html">Concepts</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../profiling.html">Data Profiling</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../cli.html">Command Line Interface</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../scheduler.html">Scheduling & Triggers</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../plugins.html">Plugins</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../security.html">Security</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../faq.html">FAQ</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../code.html">API Reference</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../../../index.html">Airflow</a>
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+ <ul class="wy-breadcrumbs">
+ <li><a href="../../../index.html">Docs</a> »</li>
+
+ <li><a href="../../index.html">Module code</a> »</li>
+
+ <li>airflow.executors.sequential_executor</li>
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+ </ul>
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for airflow.executors.sequential_executor</h1><div class="highlight"><pre>
+<span></span><span class="kn">from</span> <span class="nn">builtins</span> <span class="kn">import</span> <span class="nb">str</span>
+<span class="kn">import</span> <span class="nn">subprocess</span>
+
+<span class="kn">from</span> <span class="nn">airflow.executors.base_executor</span> <span class="kn">import</span> <span class="n">BaseExecutor</span>
+<span class="kn">from</span> <span class="nn">airflow.utils.state</span> <span class="kn">import</span> <span class="n">State</span>
+
+
+<div class="viewcode-block" id="SequentialExecutor"><a class="viewcode-back" href="../../../code.html#airflow.executors.SequentialExecutor">[docs]</a><span class="k">class</span> <span class="nc">SequentialExecutor</span><span class="p">(</span><span class="n">BaseExecutor</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> This executor will only run one task instance at a time, can be used</span>
+<span class="sd"> for debugging. It is also the only executor that can be used with sqlite</span>
+<span class="sd"> since sqlite doesn't support multiple connections.</span>
+
+<span class="sd"> Since we want airflow to work out of the box, it defaults to this</span>
+<span class="sd"> SequentialExecutor alongside sqlite when you first install it.</span>
+<span class="sd"> """</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="nb">super</span><span class="p">(</span><span class="n">SequentialExecutor</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">__init__</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">commands_to_run</span> <span class="o">=</span> <span class="p">[]</span>
+
+ <span class="k">def</span> <span class="nf">execute_async</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">,</span> <span class="n">command</span><span class="p">,</span> <span class="n">queue</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">commands_to_run</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">key</span><span class="p">,</span> <span class="n">command</span><span class="p">,))</span>
+
+ <span class="k">def</span> <span class="nf">sync</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">command</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">commands_to_run</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"Executing command: {}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">command</span><span class="p">))</span>
+
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">subprocess</span><span class="o">.</span><span class="n">check_call</span><span class="p">(</span><span class="n">command</span><span class="p">,</span> <span class="n">shell</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">change_state</span><span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">State</span><span class="o">.</span><span class="n">SUCCESS</span><span class="p">)</span>
+ <span class="k">except</span> <span class="n">subprocess</span><span class="o">.</span><span class="n">CalledProcessError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">change_state</span><span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">State</span><span class="o">.</span><span class="n">FAILED</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s2">"Failed to execute task {}:"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">)))</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">commands_to_run</span> <span class="o">=</span> <span class="p">[]</span>
+
+ <span class="k">def</span> <span class="nf">end</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">heartbeat</span><span class="p">()</span></div>
+</pre></div>
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright 2014, Maxime Beauchemin, Airbnb.
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../../../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../../../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/_modules/airflow/macros.html b/_modules/airflow/macros.html
new file mode 100644
index 0000000..be420df
--- /dev/null
+++ b/_modules/airflow/macros.html
@@ -0,0 +1,255 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>airflow.macros — Airflow Documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="top" title="Airflow Documentation" href="../../index.html"/>
+ <link rel="up" title="Module code" href="../index.html"/>
+
+
+ <script src="../../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../../index.html" class="icon icon-home"> Airflow
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../../project.html">Project</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../license.html">License</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../start.html">Quick Start</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../installation.html">Installation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../tutorial.html">Tutorial</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../configuration.html">Configuration</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../ui.html">UI / Screenshots</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../concepts.html">Concepts</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../profiling.html">Data Profiling</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../cli.html">Command Line Interface</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../scheduler.html">Scheduling & Triggers</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../plugins.html">Plugins</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../security.html">Security</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../faq.html">FAQ</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../code.html">API Reference</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../../index.html">Airflow</a>
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+ <ul class="wy-breadcrumbs">
+ <li><a href="../../index.html">Docs</a> »</li>
+
+ <li><a href="../index.html">Module code</a> »</li>
+
+ <li>airflow.macros</li>
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+ </ul>
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for airflow.macros</h1><div class="highlight"><pre>
+<span></span><span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">absolute_import</span>
+<span class="kn">from</span> <span class="nn">random</span> <span class="kn">import</span> <span class="n">random</span>
+<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span><span class="p">,</span> <span class="n">timedelta</span>
+<span class="kn">import</span> <span class="nn">dateutil</span>
+<span class="kn">import</span> <span class="nn">time</span>
+<span class="kn">from</span> <span class="nn">.</span> <span class="kn">import</span> <span class="n">hive</span>
+<span class="kn">import</span> <span class="nn">uuid</span>
+
+
+<div class="viewcode-block" id="ds_add"><a class="viewcode-back" href="../../code.html#airflow.macros.ds_add">[docs]</a><span class="k">def</span> <span class="nf">ds_add</span><span class="p">(</span><span class="n">ds</span><span class="p">,</span> <span class="n">days</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Add or subtract days from a YYYY-MM-DD date string</span>
+
+<span class="sd"> :param ds: anchor date in ``YYYY-MM-DD`` format to add to</span>
+<span class="sd"> :type ds: str</span>
+<span class="sd"> :param days: number of days to add to the ds, you can use negative values</span>
+<span class="sd"> :type days: int</span>
+
+<span class="sd"> >>> ds_add('2015-01-01', 5)</span>
+<span class="sd"> '2015-01-06'</span>
+<span class="sd"> >>> ds_add('2015-01-06', -5)</span>
+<span class="sd"> '2015-01-01'</span>
+<span class="sd"> """</span>
+
+ <span class="n">ds</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">strptime</span><span class="p">(</span><span class="n">ds</span><span class="p">,</span> <span class="s1">'%Y-%m-</span><span class="si">%d</span><span class="s1">'</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">days</span><span class="p">:</span>
+ <span class="n">ds</span> <span class="o">=</span> <span class="n">ds</span> <span class="o">+</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">days</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">ds</span><span class="o">.</span><span class="n">isoformat</span><span class="p">()[:</span><span class="mi">10</span><span class="p">]</span></div>
+
+
+<div class="viewcode-block" id="ds_format"><a class="viewcode-back" href="../../code.html#airflow.macros.ds_format">[docs]</a><span class="k">def</span> <span class="nf">ds_format</span><span class="p">(</span><span class="n">ds</span><span class="p">,</span> <span class="n">input_format</span><span class="p">,</span> <span class="n">output_format</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Takes an input string and outputs another string</span>
+<span class="sd"> as specified in the output format</span>
+
+<span class="sd"> :param ds: input string which contains a date</span>
+<span class="sd"> :type ds: str</span>
+<span class="sd"> :param input_format: input string format. E.g. %Y-%m-%d</span>
+<span class="sd"> :type input_format: str</span>
+<span class="sd"> :param output_format: output string format E.g. %Y-%m-%d</span>
+<span class="sd"> :type output_format: str</span>
+
+<span class="sd"> >>> ds_format('2015-01-01', "%Y-%m-%d", "%m-%d-%y")</span>
+<span class="sd"> '01-01-15'</span>
+<span class="sd"> >>> ds_format('1/5/2015', "%m/%d/%Y", "%Y-%m-%d")</span>
+<span class="sd"> '2015-01-05'</span>
+<span class="sd"> """</span>
+ <span class="k">return</span> <span class="n">datetime</span><span class="o">.</span><span class="n">strptime</span><span class="p">(</span><span class="n">ds</span><span class="p">,</span> <span class="n">input_format</span><span class="p">)</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="n">output_format</span><span class="p">)</span></div>
+
+
+<div class="viewcode-block" id="integrate_plugins"><a class="viewcode-back" href="../../code.html#airflow.macros.integrate_plugins">[docs]</a><span class="k">def</span> <span class="nf">integrate_plugins</span><span class="p">():</span>
+ <span class="sd">"""Integrate plugins to the context"""</span>
+ <span class="kn">from</span> <span class="nn">airflow.plugins_manager</span> <span class="kn">import</span> <span class="n">macros</span> <span class="k">as</span> <span class="n">_macros</span>
+ <span class="k">for</span> <span class="n">_macro</span> <span class="ow">in</span> <span class="n">_macros</span><span class="p">:</span>
+ <span class="nb">globals</span><span class="p">()[</span><span class="n">_macro</span><span class="o">.</span><span class="n">__name__</span><span class="p">]</span> <span class="o">=</span> <span class="n">_macro</span></div>
+</pre></div>
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright 2014, Maxime Beauchemin, Airbnb.
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/_modules/airflow/macros/hive.html b/_modules/airflow/macros/hive.html
new file mode 100644
index 0000000..cfcc1a0
--- /dev/null
+++ b/_modules/airflow/macros/hive.html
@@ -0,0 +1,298 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>airflow.macros.hive — Airflow Documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="top" title="Airflow Documentation" href="../../../index.html"/>
+ <link rel="up" title="airflow.macros" href="../macros.html"/>
+
+
+ <script src="../../../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../../../index.html" class="icon icon-home"> Airflow
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../project.html">Project</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../license.html">License</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../start.html">Quick Start</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../installation.html">Installation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../tutorial.html">Tutorial</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../configuration.html">Configuration</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../ui.html">UI / Screenshots</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../concepts.html">Concepts</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../profiling.html">Data Profiling</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../cli.html">Command Line Interface</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../scheduler.html">Scheduling & Triggers</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../plugins.html">Plugins</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../security.html">Security</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../faq.html">FAQ</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../code.html">API Reference</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../../../index.html">Airflow</a>
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+ <ul class="wy-breadcrumbs">
+ <li><a href="../../../index.html">Docs</a> »</li>
+
+ <li><a href="../../index.html">Module code</a> »</li>
+
+ <li><a href="../macros.html">airflow.macros</a> »</li>
+
+ <li>airflow.macros.hive</li>
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+ </ul>
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for airflow.macros.hive</h1><div class="highlight"><pre>
+<span></span><span class="kn">import</span> <span class="nn">datetime</span>
+
+
+<div class="viewcode-block" id="max_partition"><a class="viewcode-back" href="../../../code.html#airflow.macros.hive.max_partition">[docs]</a><span class="k">def</span> <span class="nf">max_partition</span><span class="p">(</span>
+ <span class="n">table</span><span class="p">,</span> <span class="n">schema</span><span class="o">=</span><span class="s2">"default"</span><span class="p">,</span> <span class="n">field</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="nb">filter</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">metastore_conn_id</span><span class="o">=</span><span class="s1">'metastore_default'</span><span class="p">):</span>
+ <span class="sd">'''</span>
+<span class="sd"> Gets the max partition for a table.</span>
+
+<span class="sd"> :param schema: The hive schema the table lives in</span>
+<span class="sd"> :type schema: string</span>
+<span class="sd"> :param table: The hive table you are interested in, supports the dot</span>
+<span class="sd"> notation as in "my_database.my_table", if a dot is found,</span>
+<span class="sd"> the schema param is disregarded</span>
+<span class="sd"> :type table: string</span>
+<span class="sd"> :param metastore_conn_id: The hive metastore connection you are interested in.</span>
+<span class="sd"> If your default is set you don't need to use this parameter.</span>
+<span class="sd"> :type metastore_conn_id: string</span>
+<span class="sd"> :param filter: filter on a subset of partition as in</span>
+<span class="sd"> `sub_part='specific_value'`</span>
+<span class="sd"> :type filter: string</span>
+<span class="sd"> :param field: the field to get the max value from. If there's only</span>
+<span class="sd"> one partition field, this will be inferred</span>
+
+<span class="sd"> >>> max_partition('airflow.static_babynames_partitioned')</span>
+<span class="sd"> '2015-01-01'</span>
+<span class="sd"> '''</span>
+ <span class="kn">from</span> <span class="nn">airflow.hooks</span> <span class="kn">import</span> <span class="n">HiveMetastoreHook</span>
+ <span class="k">if</span> <span class="s1">'.'</span> <span class="ow">in</span> <span class="n">table</span><span class="p">:</span>
+ <span class="n">schema</span><span class="p">,</span> <span class="n">table</span> <span class="o">=</span> <span class="n">table</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'.'</span><span class="p">)</span>
+ <span class="n">hh</span> <span class="o">=</span> <span class="n">HiveMetastoreHook</span><span class="p">(</span><span class="n">metastore_conn_id</span><span class="o">=</span><span class="n">metastore_conn_id</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">hh</span><span class="o">.</span><span class="n">max_partition</span><span class="p">(</span>
+ <span class="n">schema</span><span class="o">=</span><span class="n">schema</span><span class="p">,</span> <span class="n">table_name</span><span class="o">=</span><span class="n">table</span><span class="p">,</span> <span class="n">field</span><span class="o">=</span><span class="n">field</span><span class="p">,</span> <span class="nb">filter</span><span class="o">=</span><span class="nb">filter</span><span class="p">)</span></div>
+
+
+<span class="k">def</span> <span class="nf">_closest_date</span><span class="p">(</span><span class="n">target_dt</span><span class="p">,</span> <span class="n">date_list</span><span class="p">,</span> <span class="n">before_target</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">'''</span>
+<span class="sd"> This function finds the date in a list closest to the target date.</span>
+<span class="sd"> An optional parameter can be given to get the closest before or after.</span>
+
+<span class="sd"> :param target_dt: The target date</span>
+<span class="sd"> :type target_dt: datetime.date</span>
+<span class="sd"> :param date_list: The list of dates to search</span>
+<span class="sd"> :type date_list: datetime.date list</span>
+<span class="sd"> :param before_target: closest before or after the target</span>
+<span class="sd"> :type before_target: bool or None</span>
+<span class="sd"> :returns: The closest date</span>
+<span class="sd"> :rtype: datetime.date or None</span>
+<span class="sd"> '''</span>
+ <span class="n">fb</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">d</span><span class="p">:</span> <span class="n">d</span> <span class="o">-</span> <span class="n">target_dt</span> <span class="k">if</span> <span class="n">d</span> <span class="o">>=</span> <span class="n">target_dt</span> <span class="k">else</span> <span class="n">datetime</span><span class="o">.</span><span class="n">timedelta</span><span class="o">.</span><span class="n">max</span>
+ <span class="n">fa</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">d</span><span class="p">:</span> <span class="n">d</span> <span class="o">-</span> <span class="n">target_dt</span> <span class="k">if</span> <span class="n">d</span> <span class="o">&lt;=</span> <span class="n">target_dt</span> <span class="k">else</span> <span class="n">datetime</span><span class="o">.</span><span class="n">timedelta</span><span class="o">.</span><span class="n">min</span>
+ <span class="n">fnone</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">d</span><span class="p">:</span> <span class="n">target_dt</span> <span class="o">-</span> <span class="n">d</span> <span class="k">if</span> <span class="n">d</span> <span class="o">&lt;</span> <span class="n">target_dt</span> <span class="k">else</span> <span class="n">d</span> <span class="o">-</span> <span class="n">target_dt</span>
+ <span class="k">if</span> <span class="n">before_target</span> <span class="ow">is</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="k">return</span> <span class="nb">min</span><span class="p">(</span><span class="n">date_list</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="n">fnone</span><span class="p">)</span><span class="o">.</span><span class="n">date</span><span class="p">()</span>
+ <span class="k">if</span> <span class="n">before_target</span><span class="p">:</span>
+ <span class="k">return</span> <span class="nb">min</span><span class="p">(</span><span class="n">date_list</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="n">fb</span><span class="p">)</span><span class="o">.</span><span class="n">date</span><span class="p">()</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">return</span> <span class="nb">min</span><span class="p">(</span><span class="n">date_list</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="n">fa</span><span class="p">)</span><span class="o">.</span><span class="n">date</span><span class="p">()</span>
+
+
+<div class="viewcode-block" id="closest_ds_partition"><a class="viewcode-back" href="../../../code.html#airflow.macros.hive.closest_ds_partition">[docs]</a><span class="k">def</span> <span class="nf">closest_ds_partition</span><span class="p">(</span>
+ <span class="n">table</span><span class="p">,</span> <span class="n">ds</span><span class="p">,</span> <span class="n">before</span><span class="o">=</span><span class="bp">True</span><span class="p">,</span> <span class="n">schema</span><span class="o">=</span><span class="s2">"default"</span><span class="p">,</span>
+ <span class="n">metastore_conn_id</span><span class="o">=</span><span class="s1">'metastore_default'</span><span class="p">):</span>
+ <span class="sd">'''</span>
+<span class="sd"> This function finds the partition date of a hive table closest to the target date.</span>
+<span class="sd"> An optional parameter can be given to get the closest before or after.</span>
+
+<span class="sd"> :param table: A hive table name</span>
+<span class="sd"> :type table: str</span>
+<span class="sd"> :param ds: A datestamp ``%Y-%m-%d`` e.g. ``yyyy-mm-dd``</span>
+<span class="sd"> :type ds: str</span>
+<span class="sd"> :param before: closest before (True), after (False) or either side of ds (None)</span>
+<span class="sd"> :type before: bool or None</span>
+<span class="sd"> :returns: The closest date</span>
+<span class="sd"> :rtype: str or None</span>
+
+<span class="sd"> >>> tbl = 'airflow.static_babynames_partitioned'</span>
+<span class="sd"> >>> closest_ds_partition(tbl, '2015-01-02')</span>
+<span class="sd"> '2015-01-01'</span>
+<span class="sd"> '''</span>
+ <span class="kn">from</span> <span class="nn">airflow.hooks</span> <span class="kn">import</span> <span class="n">HiveMetastoreHook</span>
+ <span class="k">if</span> <span class="s1">'.'</span> <span class="ow">in</span> <span class="n">table</span><span class="p">:</span>
+ <span class="n">schema</span><span class="p">,</span> <span class="n">table</span> <span class="o">=</span> <span class="n">table</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'.'</span><span class="p">)</span>
+ <span class="n">hh</span> <span class="o">=</span> <span class="n">HiveMetastoreHook</span><span class="p">(</span><span class="n">metastore_conn_id</span><span class="o">=</span><span class="n">metastore_conn_id</span><span class="p">)</span>
+ <span class="n">partitions</span> <span class="o">=</span> <span class="n">hh</span><span class="o">.</span><span class="n">get_partitions</span><span class="p">(</span><span class="n">schema</span><span class="o">=</span><span class="n">schema</span><span class="p">,</span> <span class="n">table_name</span><span class="o">=</span><span class="n">table</span><span class="p">)</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">partitions</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">None</span>
+ <span class="n">part_vals</span> <span class="o">=</span> <span class="p">[</span><span class="nb">list</span><span class="p">(</span><span class="n">p</span><span class="o">.</span><span class="n">values</span><span class="p">())[</span><span class="mi">0</span><span class="p">]</span> <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">partitions</span><span class="p">]</span>
+ <span class="k">if</span> <span class="n">ds</span> <span class="ow">in</span> <span class="n">part_vals</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">ds</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">parts</span> <span class="o">=</span> <span class="p">[</span><span class="n">datetime</span><span class="o">.</span><span class="n">datetime</span><span class="o">.</span><span class="n">strptime</span><span class="p">(</span><span class="n">pv</span><span class="p">,</span> <span class="s1">'%Y-%m-</span><span class="si">%d</span><span class="s1">'</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">pv</span> <span class="ow">in</span> <span class="n">part_vals</span><span class="p">]</span>
+ <span class="n">target_dt</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">datetime</span><span class="o">.</span><span class="n">strptime</span><span class="p">(</span><span class="n">ds</span><span class="p">,</span> <span class="s1">'%Y-%m-</span><span class="si">%d</span><span class="s1">'</span><span class="p">)</span>
+ <span class="n">closest_ds</span> <span class="o">=</span> <span class="n">_closest_date</span><span class="p">(</span><span class="n">target_dt</span><span class="p">,</span> <span class="n">parts</span><span class="p">,</span> <span class="n">before_target</span><span class="o">=</span><span class="n">before</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">closest_ds</span><span class="o">.</span><span class="n">isoformat</span><span class="p">()</span></div>
+</pre></div>
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright 2014, Maxime Beauchemin, Airbnb.
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../../../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../../../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/_modules/airflow/models.html b/_modules/airflow/models.html
new file mode 100644
index 0000000..9ec98be
--- /dev/null
+++ b/_modules/airflow/models.html
@@ -0,0 +1,3802 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>airflow.models — Airflow Documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="top" title="Airflow Documentation" href="../../index.html"/>
+ <link rel="up" title="Module code" href="../index.html"/>
+
+
+ <script src="../../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../../index.html" class="icon icon-home"> Airflow
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../../project.html">Project</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../license.html">License</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../start.html">Quick Start</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../installation.html">Installation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../tutorial.html">Tutorial</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../configuration.html">Configuration</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../ui.html">UI / Screenshots</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../concepts.html">Concepts</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../profiling.html">Data Profiling</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../cli.html">Command Line Interface</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../scheduler.html">Scheduling &amp; Triggers</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../plugins.html">Plugins</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../security.html">Security</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../faq.html">FAQ</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../code.html">API Reference</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../../index.html">Airflow</a>
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+ <ul class="wy-breadcrumbs">
+ <li><a href="../../index.html">Docs</a> »</li>
+
+ <li><a href="../index.html">Module code</a> »</li>
+
+ <li>airflow.models</li>
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+ </ul>
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for airflow.models</h1><div class="highlight"><pre>
+<span></span><span class="c1"># -*- coding: utf-8 -*-</span>
+<span class="c1">#</span>
+<span class="c1"># Licensed under the Apache License, Version 2.0 (the "License");</span>
+<span class="c1"># you may not use this file except in compliance with the License.</span>
+<span class="c1"># You may obtain a copy of the License at</span>
+<span class="c1">#</span>
+<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
+<span class="c1">#</span>
+<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
+<span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span>
+<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
+<span class="c1"># See the License for the specific language governing permissions and</span>
+<span class="c1"># limitations under the License.</span>
+<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">absolute_import</span>
+<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">division</span>
+<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">print_function</span>
+<span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">unicode_literals</span>
+
+<span class="kn">from</span> <span class="nn">future.standard_library</span> <span class="kn">import</span> <span class="n">install_aliases</span>
+
+<span class="n">install_aliases</span><span class="p">()</span>
+<span class="kn">from</span> <span class="nn">builtins</span> <span class="kn">import</span> <span class="nb">str</span>
+<span class="kn">from</span> <span class="nn">builtins</span> <span class="kn">import</span> <span class="nb">object</span><span class="p">,</span> <span class="nb">bytes</span>
+<span class="kn">import</span> <span class="nn">copy</span>
+<span class="kn">from</span> <span class="nn">collections</span> <span class="kn">import</span> <span class="n">namedtuple</span>
+<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span><span class="p">,</span> <span class="n">timedelta</span>
+<span class="kn">import</span> <span class="nn">dill</span>
+<span class="kn">import</span> <span class="nn">functools</span>
+<span class="kn">import</span> <span class="nn">getpass</span>
+<span class="kn">import</span> <span class="nn">imp</span>
+<span class="kn">import</span> <span class="nn">importlib</span>
+<span class="kn">import</span> <span class="nn">zipfile</span>
+<span class="kn">import</span> <span class="nn">jinja2</span>
+<span class="kn">import</span> <span class="nn">json</span>
+<span class="kn">import</span> <span class="nn">logging</span>
+<span class="kn">import</span> <span class="nn">os</span>
+<span class="kn">import</span> <span class="nn">pickle</span>
+<span class="kn">import</span> <span class="nn">re</span>
+<span class="kn">import</span> <span class="nn">signal</span>
+<span class="kn">import</span> <span class="nn">socket</span>
+<span class="kn">import</span> <span class="nn">sys</span>
+<span class="kn">import</span> <span class="nn">textwrap</span>
+<span class="kn">import</span> <span class="nn">traceback</span>
+<span class="kn">import</span> <span class="nn">warnings</span>
+<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlparse</span>
+
+<span class="kn">from</span> <span class="nn">sqlalchemy</span> <span class="kn">import</span> <span class="p">(</span>
+ <span class="n">Column</span><span class="p">,</span> <span class="n">Integer</span><span class="p">,</span> <span class="n">String</span><span class="p">,</span> <span class="n">DateTime</span><span class="p">,</span> <span class="n">Text</span><span class="p">,</span> <span class="n">Boolean</span><span class="p">,</span> <span class="n">ForeignKey</span><span class="p">,</span> <span class="n">PickleType</span><span class="p">,</span>
+ <span class="n">Index</span><span class="p">,</span> <span class="n">Float</span><span class="p">)</span>
+<span class="kn">from</span> <span class="nn">sqlalchemy</span> <span class="kn">import</span> <span class="n">case</span><span class="p">,</span> <span class="n">func</span><span class="p">,</span> <span class="n">or_</span><span class="p">,</span> <span class="n">and_</span>
+<span class="kn">from</span> <span class="nn">sqlalchemy.ext.declarative</span> <span class="kn">import</span> <span class="n">declarative_base</span><span class="p">,</span> <span class="n">declared_attr</span>
+<span class="kn">from</span> <span class="nn">sqlalchemy.dialects.mysql</span> <span class="kn">import</span> <span class="n">LONGTEXT</span>
+<span class="kn">from</span> <span class="nn">sqlalchemy.orm</span> <span class="kn">import</span> <span class="n">relationship</span><span class="p">,</span> <span class="n">synonym</span>
+
+<span class="kn">from</span> <span class="nn">croniter</span> <span class="kn">import</span> <span class="n">croniter</span>
+<span class="kn">import</span> <span class="nn">six</span>
+
+<span class="kn">from</span> <span class="nn">airflow</span> <span class="kn">import</span> <span class="n">settings</span><span class="p">,</span> <span class="n">utils</span>
+<span class="kn">from</span> <span class="nn">airflow.executors</span> <span class="kn">import</span> <span class="n">DEFAULT_EXECUTOR</span><span class="p">,</span> <span class="n">LocalExecutor</span>
+<span class="kn">from</span> <span class="nn">airflow</span> <span class="kn">import</span> <span class="n">configuration</span>
+<span class="kn">from</span> <span class="nn">airflow.exceptions</span> <span class="kn">import</span> <span class="n">AirflowException</span><span class="p">,</span> <span class="n">AirflowSkipException</span>
+<span class="kn">from</span> <span class="nn">airflow.utils.dates</span> <span class="kn">import</span> <span class="n">cron_presets</span><span class="p">,</span> <span class="n">date_range</span> <span class="k">as</span> <span class="n">utils_date_range</span>
+<span class="kn">from</span> <span class="nn">airflow.utils.db</span> <span class="kn">import</span> <span class="n">provide_session</span>
+<span class="kn">from</span> <span class="nn">airflow.utils.decorators</span> <span class="kn">import</span> <span class="n">apply_defaults</span>
+<span class="kn">from</span> <span class="nn">airflow.utils.email</span> <span class="kn">import</span> <span class="n">send_email</span>
+<span class="kn">from</span> <span class="nn">airflow.utils.helpers</span> <span class="kn">import</span> <span class="p">(</span>
+ <span class="n">as_tuple</span><span class="p">,</span> <span class="n">is_container</span><span class="p">,</span> <span class="n">is_in</span><span class="p">,</span> <span class="n">validate_key</span><span class="p">,</span> <span class="n">pprinttable</span><span class="p">)</span>
+<span class="kn">from</span> <span class="nn">airflow.utils.logging</span> <span class="kn">import</span> <span class="n">LoggingMixin</span>
+<span class="kn">from</span> <span class="nn">airflow.utils.state</span> <span class="kn">import</span> <span class="n">State</span>
+<span class="kn">from</span> <span class="nn">airflow.utils.timeout</span> <span class="kn">import</span> <span class="n">timeout</span>
+<span class="kn">from</span> <span class="nn">airflow.utils.trigger_rule</span> <span class="kn">import</span> <span class="n">TriggerRule</span>
+
+<span class="n">Base</span> <span class="o">=</span> <span class="n">declarative_base</span><span class="p">()</span>
+<span class="n">ID_LEN</span> <span class="o">=</span> <span class="mi">250</span>
+<span class="n">SQL_ALCHEMY_CONN</span> <span class="o">=</span> <span class="n">configuration</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'core'</span><span class="p">,</span> <span class="s1">'SQL_ALCHEMY_CONN'</span><span class="p">)</span>
+<span class="n">DAGS_FOLDER</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">expanduser</span><span class="p">(</span><span class="n">configuration</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'core'</span><span class="p">,</span> <span class="s1">'DAGS_FOLDER'</span><span class="p">))</span>
+<span class="n">XCOM_RETURN_KEY</span> <span class="o">=</span> <span class="s1">'return_value'</span>
+
+<span class="n">Stats</span> <span class="o">=</span> <span class="n">settings</span><span class="o">.</span><span class="n">Stats</span>
+
+<span class="n">ENCRYPTION_ON</span> <span class="o">=</span> <span class="bp">False</span>
+<span class="k">try</span><span class="p">:</span>
+ <span class="kn">from</span> <span class="nn">cryptography.fernet</span> <span class="kn">import</span> <span class="n">Fernet</span>
+ <span class="n">FERNET</span> <span class="o">=</span> <span class="n">Fernet</span><span class="p">(</span><span class="n">configuration</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'core'</span><span class="p">,</span> <span class="s1">'FERNET_KEY'</span><span class="p">)</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s1">'utf-8'</span><span class="p">))</span>
+ <span class="n">ENCRYPTION_ON</span> <span class="o">=</span> <span class="bp">True</span>
+<span class="k">except</span><span class="p">:</span>
+ <span class="k">pass</span>
+
+<span class="k">if</span> <span class="s1">'mysql'</span> <span class="ow">in</span> <span class="n">SQL_ALCHEMY_CONN</span><span class="p">:</span>
+ <span class="n">LongText</span> <span class="o">=</span> <span class="n">LONGTEXT</span>
+<span class="k">else</span><span class="p">:</span>
+ <span class="n">LongText</span> <span class="o">=</span> <span class="n">Text</span>
+
+<span class="c1"># used by DAG context_managers</span>
+<span class="n">_CONTEXT_MANAGER_DAG</span> <span class="o">=</span> <span class="bp">None</span>
+
+
+<span class="k">def</span> <span class="nf">clear_task_instances</span><span class="p">(</span><span class="n">tis</span><span class="p">,</span> <span class="n">session</span><span class="p">,</span> <span class="n">activate_dag_runs</span><span class="o">=</span><span class="bp">True</span><span class="p">):</span>
+ <span class="sd">'''</span>
+<span class="sd"> Clears a set of task instances, but makes sure the running ones</span>
+<span class="sd"> get killed.</span>
+<span class="sd"> '''</span>
+ <span class="n">job_ids</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="k">for</span> <span class="n">ti</span> <span class="ow">in</span> <span class="n">tis</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">ti</span><span class="o">.</span><span class="n">state</span> <span class="o">==</span> <span class="n">State</span><span class="o">.</span><span class="n">RUNNING</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">ti</span><span class="o">.</span><span class="n">job_id</span><span class="p">:</span>
+ <span class="n">ti</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="n">State</span><span class="o">.</span><span class="n">SHUTDOWN</span>
+ <span class="n">job_ids</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">ti</span><span class="o">.</span><span class="n">job_id</span><span class="p">)</span>
+ <span class="c1"># todo: this creates an issue with the webui tests</span>
+ <span class="c1">#elif ti.state != State.REMOVED:</span>
+ <span class="c1"># ti.state = State.NONE</span>
+ <span class="c1"># session.merge(ti)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">delete</span><span class="p">(</span><span class="n">ti</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">job_ids</span><span class="p">:</span>
+ <span class="kn">from</span> <span class="nn">airflow.jobs</span> <span class="kn">import</span> <span class="n">BaseJob</span> <span class="k">as</span> <span class="n">BJ</span>
+ <span class="k">for</span> <span class="n">job</span> <span class="ow">in</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">BJ</span><span class="p">)</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">BJ</span><span class="o">.</span><span class="n">id</span><span class="o">.</span><span class="n">in_</span><span class="p">(</span><span class="n">job_ids</span><span class="p">))</span><span class="o">.</span><span class="n">all</span><span class="p">():</span>
+ <span class="n">job</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="n">State</span><span class="o">.</span><span class="n">SHUTDOWN</span>
+ <span class="k">if</span> <span class="n">activate_dag_runs</span><span class="p">:</span>
+ <span class="n">execution_dates</span> <span class="o">=</span> <span class="p">{</span><span class="n">ti</span><span class="o">.</span><span class="n">execution_date</span> <span class="k">for</span> <span class="n">ti</span> <span class="ow">in</span> <span class="n">tis</span><span class="p">}</span>
+ <span class="n">dag_ids</span> <span class="o">=</span> <span class="p">{</span><span class="n">ti</span><span class="o">.</span><span class="n">dag_id</span> <span class="k">for</span> <span class="n">ti</span> <span class="ow">in</span> <span class="n">tis</span><span class="p">}</span>
+ <span class="n">drs</span> <span class="o">=</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">DagRun</span><span class="p">)</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span>
+ <span class="n">DagRun</span><span class="o">.</span><span class="n">dag_id</span><span class="o">.</span><span class="n">in_</span><span class="p">(</span><span class="n">dag_ids</span><span class="p">),</span>
+ <span class="n">DagRun</span><span class="o">.</span><span class="n">execution_date</span><span class="o">.</span><span class="n">in_</span><span class="p">(</span><span class="n">execution_dates</span><span class="p">),</span>
+ <span class="p">)</span><span class="o">.</span><span class="n">all</span><span class="p">()</span>
+ <span class="k">for</span> <span class="n">dr</span> <span class="ow">in</span> <span class="n">drs</span><span class="p">:</span>
+ <span class="n">dr</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="n">State</span><span class="o">.</span><span class="n">RUNNING</span>
+ <span class="n">dr</span><span class="o">.</span><span class="n">start_date</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
+
+
+<div class="viewcode-block" id="DagBag"><a class="viewcode-back" href="../../code.html#airflow.models.DagBag">[docs]</a><span class="k">class</span> <span class="nc">DagBag</span><span class="p">(</span><span class="n">LoggingMixin</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> A dagbag is a collection of dags, parsed out of a folder tree and has high</span>
+<span class="sd"> level configuration settings, like what database to use as a backend and</span>
+<span class="sd"> what executor to use to fire off tasks. This makes it easier to run</span>
+<span class="sd"> distinct environments for say production and development, tests, or for</span>
+<span class="sd"> different teams or security profiles. What would have been system level</span>
+<span class="sd"> settings are now dagbag level so that one system can run multiple,</span>
+<span class="sd"> independent settings sets.</span>
+
+<span class="sd"> :param dag_folder: the folder to scan to find DAGs</span>
+<span class="sd"> :type dag_folder: str</span>
+<span class="sd"> :param executor: the executor to use when executing task instances</span>
+<span class="sd"> in this DagBag</span>
+<span class="sd"> :param include_examples: whether to include the examples that ship</span>
+<span class="sd"> with airflow or not</span>
+<span class="sd"> :type include_examples: bool</span>
+<span class="sd"> :param sync_to_db: whether to sync the properties of the DAGs to</span>
+<span class="sd"> the metadata DB while finding them, typically should be done</span>
+<span class="sd"> by the scheduler job only</span>
+<span class="sd"> :type sync_to_db: bool</span>
+<span class="sd"> """</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span>
+ <span class="n">dag_folder</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">executor</span><span class="o">=</span><span class="n">DEFAULT_EXECUTOR</span><span class="p">,</span>
+ <span class="n">include_examples</span><span class="o">=</span><span class="n">configuration</span><span class="o">.</span><span class="n">getboolean</span><span class="p">(</span><span class="s1">'core'</span><span class="p">,</span> <span class="s1">'LOAD_EXAMPLES'</span><span class="p">),</span>
+ <span class="n">sync_to_db</span><span class="o">=</span><span class="bp">False</span><span class="p">):</span>
+
+ <span class="n">dag_folder</span> <span class="o">=</span> <span class="n">dag_folder</span> <span class="ow">or</span> <span class="n">DAGS_FOLDER</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"Filling up the DagBag from {}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">dag_folder</span><span class="p">))</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">dag_folder</span> <span class="o">=</span> <span class="n">dag_folder</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">dags</span> <span class="o">=</span> <span class="p">{}</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">sync_to_db</span> <span class="o">=</span> <span class="n">sync_to_db</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">file_last_changed</span> <span class="o">=</span> <span class="p">{}</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">executor</span> <span class="o">=</span> <span class="n">executor</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">import_errors</span> <span class="o">=</span> <span class="p">{}</span>
+ <span class="k">if</span> <span class="n">include_examples</span><span class="p">:</span>
+ <span class="n">example_dag_folder</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">dirname</span><span class="p">(</span><span class="n">__file__</span><span class="p">),</span>
+ <span class="s1">'example_dags'</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">collect_dags</span><span class="p">(</span><span class="n">example_dag_folder</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">collect_dags</span><span class="p">(</span><span class="n">dag_folder</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">sync_to_db</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">deactivate_inactive_dags</span><span class="p">()</span>
+
+<div class="viewcode-block" id="DagBag.size"><a class="viewcode-back" href="../../code.html#airflow.models.DagBag.size">[docs]</a> <span class="k">def</span> <span class="nf">size</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> :return: the amount of dags contained in this dagbag</span>
+<span class="sd"> """</span>
+ <span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dags</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="DagBag.get_dag"><a class="viewcode-back" href="../../code.html#airflow.models.DagBag.get_dag">[docs]</a> <span class="k">def</span> <span class="nf">get_dag</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dag_id</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Gets the DAG out of the dictionary, and refreshes it if expired</span>
+<span class="sd"> """</span>
+ <span class="c1"># If asking for a known subdag, we want to refresh the parent</span>
+ <span class="n">root_dag_id</span> <span class="o">=</span> <span class="n">dag_id</span>
+ <span class="k">if</span> <span class="n">dag_id</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">dags</span><span class="p">:</span>
+ <span class="n">dag</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">dags</span><span class="p">[</span><span class="n">dag_id</span><span class="p">]</span>
+ <span class="k">if</span> <span class="n">dag</span><span class="o">.</span><span class="n">is_subdag</span><span class="p">:</span>
+ <span class="n">root_dag_id</span> <span class="o">=</span> <span class="n">dag</span><span class="o">.</span><span class="n">parent_dag</span><span class="o">.</span><span class="n">dag_id</span>
+
+ <span class="c1"># If the root_dag_id is absent or expired</span>
+ <span class="n">orm_dag</span> <span class="o">=</span> <span class="n">DagModel</span><span class="o">.</span><span class="n">get_current</span><span class="p">(</span><span class="n">root_dag_id</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">orm_dag</span> <span class="ow">and</span> <span class="p">(</span>
+ <span class="n">root_dag_id</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">dags</span> <span class="ow">or</span>
+ <span class="p">(</span>
+ <span class="n">orm_dag</span><span class="o">.</span><span class="n">last_expired</span> <span class="ow">and</span>
+ <span class="n">dag</span><span class="o">.</span><span class="n">last_loaded</span> <span class="o">&lt;</span> <span class="n">orm_dag</span><span class="o">.</span><span class="n">last_expired</span>
+ <span class="p">)</span>
+ <span class="p">):</span>
+ <span class="c1"># Reprocessing source file</span>
+ <span class="n">found_dags</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">process_file</span><span class="p">(</span>
+ <span class="n">filepath</span><span class="o">=</span><span class="n">orm_dag</span><span class="o">.</span><span class="n">fileloc</span><span class="p">,</span> <span class="n">only_if_updated</span><span class="o">=</span><span class="bp">False</span><span class="p">)</span>
+
+ <span class="k">if</span> <span class="n">found_dags</span> <span class="ow">and</span> <span class="n">dag_id</span> <span class="ow">in</span> <span class="p">[</span><span class="n">dag</span><span class="o">.</span><span class="n">dag_id</span> <span class="k">for</span> <span class="n">dag</span> <span class="ow">in</span> <span class="n">found_dags</span><span class="p">]:</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">dags</span><span class="p">[</span><span class="n">dag_id</span><span class="p">]</span>
+ <span class="k">elif</span> <span class="n">dag_id</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">dags</span><span class="p">:</span>
+ <span class="k">del</span> <span class="bp">self</span><span class="o">.</span><span class="n">dags</span><span class="p">[</span><span class="n">dag_id</span><span class="p">]</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">dags</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">dag_id</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="DagBag.process_file"><a class="viewcode-back" href="../../code.html#airflow.models.DagBag.process_file">[docs]</a> <span class="k">def</span> <span class="nf">process_file</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">filepath</span><span class="p">,</span> <span class="n">only_if_updated</span><span class="o">=</span><span class="bp">True</span><span class="p">,</span> <span class="n">safe_mode</span><span class="o">=</span><span class="bp">True</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Given a path to a python module or zip file, this method imports</span>
+<span class="sd"> the module and look for dag objects within it.</span>
+<span class="sd"> """</span>
+ <span class="n">found_dags</span> <span class="o">=</span> <span class="p">[]</span>
+
+ <span class="c1"># todo: raise exception?</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">isfile</span><span class="p">(</span><span class="n">filepath</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">found_dags</span>
+
+ <span class="k">try</span><span class="p">:</span>
+ <span class="c1"># This failed before in what may have been a git sync</span>
+ <span class="c1"># race condition</span>
+ <span class="n">dttm</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">fromtimestamp</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">getmtime</span><span class="p">(</span><span class="n">filepath</span><span class="p">))</span>
+ <span class="k">if</span> <span class="n">only_if_updated</span> \
+ <span class="ow">and</span> <span class="n">filepath</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_last_changed</span> \
+ <span class="ow">and</span> <span class="n">dttm</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_last_changed</span><span class="p">[</span><span class="n">filepath</span><span class="p">]:</span>
+ <span class="k">return</span> <span class="n">found_dags</span>
+
+ <span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">exception</span><span class="p">(</span><span class="n">e</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">found_dags</span>
+
+ <span class="n">mods</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">zipfile</span><span class="o">.</span><span class="n">is_zipfile</span><span class="p">(</span><span class="n">filepath</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">safe_mode</span> <span class="ow">and</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">isfile</span><span class="p">(</span><span class="n">filepath</span><span class="p">):</span>
+ <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">filepath</span><span class="p">,</span> <span class="s1">'rb'</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
+ <span class="n">content</span> <span class="o">=</span> <span class="n">f</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">all</span><span class="p">([</span><span class="n">s</span> <span class="ow">in</span> <span class="n">content</span> <span class="k">for</span> <span class="n">s</span> <span class="ow">in</span> <span class="p">(</span><span class="n">b</span><span class="s1">'DAG'</span><span class="p">,</span> <span class="n">b</span><span class="s1">'airflow'</span><span class="p">)]):</span>
+ <span class="k">return</span> <span class="n">found_dags</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"Importing {}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">filepath</span><span class="p">))</span>
+ <span class="n">org_mod_name</span><span class="p">,</span> <span class="n">file_ext</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">splitext</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">filepath</span><span class="p">)[</span> [...]
+ <span class="n">mod_name</span> <span class="o">=</span> <span class="s1">'unusual_prefix_'</span> <span class="o">+</span> <span class="n">org_mod_name</span>
+
+ <span class="k">if</span> <span class="n">mod_name</span> <span class="ow">in</span> <span class="n">sys</span><span class="o">.</span><span class="n">modules</span><span class="p">:</span>
+ <span class="k">del</span> <span class="n">sys</span><span class="o">.</span><span class="n">modules</span><span class="p">[</span><span class="n">mod_name</span><span class="p">]</span>
+
+ <span class="k">with</span> <span class="n">timeout</span><span class="p">(</span><span class="n">configuration</span><span class="o">.</span><span class="n">getint</span><span class="p">(</span><span class="s1">'core'</span><span class="p">,</span> <span class="s2">"DAGBAG_IMPORT_TIMEOUT"</span><span class="p">)):</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">m</span> <span class="o">=</span> <span class="n">imp</span><span class="o">.</span><span class="n">load_source</span><span class="p">(</span><span class="n">mod_name</span><span class="p">,</span> <span class="n">filepath</span><span class="p">)</span>
+ <span class="n">mods</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">m</span><span class="p">)</span>
+ <span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">exception</span><span class="p">(</span><span class="s2">"Failed to import: "</span> <span class="o">+</span> <span class="n">filepath</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">import_errors</span><span class="p">[</span><span class="n">filepath</span><span class="p">]</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">file_last_changed</span><span class="p">[</span><span class="n">filepath</span><span class="p">]</span> <span class="o">=</span> <span class="n">dttm</span>
+
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">zip_file</span> <span class="o">=</span> <span class="n">zipfile</span><span class="o">.</span><span class="n">ZipFile</span><span class="p">(</span><span class="n">filepath</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">mod</span> <span class="ow">in</span> <span class="n">zip_file</span><span class="o">.</span><span class="n">infolist</span><span class="p">():</span>
+ <span class="n">head</span><span class="p">,</span> <span class="n">tail</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">mod</span><span class="o">.</span><span class="n">filename</span><span class="p">)</span>
+ <span class="n">mod_name</span><span class="p">,</span> <span class="n">ext</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">splitext</span><span class="p">(</span><span class="n">mod</span><span class="o">.</span><span class="n">filename</span><span class="p">)</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">head</span> <span class="ow">and</span> <span class="p">(</span><span class="n">ext</span> <span class="o">==</span> <span class="s1">'.py'</span> <span class="ow">or</span> <span class="n">ext</span> <span class="o">==</span> <span class="s1">'.pyc'</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">mod_name</span> <span class="o">==</span> <span class="s1">'__init__'</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">"Found __init__.{0} at root of {1}"</span><span class="o">.</span>
+ <span class="n">format</span><span class="p">(</span><span class="n">ext</span><span class="p">,</span> <span class="n">filepath</span><span class="p">))</span>
+
+ <span class="k">if</span> <span class="n">safe_mode</span><span class="p">:</span>
+ <span class="k">with</span> <span class="n">zip_file</span><span class="o">.</span><span class="n">open</span><span class="p">(</span><span class="n">mod</span><span class="o">.</span><span class="n">filename</span><span class="p">)</span> <span class="k">as</span> <span class="n">zf</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"Reading {} from {}"</span><span class="o">.</span>
+ <span class="n">format</span><span class="p">(</span><span class="n">mod</span><span class="o">.</span><span class="n">filename</span><span class="p">,</span> <span class="n">filepath</span><span class="p">))</span>
+ <span class="n">content</span> <span class="o">=</span> <span class="n">zf</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">all</span><span class="p">([</span><span class="n">s</span> <span class="ow">in</span> <span class="n">content</span> <span class="k">for</span> <span class="n">s</span> <span class="ow">in</span> <span class="p">(</span><span class="n">b</span><span class="s1">'DAG'</span><span class="p">,</span> <span class="n">b</span><span class="s1">'airflow'</span><span class="p">)]):</span>
+ <span class="c1"># todo: create ignore list</span>
+ <span class="k">return</span> <span class="n">found_dags</span>
+
+ <span class="k">if</span> <span class="n">mod_name</span> <span class="ow">in</span> <span class="n">sys</span><span class="o">.</span><span class="n">modules</span><span class="p">:</span>
+ <span class="k">del</span> <span class="n">sys</span><span class="o">.</span><span class="n">modules</span><span class="p">[</span><span class="n">mod_name</span><span class="p">]</span>
+
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">sys</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">insert</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">filepath</span><span class="p">)</span>
+ <span class="n">m</span> <span class="o">=</span> <span class="n">importlib</span><span class="o">.</span><span class="n">import_module</span><span class="p">(</span><span class="n">mod_name</span><span class="p">)</span>
+ <span class="n">mods</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">m</span><span class="p">)</span>
+ <span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">exception</span><span class="p">(</span><span class="s2">"Failed to import: "</span> <span class="o">+</span> <span class="n">filepath</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">import_errors</span><span class="p">[</span><span class="n">filepath</span><span class="p">]</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">file_last_changed</span><span class="p">[</span><span class="n">filepath</span><span class="p">]</span> <span class="o">=</span> <span class="n">dttm</span>
+
+ <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="n">mods</span><span class="p">:</span>
+ <span class="k">for</span> <span class="n">dag</span> <span class="ow">in</span> <span class="nb">list</span><span class="p">(</span><span class="n">m</span><span class="o">.</span><span class="n">__dict__</span><span class="o">.</span><span class="n">values</span><span class="p">()):</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">dag</span><span class="p">,</span> <span class="n">DAG</span><span class="p">):</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">dag</span><span class="o">.</span><span class="n">full_filepath</span><span class="p">:</span>
+ <span class="n">dag</span><span class="o">.</span><span class="n">full_filepath</span> <span class="o">=</span> <span class="n">filepath</span>
+ <span class="n">dag</span><span class="o">.</span><span class="n">is_subdag</span> <span class="o">=</span> <span class="bp">False</span>
+ <span class="n">dag</span><span class="o">.</span><span class="n">module_name</span> <span class="o">=</span> <span class="n">m</span><span class="o">.</span><span class="n">__name__</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">bag_dag</span><span class="p">(</span><span class="n">dag</span><span class="p">,</span> <span class="n">parent_dag</span><span class="o">=</span><span class="n">dag</span><span class="p">,</span> <span class="n">root_dag</span><span class="o">=</span><span class="n">dag</span><span class="p">)</span>
+ <span class="n">found_dags</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">dag</span><span class="p">)</span>
+ <span class="n">found_dags</span> <span class="o">+=</span> <span class="n">dag</span><span class="o">.</span><span class="n">subdags</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">file_last_changed</span><span class="p">[</span><span class="n">filepath</span><span class="p">]</span> <span class="o">=</span> <span class="n">dttm</span>
+ <span class="k">return</span> <span class="n">found_dags</span></div>
+
+ <span class="nd">@provide_session</span>
+<div class="viewcode-block" id="DagBag.kill_zombies"><a class="viewcode-back" href="../../code.html#airflow.models.DagBag.kill_zombies">[docs]</a> <span class="k">def</span> <span class="nf">kill_zombies</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">session</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Fails tasks that haven't had a heartbeat in too long</span>
+<span class="sd"> """</span>
+ <span class="kn">from</span> <span class="nn">airflow.jobs</span> <span class="kn">import</span> <span class="n">LocalTaskJob</span> <span class="k">as</span> <span class="n">LJ</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"Finding 'running' jobs without a recent heartbeat"</span><span class="p">)</span>
+ <span class="n">TI</span> <span class="o">=</span> <span class="n">TaskInstance</span>
+ <span class="n">secs</span> <span class="o">=</span> <span class="p">(</span>
+ <span class="n">configuration</span><span class="o">.</span><span class="n">getint</span><span class="p">(</span><span class="s1">'scheduler'</span><span class="p">,</span> <span class="s1">'job_heartbeat_sec'</span><span class="p">)</span> <span class="o">*</span> <span class="mi">3</span><span class="p">)</span> <span class="o">+</span> <span class="mi">120</span>
+ <span class="n">limit_dttm</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span> <span class="o">-</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">seconds</span><span class="o">=</span><span class="n">secs</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
+ <span class="s2">"Failing jobs without heartbeat after {}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">limit_dttm</span><span class="p">))</span>
+
+ <span class="n">tis</span> <span class="o">=</span> <span class="p">(</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">TI</span><span class="p">)</span>
+ <span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">LJ</span><span class="p">,</span> <span class="n">TI</span><span class="o">.</span><span class="n">job_id</span> <span class="o">==</span> <span class="n">LJ</span><span class="o">.</span><span class="n">id</span><span class="p">)</span>
+ <span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">TI</span><span class="o">.</span><span class="n">state</span> <span class="o">==</span> <span class="n">State</span><span class="o">.</span><span class="n">RUNNING</span><span class="p">)</span>
+ <span class="o">.</span><span class="n">filter</span><span class="p">(</span>
+ <span class="n">or_</span><span class="p">(</span>
+ <span class="n">LJ</span><span class="o">.</span><span class="n">state</span> <span class="o">!=</span> <span class="n">State</span><span class="o">.</span><span class="n">RUNNING</span><span class="p">,</span>
+ <span class="n">LJ</span><span class="o">.</span><span class="n">latest_heartbeat</span> <span class="o"><</span> <span class="n">limit_dttm</span><span class="p">,</span>
+ <span class="p">))</span>
+ <span class="o">.</span><span class="n">all</span><span class="p">()</span>
+ <span class="p">)</span>
+
+ <span class="k">for</span> <span class="n">ti</span> <span class="ow">in</span> <span class="n">tis</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">ti</span> <span class="ow">and</span> <span class="n">ti</span><span class="o">.</span><span class="n">dag_id</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">dags</span><span class="p">:</span>
+ <span class="n">dag</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">dags</span><span class="p">[</span><span class="n">ti</span><span class="o">.</span><span class="n">dag_id</span><span class="p">]</span>
+ <span class="k">if</span> <span class="n">ti</span><span class="o">.</span><span class="n">task_id</span> <span class="ow">in</span> <span class="n">dag</span><span class="o">.</span><span class="n">task_ids</span><span class="p">:</span>
+ <span class="n">task</span> <span class="o">=</span> <span class="n">dag</span><span class="o">.</span><span class="n">get_task</span><span class="p">(</span><span class="n">ti</span><span class="o">.</span><span class="n">task_id</span><span class="p">)</span>
+ <span class="n">ti</span><span class="o">.</span><span class="n">task</span> <span class="o">=</span> <span class="n">task</span>
+ <span class="n">ti</span><span class="o">.</span><span class="n">handle_failure</span><span class="p">(</span><span class="s2">"{} killed as zombie"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">ti</span><span class="p">))</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
+ <span class="s1">'Marked zombie job {} as failed'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">ti</span><span class="p">))</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span></div>
+
+<div class="viewcode-block" id="DagBag.bag_dag"><a class="viewcode-back" href="../../code.html#airflow.models.DagBag.bag_dag">[docs]</a> <span class="k">def</span> <span class="nf">bag_dag</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dag</span><span class="p">,</span> <span class="n">parent_dag</span><span class="p">,</span> <span class="n">root_dag</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Adds the DAG into the bag, recurses into sub dags.</span>
+<span class="sd"> """</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">dags</span><span class="p">[</span><span class="n">dag</span><span class="o">.</span><span class="n">dag_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">dag</span>
+ <span class="n">dag</span><span class="o">.</span><span class="n">resolve_template_files</span><span class="p">()</span>
+ <span class="n">dag</span><span class="o">.</span><span class="n">last_loaded</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
+
+ <span class="k">for</span> <span class="n">task</span> <span class="ow">in</span> <span class="n">dag</span><span class="o">.</span><span class="n">tasks</span><span class="p">:</span>
+ <span class="n">settings</span><span class="o">.</span><span class="n">policy</span><span class="p">(</span><span class="n">task</span><span class="p">)</span>
+
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">sync_to_db</span><span class="p">:</span>
+ <span class="n">session</span> <span class="o">=</span> <span class="n">settings</span><span class="o">.</span><span class="n">Session</span><span class="p">()</span>
+ <span class="n">orm_dag</span> <span class="o">=</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span>
+ <span class="n">DagModel</span><span class="p">)</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">DagModel</span><span class="o">.</span><span class="n">dag_id</span> <span class="o">==</span> <span class="n">dag</span><span class="o">.</span><span class="n">dag_id</span><span class="p">)</span><span class="o">.</span><span class="n">first</span><span class="p">()</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">orm_dag</span><span class="p">:</span>
+ <span class="n">orm_dag</span> <span class="o">=</span> <span class="n">DagModel</span><span class="p">(</span><span class="n">dag_id</span><span class="o">=</span><span class="n">dag</span><span class="o">.</span><span class="n">dag_id</span><span class="p">)</span>
+ <span class="n">orm_dag</span><span class="o">.</span><span class="n">fileloc</span> <span class="o">=</span> <span class="n">root_dag</span><span class="o">.</span><span class="n">full_filepath</span>
+ <span class="n">orm_dag</span><span class="o">.</span><span class="n">is_subdag</span> <span class="o">=</span> <span class="n">dag</span><span class="o">.</span><span class="n">is_subdag</span>
+ <span class="n">orm_dag</span><span class="o">.</span><span class="n">owners</span> <span class="o">=</span> <span class="n">root_dag</span><span class="o">.</span><span class="n">owner</span>
+ <span class="n">orm_dag</span><span class="o">.</span><span class="n">is_active</span> <span class="o">=</span> <span class="bp">True</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">merge</span><span class="p">(</span><span class="n">orm_dag</span><span class="p">)</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+
+ <span class="k">for</span> <span class="n">subdag</span> <span class="ow">in</span> <span class="n">dag</span><span class="o">.</span><span class="n">subdags</span><span class="p">:</span>
+ <span class="n">subdag</span><span class="o">.</span><span class="n">full_filepath</span> <span class="o">=</span> <span class="n">dag</span><span class="o">.</span><span class="n">full_filepath</span>
+ <span class="n">subdag</span><span class="o">.</span><span class="n">parent_dag</span> <span class="o">=</span> <span class="n">dag</span>
+ <span class="n">subdag</span><span class="o">.</span><span class="n">fileloc</span> <span class="o">=</span> <span class="n">root_dag</span><span class="o">.</span><span class="n">full_filepath</span>
+ <span class="n">subdag</span><span class="o">.</span><span class="n">is_subdag</span> <span class="o">=</span> <span class="bp">True</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">bag_dag</span><span class="p">(</span><span class="n">subdag</span><span class="p">,</span> <span class="n">parent_dag</span><span class="o">=</span><span class="n">dag</span><span class="p">,</span> <span class="n">root_dag</span><span class="o">=</span><span class="n">root_dag</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s1">'Loaded DAG {dag}'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">()))</span></div>
+
+<div class="viewcode-block" id="DagBag.collect_dags"><a class="viewcode-back" href="../../code.html#airflow.models.DagBag.collect_dags">[docs]</a> <span class="k">def</span> <span class="nf">collect_dags</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span>
+ <span class="n">dag_folder</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">only_if_updated</span><span class="o">=</span><span class="bp">True</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Given a file path or a folder, this method looks for python modules,</span>
+<span class="sd"> imports them and adds them to the dagbag collection.</span>
+
+<span class="sd"> Note that if a .airflowignore file is found while processing</span>
+<span class="sd"> the directory, it will behave much like a .gitignore does,</span>
+<span class="sd"> ignoring files that match any of the regex patterns specified</span>
+<span class="sd"> in the file.</span>
+<span class="sd"> """</span>
+ <span class="n">start_dttm</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
+ <span class="n">dag_folder</span> <span class="o">=</span> <span class="n">dag_folder</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag_folder</span>
+
+ <span class="c1"># Used to store stats around DagBag processing</span>
+ <span class="n">stats</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="n">FileLoadStat</span> <span class="o">=</span> <span class="n">namedtuple</span><span class="p">(</span>
+ <span class="s1">'FileLoadStat'</span><span class="p">,</span> <span class="s2">"file duration dag_num task_num dags"</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">isfile</span><span class="p">(</span><span class="n">dag_folder</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">process_file</span><span class="p">(</span><span class="n">dag_folder</span><span class="p">,</span> <span class="n">only_if_updated</span><span class="o">=</span><span class="n">only_if_updated</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">isdir</span><span class="p">(</span><span class="n">dag_folder</span><span class="p">):</span>
+ <span class="n">patterns</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="k">for</span> <span class="n">root</span><span class="p">,</span> <span class="n">dirs</span><span class="p">,</span> <span class="n">files</span> <span class="ow">in</span> <span class="n">os</span><span class="o">.</span><span class="n">walk</span><span class="p">(</span><span class="n">dag_folder</span><span class="p">,</span> <span class="n">followlinks</span><span class="o">=</span><span class="bp">True</span><span class="p">):</span>
+ <span class="n">ignore_file</span> <span class="o">=</span> <span class="p">[</span><span class="n">f</span> <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">files</span> <span class="k">if</span> <span class="n">f</span> <span class="o">==</span> <span class="s1">'.airflowignore'</span><span class="p">]</span>
+ <span class="k">if</span> <span class="n">ignore_file</span><span class="p">:</span>
+ <span class="n">f</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">root</span><span class="p">,</span> <span class="n">ignore_file</span><span class="p">[</span><span class="mi">0</span><span class="p">]),</span> <span class="s1">'r'</span><span class="p">)</span>
+ <span class="n">patterns</span> <span class="o">+=</span> <span class="p">[</span><span class="n">p</span> <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">f</span><span class="o">.</span><span class="n">read</span><span class="p">()</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'</span><span class="se">\n</span><span class="s1">'</span><span class="p">)</span> < [...]
+ <span class="n">f</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+ <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">files</span><span class="p">:</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">filepath</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">root</span><span class="p">,</span> <span class="n">f</span><span class="p">)</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">isfile</span><span class="p">(</span><span class="n">filepath</span><span class="p">):</span>
+ <span class="k">continue</span>
+ <span class="n">mod_name</span><span class="p">,</span> <span class="n">file_ext</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">splitext</span><span class="p">(</span>
+ <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">filepath</span><span class="p">)[</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span>
+ <span class="k">if</span> <span class="n">file_ext</span> <span class="o">!=</span> <span class="s1">'.py'</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">zipfile</span><span class="o">.</span><span class="n">is_zipfile</span><span class="p">(</span><span class="n">filepath</span><span class="p">):</span>
+ <span class="k">continue</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">any</span><span class="p">(</span>
+ <span class="p">[</span><span class="n">re</span><span class="o">.</span><span class="n">findall</span><span class="p">(</span><span class="n">p</span><span class="p">,</span> <span class="n">filepath</span><span class="p">)</span> <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">patterns</span><span class="p">]):</span>
+ <span class="n">ts</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
+ <span class="n">found_dags</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">process_file</span><span class="p">(</span>
+ <span class="n">filepath</span><span class="p">,</span> <span class="n">only_if_updated</span><span class="o">=</span><span class="n">only_if_updated</span><span class="p">)</span>
+
+ <span class="n">td</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span> <span class="o">-</span> <span class="n">ts</span>
+ <span class="n">td</span> <span class="o">=</span> <span class="n">td</span><span class="o">.</span><span class="n">total_seconds</span><span class="p">()</span> <span class="o">+</span> <span class="p">(</span>
+ <span class="nb">float</span><span class="p">(</span><span class="n">td</span><span class="o">.</span><span class="n">microseconds</span><span class="p">)</span> <span class="o">/</span> <span class="mi">1000000</span><span class="p">)</span>
+ <span class="n">stats</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">FileLoadStat</span><span class="p">(</span>
+ <span class="n">filepath</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">dag_folder</span><span class="p">,</span> <span class="s1">''</span><span class="p">),</span>
+ <span class="n">td</span><span class="p">,</span>
+ <span class="nb">len</span><span class="p">(</span><span class="n">found_dags</span><span class="p">),</span>
+ <span class="nb">sum</span><span class="p">([</span><span class="nb">len</span><span class="p">(</span><span class="n">dag</span><span class="o">.</span><span class="n">tasks</span><span class="p">)</span> <span class="k">for</span> <span class="n">dag</span> <span class="ow">in</span> <span class="n">found_dags</span><span class="p">]),</span>
+ <span class="nb">str</span><span class="p">([</span><span class="n">dag</span><span class="o">.</span><span class="n">dag_id</span> <span class="k">for</span> <span class="n">dag</span> <span class="ow">in</span> <span class="n">found_dags</span><span class="p">]),</span>
+ <span class="p">))</span>
+ <span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="n">e</span><span class="p">)</span>
+ <span class="n">Stats</span><span class="o">.</span><span class="n">gauge</span><span class="p">(</span>
+ <span class="s1">'collect_dags'</span><span class="p">,</span> <span class="p">(</span><span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span> <span class="o">-</span> <span class="n">start_dttm</span><span class="p">)</span><span class="o">.</span><span class="n">total_seconds</span><span class="p">(),</span> <span class="mi">1</span><span class="p">)</span>
+ <span class="n">Stats</span><span class="o">.</span><span class="n">gauge</span><span class="p">(</span>
+ <span class="s1">'dagbag_size'</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dags</span><span class="p">),</span> <span class="mi">1</span><span class="p">)</span>
+ <span class="n">Stats</span><span class="o">.</span><span class="n">gauge</span><span class="p">(</span>
+ <span class="s1">'dagbag_import_errors'</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">import_errors</span><span class="p">),</span> <span class="mi">1</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">dagbag_stats</span> <span class="o">=</span> <span class="nb">sorted</span><span class="p">(</span>
+ <span class="n">stats</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="o">.</span><span class="n">duration</span><span class="p">,</span> <span class="n">reverse</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="DagBag.dagbag_report"><a class="viewcode-back" href="../../code.html#airflow.models.DagBag.dagbag_report">[docs]</a> <span class="k">def</span> <span class="nf">dagbag_report</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""Prints a report around DagBag loading stats"""</span>
+ <span class="n">report</span> <span class="o">=</span> <span class="n">textwrap</span><span class="o">.</span><span class="n">dedent</span><span class="p">(</span><span class="s2">"""</span><span class="se">\n</span><span class="s2"></span>
+<span class="s2"> -------------------------------------------------------------------</span>
+<span class="s2"> DagBag loading stats for {dag_folder}</span>
+<span class="s2"> -------------------------------------------------------------------</span>
+<span class="s2"> Number of DAGs: {dag_num}</span>
+<span class="s2"> Total task number: {task_num}</span>
+<span class="s2"> DagBag parsing time: {duration}</span>
+<span class="s2"> {table}</span>
+<span class="s2"> """</span><span class="p">)</span>
+ <span class="n">stats</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">dagbag_stats</span>
+ <span class="k">return</span> <span class="n">report</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
+ <span class="n">dag_folder</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">dag_folder</span><span class="p">,</span>
+ <span class="n">duration</span><span class="o">=</span><span class="nb">sum</span><span class="p">([</span><span class="n">o</span><span class="o">.</span><span class="n">duration</span> <span class="k">for</span> <span class="n">o</span> <span class="ow">in</span> <span class="n">stats</span><span class="p">]),</span>
+ <span class="n">dag_num</span><span class="o">=</span><span class="nb">sum</span><span class="p">([</span><span class="n">o</span><span class="o">.</span><span class="n">dag_num</span> <span class="k">for</span> <span class="n">o</span> <span class="ow">in</span> <span class="n">stats</span><span class="p">]),</span>
+ <span class="n">task_num</span><span class="o">=</span><span class="nb">sum</span><span class="p">([</span><span class="n">o</span><span class="o">.</span><span class="n">dag_num</span> <span class="k">for</span> <span class="n">o</span> <span class="ow">in</span> <span class="n">stats</span><span class="p">]),</span>
+ <span class="n">table</span><span class="o">=</span><span class="n">pprinttable</span><span class="p">(</span><span class="n">stats</span><span class="p">),</span>
+ <span class="p">)</span></div>
+
+ <span class="k">def</span> <span class="nf">deactivate_inactive_dags</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="n">active_dag_ids</span> <span class="o">=</span> <span class="p">[</span><span class="n">dag</span><span class="o">.</span><span class="n">dag_id</span> <span class="k">for</span> <span class="n">dag</span> <span class="ow">in</span> <span class="nb">list</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dags</span><span class="o">.</span><span class="n">values</span><span class="p">())]</span>
+ <span class="n">session</span> <span class="o">=</span> <span class="n">settings</span><span class="o">.</span><span class="n">Session</span><span class="p">()</span>
+ <span class="k">for</span> <span class="n">dag</span> <span class="ow">in</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span>
+ <span class="n">DagModel</span><span class="p">)</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="o">~</span><span class="n">DagModel</span><span class="o">.</span><span class="n">dag_id</span><span class="o">.</span><span class="n">in_</span><span class="p">(</span><span class="n">active_dag_ids</span><span class="p">))</span><span class="o">.</span><span class="n">all</span><span class="p">():</span>
+ <span class="n">dag</span><span class="o">.</span><span class="n">is_active</span> <span class="o">=</span> <span class="bp">False</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">merge</span><span class="p">(</span><span class="n">dag</span><span class="p">)</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+
+ <span class="k">def</span> <span class="nf">paused_dags</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="n">session</span> <span class="o">=</span> <span class="n">settings</span><span class="o">.</span><span class="n">Session</span><span class="p">()</span>
+ <span class="n">dag_ids</span> <span class="o">=</span> <span class="p">[</span><span class="n">dp</span><span class="o">.</span><span class="n">dag_id</span> <span class="k">for</span> <span class="n">dp</span> <span class="ow">in</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">DagModel</span><span class="p">)</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span>
+ <span class="n">DagModel</span><span class="o">.</span><span class="n">is_paused</span> <span class="o">==</span> <span class="bp">True</span><span class="p">)]</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+ <span class="k">return</span> <span class="n">dag_ids</span></div>
+
+
+<span class="k">class</span> <span class="nc">User</span><span class="p">(</span><span class="n">Base</span><span class="p">):</span>
+ <span class="n">__tablename__</span> <span class="o">=</span> <span class="s2">"users"</span>
+
+ <span class="nb">id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Integer</span><span class="p">,</span> <span class="n">primary_key</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="n">username</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="n">ID_LEN</span><span class="p">),</span> <span class="n">unique</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="n">email</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="mi">500</span><span class="p">))</span>
+ <span class="n">superuser</span> <span class="o">=</span> <span class="bp">False</span>
+
+ <span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">username</span>
+
+ <span class="k">def</span> <span class="nf">get_id</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">id</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">is_superuser</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">superuser</span>
+
+
+<div class="viewcode-block" id="Connection"><a class="viewcode-back" href="../../code.html#airflow.models.Connection">[docs]</a><span class="k">class</span> <span class="nc">Connection</span><span class="p">(</span><span class="n">Base</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Placeholder to store information about different database instances</span>
+<span class="sd"> connection information. The idea here is that scripts use references to</span>
+<span class="sd"> database instances (conn_id) instead of hard coding hostname, logins and</span>
+<span class="sd"> passwords when using operators or hooks.</span>
+<span class="sd"> """</span>
+ <span class="n">__tablename__</span> <span class="o">=</span> <span class="s2">"connection"</span>
+
+ <span class="nb">id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Integer</span><span class="p">(),</span> <span class="n">primary_key</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="n">conn_id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="n">ID_LEN</span><span class="p">))</span>
+ <span class="n">conn_type</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="mi">500</span><span class="p">))</span>
+ <span class="n">host</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="mi">500</span><span class="p">))</span>
+ <span class="n">schema</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="mi">500</span><span class="p">))</span>
+ <span class="n">login</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="mi">500</span><span class="p">))</span>
+ <span class="n">_password</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="s1">'password'</span><span class="p">,</span> <span class="n">String</span><span class="p">(</span><span class="mi">5000</span><span class="p">))</span>
+ <span class="n">port</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Integer</span><span class="p">())</span>
+ <span class="n">is_encrypted</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Boolean</span><span class="p">,</span> <span class="n">unique</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="bp">False</span><span class="p">)</span>
+ <span class="n">is_extra_encrypted</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Boolean</span><span class="p">,</span> <span class="n">unique</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="bp">False</span><span class="p">)</span>
+ <span class="n">_extra</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="s1">'extra'</span><span class="p">,</span> <span class="n">String</span><span class="p">(</span><span class="mi">5000</span><span class="p">))</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span> <span class="n">conn_id</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">conn_type</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">host</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">login</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">password</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">schema</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">port</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">extra</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">uri</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">conn_id</span> <span class="o">=</span> <span class="n">conn_id</span>
+ <span class="k">if</span> <span class="n">uri</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">parse_from_uri</span><span class="p">(</span><span class="n">uri</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">conn_type</span> <span class="o">=</span> <span class="n">conn_type</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">host</span> <span class="o">=</span> <span class="n">host</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">login</span> <span class="o">=</span> <span class="n">login</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">password</span> <span class="o">=</span> <span class="n">password</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">schema</span> <span class="o">=</span> <span class="n">schema</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">port</span> <span class="o">=</span> <span class="n">port</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">extra</span> <span class="o">=</span> <span class="n">extra</span>
+
+ <span class="k">def</span> <span class="nf">parse_from_uri</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">uri</span><span class="p">):</span>
+ <span class="n">temp_uri</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">uri</span><span class="p">)</span>
+ <span class="n">hostname</span> <span class="o">=</span> <span class="n">temp_uri</span><span class="o">.</span><span class="n">hostname</span> <span class="ow">or</span> <span class="s1">''</span>
+ <span class="k">if</span> <span class="s1">'</span><span class="si">%2f</span><span class="s1">'</span> <span class="ow">in</span> <span class="n">hostname</span><span class="p">:</span>
+ <span class="n">hostname</span> <span class="o">=</span> <span class="n">hostname</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">%2f</span><span class="s1">'</span><span class="p">,</span> <span class="s1">'/'</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">%2F</span><span class [...]
+ <span class="n">conn_type</span> <span class="o">=</span> <span class="n">temp_uri</span><span class="o">.</span><span class="n">scheme</span>
+ <span class="k">if</span> <span class="n">conn_type</span> <span class="o">==</span> <span class="s1">'postgresql'</span><span class="p">:</span>
+ <span class="n">conn_type</span> <span class="o">=</span> <span class="s1">'postgres'</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">conn_type</span> <span class="o">=</span> <span class="n">conn_type</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">host</span> <span class="o">=</span> <span class="n">hostname</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">schema</span> <span class="o">=</span> <span class="n">temp_uri</span><span class="o">.</span><span class="n">path</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">login</span> <span class="o">=</span> <span class="n">temp_uri</span><span class="o">.</span><span class="n">username</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">password</span> <span class="o">=</span> <span class="n">temp_uri</span><span class="o">.</span><span class="n">password</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">port</span> <span class="o">=</span> <span class="n">temp_uri</span><span class="o">.</span><span class="n">port</span>
+
+ <span class="k">def</span> <span class="nf">get_password</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_password</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_encrypted</span><span class="p">:</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">ENCRYPTION_ON</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span>
+ <span class="s2">"Can't decrypt, configuration is missing"</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">FERNET</span><span class="o">.</span><span class="n">decrypt</span><span class="p">(</span><span class="nb">bytes</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_password</span><span class="p">,</span> <span class="s1">'utf-8'</span><span class="p">))</span><span class="o">.</span><span class="n">decode</span><span class="p">()</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_password</span>
+
+ <span class="k">def</span> <span class="nf">set_password</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">value</span><span class="p">:</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_password</span> <span class="o">=</span> <span class="n">FERNET</span><span class="o">.</span><span class="n">encrypt</span><span class="p">(</span><span class="nb">bytes</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="s1">'utf-8'</span><span class="p">))</span><span class="o">.</span><span class="n">decode</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">is_encrypted</span> <span class="o">=</span> <span class="bp">True</span>
+ <span class="k">except</span> <span class="ne">NameError</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_password</span> <span class="o">=</span> <span class="n">value</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">is_encrypted</span> <span class="o">=</span> <span class="bp">False</span>
+
+ <span class="nd">@declared_attr</span>
+ <span class="k">def</span> <span class="nf">password</span><span class="p">(</span><span class="n">cls</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">synonym</span><span class="p">(</span><span class="s1">'_password'</span><span class="p">,</span>
+ <span class="n">descriptor</span><span class="o">=</span><span class="nb">property</span><span class="p">(</span><span class="n">cls</span><span class="o">.</span><span class="n">get_password</span><span class="p">,</span> <span class="n">cls</span><span class="o">.</span><span class="n">set_password</span><span class="p">))</span>
+
+ <span class="k">def</span> <span class="nf">get_extra</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_extra</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_extra_encrypted</span><span class="p">:</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">ENCRYPTION_ON</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span>
+ <span class="s2">"Can't decrypt `extra`, configuration is missing"</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">FERNET</span><span class="o">.</span><span class="n">decrypt</span><span class="p">(</span><span class="nb">bytes</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_extra</span><span class="p">,</span> <span class="s1">'utf-8'</span><span class="p">))</span><span class="o">.</span><span class="n">decode</span><span class="p">()</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_extra</span>
+
+ <span class="k">def</span> <span class="nf">set_extra</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">value</span><span class="p">:</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_extra</span> <span class="o">=</span> <span class="n">FERNET</span><span class="o">.</span><span class="n">encrypt</span><span class="p">(</span><span class="nb">bytes</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="s1">'utf-8'</span><span class="p">))</span><span class="o">.</span><span class="n">decode</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">is_extra_encrypted</span> <span class="o">=</span> <span class="bp">True</span>
+ <span class="k">except</span> <span class="ne">NameError</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_extra</span> <span class="o">=</span> <span class="n">value</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">is_extra_encrypted</span> <span class="o">=</span> <span class="bp">False</span>
+
+ <span class="nd">@declared_attr</span>
+ <span class="k">def</span> <span class="nf">extra</span><span class="p">(</span><span class="n">cls</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">synonym</span><span class="p">(</span><span class="s1">'_extra'</span><span class="p">,</span>
+ <span class="n">descriptor</span><span class="o">=</span><span class="nb">property</span><span class="p">(</span><span class="n">cls</span><span class="o">.</span><span class="n">get_extra</span><span class="p">,</span> <span class="n">cls</span><span class="o">.</span><span class="n">set_extra</span><span class="p">))</span>
+
+ <span class="k">def</span> <span class="nf">get_hook</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="kn">from</span> <span class="nn">airflow</span> <span class="kn">import</span> <span class="n">hooks</span>
+ <span class="kn">from</span> <span class="nn">airflow.contrib</span> <span class="kn">import</span> <span class="n">hooks</span> <span class="k">as</span> <span class="n">contrib_hooks</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">conn_type</span> <span class="o">==</span> <span class="s1">'mysql'</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">hooks</span><span class="o">.</span><span class="n">MySqlHook</span><span class="p">(</span><span class="n">mysql_conn_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">conn_id</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">conn_type</span> <span class="o">==</span> <span class="s1">'google_cloud_platform'</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">contrib_hooks</span><span class="o">.</span><span class="n">BigQueryHook</span><span class="p">(</span><span class="n">bigquery_conn_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">conn_id</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">conn_type</span> <span class="o">==</span> <span class="s1">'postgres'</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">hooks</span><span class="o">.</span><span class="n">PostgresHook</span><span class="p">(</span><span class="n">postgres_conn_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">conn_id</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">conn_type</span> <span class="o">==</span> <span class="s1">'hive_cli'</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">hooks</span><span class="o">.</span><span class="n">HiveCliHook</span><span class="p">(</span><span class="n">hive_cli_conn_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">conn_id</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">conn_type</span> <span class="o">==</span> <span class="s1">'presto'</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">hooks</span><span class="o">.</span><span class="n">PrestoHook</span><span class="p">(</span><span class="n">presto_conn_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">conn_id</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">conn_type</span> <span class="o">==</span> <span class="s1">'hiveserver2'</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">hooks</span><span class="o">.</span><span class="n">HiveServer2Hook</span><span class="p">(</span><span class="n">hiveserver2_conn_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">conn_id</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">conn_type</span> <span class="o">==</span> <span class="s1">'sqlite'</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">hooks</span><span class="o">.</span><span class="n">SqliteHook</span><span class="p">(</span><span class="n">sqlite_conn_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">conn_id</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">conn_type</span> <span class="o">==</span> <span class="s1">'jdbc'</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">hooks</span><span class="o">.</span><span class="n">JdbcHook</span><span class="p">(</span><span class="n">jdbc_conn_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">conn_id</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">conn_type</span> <span class="o">==</span> <span class="s1">'mssql'</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">hooks</span><span class="o">.</span><span class="n">MsSqlHook</span><span class="p">(</span><span class="n">mssql_conn_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">conn_id</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">conn_type</span> <span class="o">==</span> <span class="s1">'oracle'</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">hooks</span><span class="o">.</span><span class="n">OracleHook</span><span class="p">(</span><span class="n">oracle_conn_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">conn_id</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">conn_type</span> <span class="o">==</span> <span class="s1">'vertica'</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">contrib_hooks</span><span class="o">.</span><span class="n">VerticaHook</span><span class="p">(</span><span class="n">vertica_conn_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">conn_id</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">conn_type</span> <span class="o">==</span> <span class="s1">'cloudant'</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">contrib_hooks</span><span class="o">.</span><span class="n">CloudantHook</span><span class="p">(</span><span class="n">cloudant_conn_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">conn_id</span><span class="p">)</span>
+ <span class="k">except</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">None</span>
+
+ <span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">conn_id</span>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">extra_dejson</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""Returns the extra property by deserializing json"""</span>
+ <span class="n">obj</span> <span class="o">=</span> <span class="p">{}</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">extra</span><span class="p">:</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">obj</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">extra</span><span class="p">)</span>
+ <span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">exception</span><span class="p">(</span><span class="n">e</span><span class="p">)</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">error</span><span class="p">(</span>
+ <span class="s2">"Failed parsing the json for "</span>
+ <span class="s2">"conn_id {}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">conn_id</span><span class="p">))</span>
+ <span class="k">return</span> <span class="n">obj</span></div>
+
+
+<span class="k">class</span> <span class="nc">DagPickle</span><span class="p">(</span><span class="n">Base</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Dags can originate from different places (user repos, master repo, ...)</span>
+<span class="sd"> and also get executed in different places (different executors). This</span>
+<span class="sd"> object represents a version of a DAG and becomes a source of truth for</span>
+<span class="sd"> a BackfillJob execution. A pickle is a native python serialized object,</span>
+<span class="sd"> and in this case gets stored in the database for the duration of the job.</span>
+
+<span class="sd"> The executors pick up the DagPickle id and read the dag definition from</span>
+<span class="sd"> the database.</span>
+<span class="sd"> """</span>
+ <span class="nb">id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Integer</span><span class="p">,</span> <span class="n">primary_key</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="n">pickle</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">PickleType</span><span class="p">(</span><span class="n">pickler</span><span class="o">=</span><span class="n">dill</span><span class="p">))</span>
+ <span class="n">created_dttm</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">DateTime</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="n">func</span><span class="o">.</span><span class="n">now</span><span class="p">())</span>
+ <span class="n">pickle_hash</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Text</span><span class="p">)</span>
+
+ <span class="n">__tablename__</span> <span class="o">=</span> <span class="s2">"dag_pickle"</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dag</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span> <span class="o">=</span> <span class="n">dag</span><span class="o">.</span><span class="n">dag_id</span>
+ <span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">dag</span><span class="p">,</span> <span class="s1">'template_env'</span><span class="p">):</span>
+ <span class="n">dag</span><span class="o">.</span><span class="n">template_env</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">pickle_hash</span> <span class="o">=</span> <span class="nb">hash</span><span class="p">(</span><span class="n">dag</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">pickle</span> <span class="o">=</span> <span class="n">dag</span>
+
+
+<div class="viewcode-block" id="TaskInstance"><a class="viewcode-back" href="../../code.html#airflow.models.TaskInstance">[docs]</a><span class="k">class</span> <span class="nc">TaskInstance</span><span class="p">(</span><span class="n">Base</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Task instances store the state of a task instance. This table is the</span>
+<span class="sd"> authority and single source of truth around what tasks have run and the</span>
+<span class="sd"> state they are in.</span>
+
+<span class="sd"> The SqlAchemy model doesn't have a SqlAlchemy foreign key to the task or</span>
+<span class="sd"> dag model deliberately to have more control over transactions.</span>
+
+<span class="sd"> Database transactions on this table should insure double triggers and</span>
+<span class="sd"> any confusion around what task instances are or aren't ready to run</span>
+<span class="sd"> even while multiple schedulers may be firing task instances.</span>
+<span class="sd"> """</span>
+
+ <span class="n">__tablename__</span> <span class="o">=</span> <span class="s2">"task_instance"</span>
+
+ <span class="n">task_id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="n">ID_LEN</span><span class="p">),</span> <span class="n">primary_key</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="n">dag_id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="n">ID_LEN</span><span class="p">),</span> <span class="n">primary_key</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="n">execution_date</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">DateTime</span><span class="p">,</span> <span class="n">primary_key</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="n">start_date</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">DateTime</span><span class="p">)</span>
+ <span class="n">end_date</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">DateTime</span><span class="p">)</span>
+ <span class="n">duration</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Float</span><span class="p">)</span>
+ <span class="n">state</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="mi">20</span><span class="p">))</span>
+ <span class="n">try_number</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Integer</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+ <span class="n">hostname</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="mi">1000</span><span class="p">))</span>
+ <span class="n">unixname</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="mi">1000</span><span class="p">))</span>
+ <span class="n">job_id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Integer</span><span class="p">)</span>
+ <span class="n">pool</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="mi">50</span><span class="p">))</span>
+ <span class="n">queue</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="mi">50</span><span class="p">))</span>
+ <span class="n">priority_weight</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Integer</span><span class="p">)</span>
+ <span class="n">operator</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="mi">1000</span><span class="p">))</span>
+ <span class="n">queued_dttm</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">DateTime</span><span class="p">)</span>
+
+ <span class="n">__table_args__</span> <span class="o">=</span> <span class="p">(</span>
+ <span class="n">Index</span><span class="p">(</span><span class="s1">'ti_dag_state'</span><span class="p">,</span> <span class="n">dag_id</span><span class="p">,</span> <span class="n">state</span><span class="p">),</span>
+ <span class="n">Index</span><span class="p">(</span><span class="s1">'ti_state_lkp'</span><span class="p">,</span> <span class="n">dag_id</span><span class="p">,</span> <span class="n">task_id</span><span class="p">,</span> <span class="n">execution_date</span><span class="p">,</span> <span class="n">state</span><span class="p">),</span>
+ <span class="n">Index</span><span class="p">(</span><span class="s1">'ti_pool'</span><span class="p">,</span> <span class="n">pool</span><span class="p">,</span> <span class="n">state</span><span class="p">,</span> <span class="n">priority_weight</span><span class="p">),</span>
+ <span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">task</span><span class="p">,</span> <span class="n">execution_date</span><span class="p">,</span> <span class="n">state</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span> <span class="o">=</span> <span class="n">task</span><span class="o">.</span><span class="n">dag_id</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">task_id</span> <span class="o">=</span> <span class="n">task</span><span class="o">.</span><span class="n">task_id</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span> <span class="o">=</span> <span class="n">execution_date</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">task</span> <span class="o">=</span> <span class="n">task</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">queue</span> <span class="o">=</span> <span class="n">task</span><span class="o">.</span><span class="n">queue</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">pool</span> <span class="o">=</span> <span class="n">task</span><span class="o">.</span><span class="n">pool</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">priority_weight</span> <span class="o">=</span> <span class="n">task</span><span class="o">.</span><span class="n">priority_weight_total</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">try_number</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">test_mode</span> <span class="o">=</span> <span class="bp">False</span> <span class="c1"># can be changed when calling 'run'</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">force</span> <span class="o">=</span> <span class="bp">False</span> <span class="c1"># can be changed when calling 'run'</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">unixname</span> <span class="o">=</span> <span class="n">getpass</span><span class="o">.</span><span class="n">getuser</span><span class="p">()</span>
+ <span class="k">if</span> <span class="n">state</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="n">state</span>
+
+<div class="viewcode-block" id="TaskInstance.command"><a class="viewcode-back" href="../../code.html#airflow.models.TaskInstance.command">[docs]</a> <span class="k">def</span> <span class="nf">command</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span>
+ <span class="n">mark_success</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">ignore_dependencies</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">ignore_depends_on_past</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">force</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">local</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">pickle_id</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">raw</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">job_id</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">pool</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns a command that can be executed anywhere where airflow is</span>
+<span class="sd"> installed. This command is part of the message sent to executors by</span>
+<span class="sd"> the orchestrator.</span>
+<span class="sd"> """</span>
+ <span class="n">dag</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">task</span><span class="o">.</span><span class="n">dag</span>
+ <span class="n">iso</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span><span class="o">.</span><span class="n">isoformat</span><span class="p">()</span>
+ <span class="n">cmd</span> <span class="o">=</span> <span class="s2">"airflow run {self.dag_id} {self.task_id} {iso} "</span>
+ <span class="n">cmd</span> <span class="o">+=</span> <span class="s2">"--mark_success "</span> <span class="k">if</span> <span class="n">mark_success</span> <span class="k">else</span> <span class="s2">""</span>
+ <span class="n">cmd</span> <span class="o">+=</span> <span class="s2">"--pickle {pickle_id} "</span> <span class="k">if</span> <span class="n">pickle_id</span> <span class="k">else</span> <span class="s2">""</span>
+ <span class="n">cmd</span> <span class="o">+=</span> <span class="s2">"--job_id {job_id} "</span> <span class="k">if</span> <span class="n">job_id</span> <span class="k">else</span> <span class="s2">""</span>
+ <span class="n">cmd</span> <span class="o">+=</span> <span class="s2">"-i "</span> <span class="k">if</span> <span class="n">ignore_dependencies</span> <span class="k">else</span> <span class="s2">""</span>
+ <span class="n">cmd</span> <span class="o">+=</span> <span class="s2">"-I "</span> <span class="k">if</span> <span class="n">ignore_depends_on_past</span> <span class="k">else</span> <span class="s2">""</span>
+ <span class="n">cmd</span> <span class="o">+=</span> <span class="s2">"--force "</span> <span class="k">if</span> <span class="n">force</span> <span class="k">else</span> <span class="s2">""</span>
+ <span class="n">cmd</span> <span class="o">+=</span> <span class="s2">"--local "</span> <span class="k">if</span> <span class="n">local</span> <span class="k">else</span> <span class="s2">""</span>
+ <span class="n">cmd</span> <span class="o">+=</span> <span class="s2">"--pool {pool} "</span> <span class="k">if</span> <span class="n">pool</span> <span class="k">else</span> <span class="s2">""</span>
+ <span class="n">cmd</span> <span class="o">+=</span> <span class="s2">"--raw "</span> <span class="k">if</span> <span class="n">raw</span> <span class="k">else</span> <span class="s2">""</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">pickle_id</span> <span class="ow">and</span> <span class="n">dag</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">dag</span><span class="o">.</span><span class="n">full_filepath</span> <span class="o">!=</span> <span class="n">dag</span><span class="o">.</span><span class="n">filepath</span><span class="p">:</span>
+ <span class="n">cmd</span> <span class="o">+=</span> <span class="s2">"-sd DAGS_FOLDER/{dag.filepath} "</span>
+ <span class="k">elif</span> <span class="n">dag</span><span class="o">.</span><span class="n">full_filepath</span><span class="p">:</span>
+ <span class="n">cmd</span> <span class="o">+=</span> <span class="s2">"-sd {dag.full_filepath}"</span>
+ <span class="k">return</span> <span class="n">cmd</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">())</span></div>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">log_filepath</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="n">iso</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span><span class="o">.</span><span class="n">isoformat</span><span class="p">()</span>
+ <span class="n">log</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">expanduser</span><span class="p">(</span><span class="n">configuration</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'core'</span><span class="p">,</span> <span class="s1">'BASE_LOG_FOLDER'</span><span class="p">))</span>
+ <span class="k">return</span> <span class="p">(</span>
+ <span class="s2">"{log}/{self.dag_id}/{self.task_id}/{iso}.log"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">()))</span>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">log_url</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="n">iso</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span><span class="o">.</span><span class="n">isoformat</span><span class="p">()</span>
+ <span class="n">BASE_URL</span> <span class="o">=</span> <span class="n">configuration</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'webserver'</span><span class="p">,</span> <span class="s1">'BASE_URL'</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">BASE_URL</span> <span class="o">+</span> <span class="p">(</span>
+ <span class="s2">"/admin/airflow/log"</span>
+ <span class="s2">"?dag_id={self.dag_id}"</span>
+ <span class="s2">"&task_id={self.task_id}"</span>
+ <span class="s2">"&execution_date={iso}"</span>
+ <span class="p">)</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">())</span>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">mark_success_url</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="n">iso</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span><span class="o">.</span><span class="n">isoformat</span><span class="p">()</span>
+ <span class="n">BASE_URL</span> <span class="o">=</span> <span class="n">configuration</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'webserver'</span><span class="p">,</span> <span class="s1">'BASE_URL'</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">BASE_URL</span> <span class="o">+</span> <span class="p">(</span>
+ <span class="s2">"/admin/airflow/action"</span>
+ <span class="s2">"?action=success"</span>
+ <span class="s2">"&task_id={self.task_id}"</span>
+ <span class="s2">"&dag_id={self.dag_id}"</span>
+ <span class="s2">"&execution_date={iso}"</span>
+ <span class="s2">"&upstream=false"</span>
+ <span class="s2">"&downstream=false"</span>
+ <span class="p">)</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">())</span>
+
+ <span class="nd">@provide_session</span>
+<div class="viewcode-block" id="TaskInstance.current_state"><a class="viewcode-back" href="../../code.html#airflow.models.TaskInstance.current_state">[docs]</a> <span class="k">def</span> <span class="nf">current_state</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">session</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Get the very latest state from the database, if a session is passed,</span>
+<span class="sd"> we use and looking up the state becomes part of the session, otherwise</span>
+<span class="sd"> a new session is used.</span>
+<span class="sd"> """</span>
+ <span class="n">TI</span> <span class="o">=</span> <span class="n">TaskInstance</span>
+ <span class="n">ti</span> <span class="o">=</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">TI</span><span class="p">)</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">dag_id</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span><span class="p">,</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">task_id</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">task_id</span><span class="p">,</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">execution_date</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span><span class="p">,</span>
+ <span class="p">)</span><span class="o">.</span><span class="n">all</span><span class="p">()</span>
+ <span class="k">if</span> <span class="n">ti</span><span class="p">:</span>
+ <span class="n">state</span> <span class="o">=</span> <span class="n">ti</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">state</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">state</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="k">return</span> <span class="n">state</span></div>
+
+ <span class="nd">@provide_session</span>
+<div class="viewcode-block" id="TaskInstance.error"><a class="viewcode-back" href="../../code.html#airflow.models.TaskInstance.error">[docs]</a> <span class="k">def</span> <span class="nf">error</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">session</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Forces the task instance's state to FAILED in the database.</span>
+<span class="sd"> """</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s2">"Recording the task instance as FAILED"</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="n">State</span><span class="o">.</span><span class="n">FAILED</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">merge</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span></div>
+
+ <span class="nd">@provide_session</span>
+<div class="viewcode-block" id="TaskInstance.refresh_from_db"><a class="viewcode-back" href="../../code.html#airflow.models.TaskInstance.refresh_from_db">[docs]</a> <span class="k">def</span> <span class="nf">refresh_from_db</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">session</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">lock_for_update</span><span class="o">=</span><span class= [...]
+ <span class="sd">"""</span>
+<span class="sd"> Refreshes the task instance from the database based on the primary key</span>
+
+<span class="sd"> :param lock_for_update: if True, indicates that the database should</span>
+<span class="sd"> lock the TaskInstance (issuing a FOR UPDATE clause) until the session</span>
+<span class="sd"> is committed.</span>
+<span class="sd"> """</span>
+ <span class="n">TI</span> <span class="o">=</span> <span class="n">TaskInstance</span>
+
+ <span class="n">qry</span> <span class="o">=</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">TI</span><span class="p">)</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">dag_id</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span><span class="p">,</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">task_id</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">task_id</span><span class="p">,</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">execution_date</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span><span class="p">)</span>
+
+ <span class="k">if</span> <span class="n">lock_for_update</span><span class="p">:</span>
+ <span class="n">ti</span> <span class="o">=</span> <span class="n">qry</span><span class="o">.</span><span class="n">with_for_update</span><span class="p">()</span><span class="o">.</span><span class="n">first</span><span class="p">()</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">ti</span> <span class="o">=</span> <span class="n">qry</span><span class="o">.</span><span class="n">first</span><span class="p">()</span>
+ <span class="k">if</span> <span class="n">ti</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="n">ti</span><span class="o">.</span><span class="n">state</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">start_date</span> <span class="o">=</span> <span class="n">ti</span><span class="o">.</span><span class="n">start_date</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">end_date</span> <span class="o">=</span> <span class="n">ti</span><span class="o">.</span><span class="n">end_date</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">try_number</span> <span class="o">=</span> <span class="n">ti</span><span class="o">.</span><span class="n">try_number</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="bp">None</span></div>
+
+ <span class="nd">@provide_session</span>
+<div class="viewcode-block" id="TaskInstance.clear_xcom_data"><a class="viewcode-back" href="../../code.html#airflow.models.TaskInstance.clear_xcom_data">[docs]</a> <span class="k">def</span> <span class="nf">clear_xcom_data</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">session</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Clears all XCom data from the database for the task instance</span>
+<span class="sd"> """</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">XCom</span><span class="p">)</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span>
+ <span class="n">XCom</span><span class="o">.</span><span class="n">dag_id</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span><span class="p">,</span>
+ <span class="n">XCom</span><span class="o">.</span><span class="n">task_id</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">task_id</span><span class="p">,</span>
+ <span class="n">XCom</span><span class="o">.</span><span class="n">execution_date</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span>
+ <span class="p">)</span><span class="o">.</span><span class="n">delete</span><span class="p">()</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span></div>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">key</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns a tuple that identifies the task instance uniquely</span>
+<span class="sd"> """</span>
+ <span class="k">return</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">task_id</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">set_state</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">state</span><span class="p">,</span> <span class="n">session</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="n">state</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">start_date</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">end_date</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">merge</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+
+<div class="viewcode-block" id="TaskInstance.is_queueable"><a class="viewcode-back" href="../../code.html#airflow.models.TaskInstance.is_queueable">[docs]</a> <span class="k">def</span> <span class="nf">is_queueable</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span>
+ <span class="n">include_queued</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">ignore_depends_on_past</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">flag_upstream_failed</span><span class="o">=</span><span class="bp">False</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns a boolean on whether the task instance has met all dependencies</span>
+<span class="sd"> and is ready to run. It considers the task's state, the state</span>
+<span class="sd"> of its dependencies, depends_on_past and makes sure the execution</span>
+<span class="sd"> isn't in the future. It doesn't take into</span>
+<span class="sd"> account whether the pool has a slot for it to run.</span>
+
+<span class="sd"> :param include_queued: If True, tasks that have already been queued</span>
+<span class="sd"> are included. Defaults to False.</span>
+<span class="sd"> :type include_queued: boolean</span>
+<span class="sd"> :param ignore_depends_on_past: if True, ignores depends_on_past</span>
+<span class="sd"> dependencies. Defaults to False.</span>
+<span class="sd"> :type ignore_depends_on_past: boolean</span>
+<span class="sd"> :param flag_upstream_failed: This is a hack to generate</span>
+<span class="sd"> the upstream_failed state creation while checking to see</span>
+<span class="sd"> whether the task instance is runnable. It was the shortest</span>
+<span class="sd"> path to add the feature</span>
+<span class="sd"> :type flag_upstream_failed: boolean</span>
+<span class="sd"> """</span>
+ <span class="c1"># is the execution date in the future?</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span> <span class="o">></span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">():</span>
+ <span class="k">return</span> <span class="bp">False</span>
+ <span class="c1"># is the task still in the retry waiting period?</span>
+ <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_premature</span><span class="p">():</span>
+ <span class="k">return</span> <span class="bp">False</span>
+ <span class="c1"># does the task have an end_date prior to the execution date?</span>
+ <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">task</span><span class="o">.</span><span class="n">end_date</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span> <span class="o">></span> <span class="bp">self</span><span class="o">.</span><span class="n">task</span><span class="o">.</span><span class="n">end_date</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">False</span>
+ <span class="c1"># has the task been skipped?</span>
+ <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">==</span> <span class="n">State</span><span class="o">.</span><span class="n">SKIPPED</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">False</span>
+ <span class="c1"># has the task already been queued (and are we excluding queued tasks)?</span>
+ <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">==</span> <span class="n">State</span><span class="o">.</span><span class="n">QUEUED</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">include_queued</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">False</span>
+ <span class="c1"># is the task runnable and have its dependencies been met?</span>
+ <span class="k">elif</span> <span class="p">(</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="ow">in</span> <span class="n">State</span><span class="o">.</span><span class="n">runnable</span><span class="p">()</span> <span class="ow">and</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">are_dependencies_met</span><span class="p">(</span>
+ <span class="n">ignore_depends_on_past</span><span class="o">=</span><span class="n">ignore_depends_on_past</span><span class="p">,</span>
+ <span class="n">flag_upstream_failed</span><span class="o">=</span><span class="n">flag_upstream_failed</span><span class="p">)):</span>
+ <span class="k">return</span> <span class="bp">True</span>
+ <span class="c1"># anything else</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">False</span></div>
+
+<div class="viewcode-block" id="TaskInstance.is_premature"><a class="viewcode-back" href="../../code.html#airflow.models.TaskInstance.is_premature">[docs]</a> <span class="k">def</span> <span class="nf">is_premature</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns whether a task is in UP_FOR_RETRY state and its retry interval</span>
+<span class="sd"> has not yet elapsed.</span>
+<span class="sd"> """</span>
+ <span class="c1"># is the task still in the retry waiting period?</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">==</span> <span class="n">State</span><span class="o">.</span><span class="n">UP_FOR_RETRY</span> <span class="ow">and</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">ready_for_retry</span><span class="p">()</span></div>
+
+<div class="viewcode-block" id="TaskInstance.is_runnable"><a class="viewcode-back" href="../../code.html#airflow.models.TaskInstance.is_runnable">[docs]</a> <span class="k">def</span> <span class="nf">is_runnable</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span>
+ <span class="n">include_queued</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">ignore_depends_on_past</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">flag_upstream_failed</span><span class="o">=</span><span class="bp">False</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns whether a task is ready to run AND there's room in the</span>
+<span class="sd"> pool.</span>
+
+<span class="sd"> :param include_queued: If True, tasks that are already QUEUED are</span>
+<span class="sd"> considered "runnable". Defaults to False.</span>
+<span class="sd"> :type include_queued: boolean</span>
+<span class="sd"> :param ignore_depends_on_past: if True, ignores depends_on_past</span>
+<span class="sd"> dependencies. Defaults to False.</span>
+<span class="sd"> :type ignore_depends_on_past: boolean</span>
+<span class="sd"> """</span>
+ <span class="n">queueable</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_queueable</span><span class="p">(</span>
+ <span class="n">include_queued</span><span class="o">=</span><span class="n">include_queued</span><span class="p">,</span>
+ <span class="n">ignore_depends_on_past</span><span class="o">=</span><span class="n">ignore_depends_on_past</span><span class="p">,</span>
+ <span class="n">flag_upstream_failed</span><span class="o">=</span><span class="n">flag_upstream_failed</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">queueable</span> <span class="ow">and</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">pool_full</span><span class="p">()</span></div>
+
+ <span class="nd">@provide_session</span>
+<div class="viewcode-block" id="TaskInstance.are_dependents_done"><a class="viewcode-back" href="../../code.html#airflow.models.TaskInstance.are_dependents_done">[docs]</a> <span class="k">def</span> <span class="nf">are_dependents_done</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">session</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Checks whether the dependents of this task instance have all succeeded.</span>
+<span class="sd"> This is meant to be used by wait_for_downstream.</span>
+
+<span class="sd"> This is useful when you do not want to start processing the next</span>
+<span class="sd"> schedule of a task until the dependents are done. For instance,</span>
+<span class="sd"> if the task DROPs and recreates a table.</span>
+<span class="sd"> """</span>
+ <span class="n">task</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">task</span>
+
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">task</span><span class="o">.</span><span class="n">downstream_task_ids</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">True</span>
+
+ <span class="n">ti</span> <span class="o">=</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">func</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="n">TaskInstance</span><span class="o">.</span><span class="n">task_id</span><span class="p">))</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span>
+ <span class="n">TaskInstance</span><span class="o">.</span><span class="n">dag_id</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span><span class="p">,</span>
+ <span class="n">TaskInstance</span><span class="o">.</span><span class="n">task_id</span><span class="o">.</span><span class="n">in_</span><span class="p">(</span><span class="n">task</span><span class="o">.</span><span class="n">downstream_task_ids</span><span class="p">),</span>
+ <span class="n">TaskInstance</span><span class="o">.</span><span class="n">execution_date</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span><span class="p">,</span>
+ <span class="n">TaskInstance</span><span class="o">.</span><span class="n">state</span> <span class="o">==</span> <span class="n">State</span><span class="o">.</span><span class="n">SUCCESS</span><span class="p">,</span>
+ <span class="p">)</span>
+ <span class="n">count</span> <span class="o">=</span> <span class="n">ti</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span>
+ <span class="k">return</span> <span class="n">count</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span><span class="n">task</span><span class="o">.</span><span class="n">downstream_task_ids</span><span class="p">)</span></div>
+
+ <span class="nd">@provide_session</span>
+<div class="viewcode-block" id="TaskInstance.evaluate_trigger_rule"><a class="viewcode-back" href="../../code.html#airflow.models.TaskInstance.evaluate_trigger_rule">[docs]</a> <span class="k">def</span> <span class="nf">evaluate_trigger_rule</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">successes</span><span class="p">,</span> <span class="n">skipped</span><span class="p">,</span> <span class="n">failed</span><span class="p">,</span>
+ <span class="n">upstream_failed</span><span class="p">,</span> <span class="n">done</span><span class="p">,</span>
+ <span class="n">flag_upstream_failed</span><span class="p">,</span> <span class="n">session</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns a boolean on whether the current task can be scheduled</span>
+<span class="sd"> for execution based on its trigger_rule.</span>
+
+<span class="sd"> :param flag_upstream_failed: This is a hack to generate</span>
+<span class="sd"> the upstream_failed state creation while checking to see</span>
+<span class="sd"> whether the task instance is runnable. It was the shortest</span>
+<span class="sd"> path to add the feature</span>
+<span class="sd"> :type flag_upstream_failed: boolean</span>
+<span class="sd"> :param successes: Number of successful upstream tasks</span>
+<span class="sd"> :type successes: int</span>
+<span class="sd"> :param skipped: Number of skipped upstream tasks</span>
+<span class="sd"> :type skipped: int</span>
+<span class="sd"> :param failed: Number of failed upstream tasks</span>
+<span class="sd"> :type failed: int</span>
+<span class="sd"> :param upstream_failed: Number of upstream_failed upstream tasks</span>
+<span class="sd"> :type upstream_failed: int</span>
+<span class="sd"> :param done: Number of completed upstream tasks</span>
+<span class="sd"> :type done: int</span>
+<span class="sd"> """</span>
+ <span class="n">TR</span> <span class="o">=</span> <span class="n">TriggerRule</span>
+
+ <span class="n">task</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">task</span>
+ <span class="n">upstream</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">task</span><span class="o">.</span><span class="n">upstream_task_ids</span><span class="p">)</span>
+ <span class="n">tr</span> <span class="o">=</span> <span class="n">task</span><span class="o">.</span><span class="n">trigger_rule</span>
+ <span class="n">upstream_done</span> <span class="o">=</span> <span class="n">done</span> <span class="o">>=</span> <span class="n">upstream</span>
+
+ <span class="c1"># handling instant state assignment based on trigger rules</span>
+ <span class="k">if</span> <span class="n">flag_upstream_failed</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">tr</span> <span class="o">==</span> <span class="n">TR</span><span class="o">.</span><span class="n">ALL_SUCCESS</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">upstream_failed</span> <span class="ow">or</span> <span class="n">failed</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">set_state</span><span class="p">(</span><span class="n">State</span><span class="o">.</span><span class="n">UPSTREAM_FAILED</span><span class="p">,</span> <span class="n">session</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="n">skipped</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">set_state</span><span class="p">(</span><span class="n">State</span><span class="o">.</span><span class="n">SKIPPED</span><span class="p">,</span> <span class="n">session</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="n">tr</span> <span class="o">==</span> <span class="n">TR</span><span class="o">.</span><span class="n">ALL_FAILED</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">successes</span> <span class="ow">or</span> <span class="n">skipped</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">set_state</span><span class="p">(</span><span class="n">State</span><span class="o">.</span><span class="n">SKIPPED</span><span class="p">,</span> <span class="n">session</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="n">tr</span> <span class="o">==</span> <span class="n">TR</span><span class="o">.</span><span class="n">ONE_SUCCESS</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">upstream_done</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">successes</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">set_state</span><span class="p">(</span><span class="n">State</span><span class="o">.</span><span class="n">SKIPPED</span><span class="p">,</span> <span class="n">session</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="n">tr</span> <span class="o">==</span> <span class="n">TR</span><span class="o">.</span><span class="n">ONE_FAILED</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">upstream_done</span> <span class="ow">and</span> <span class="ow">not</span> <span class="p">(</span><span class="n">failed</span> <span class="ow">or</span> <span class="n">upstream_failed</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">set_state</span><span class="p">(</span><span class="n">State</span><span class="o">.</span><span class="n">SKIPPED</span><span class="p">,</span> <span class="n">session</span><span class="p">)</span>
+
+ <span class="k">return</span> <span class="p">(</span>
+ <span class="p">(</span><span class="n">tr</span> <span class="o">==</span> <span class="n">TR</span><span class="o">.</span><span class="n">ONE_SUCCESS</span> <span class="ow">and</span> <span class="n">successes</span> <span class="o">></span> <span class="mi">0</span><span class="p">)</span> <span class="ow">or</span>
+ <span class="p">(</span><span class="n">tr</span> <span class="o">==</span> <span class="n">TR</span><span class="o">.</span><span class="n">ONE_FAILED</span> <span class="ow">and</span> <span class="p">(</span><span class="n">failed</span> <span class="ow">or</span> <span class="n">upstream_failed</span><span class="p">))</span> <span class="ow">or</span>
+ <span class="p">(</span><span class="n">tr</span> <span class="o">==</span> <span class="n">TR</span><span class="o">.</span><span class="n">ALL_SUCCESS</span> <span class="ow">and</span> <span class="n">successes</span> <span class="o">>=</span> <span class="n">upstream</span><span class="p">)</span> <span class="ow">or</span>
+ <span class="p">(</span><span class="n">tr</span> <span class="o">==</span> <span class="n">TR</span><span class="o">.</span><span class="n">ALL_FAILED</span> <span class="ow">and</span> <span class="n">failed</span> <span class="o">+</span> <span class="n">upstream_failed</span> <span class="o">>=</span> <span class="n">upstream</span><span class="p">)</span> <span class="ow">or</span>
+ <span class="p">(</span><span class="n">tr</span> <span class="o">==</span> <span class="n">TR</span><span class="o">.</span><span class="n">ALL_DONE</span> <span class="ow">and</span> <span class="n">upstream_done</span><span class="p">)</span>
+ <span class="p">)</span></div>
+
+ <span class="nd">@provide_session</span>
+<div class="viewcode-block" id="TaskInstance.are_dependencies_met"><a class="viewcode-back" href="../../code.html#airflow.models.TaskInstance.are_dependencies_met">[docs]</a> <span class="k">def</span> <span class="nf">are_dependencies_met</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span>
+ <span class="n">session</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">flag_upstream_failed</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">ignore_depends_on_past</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">verbose</span><span class="o">=</span><span class="bp">False</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns a boolean on whether the states of the upstream tasks satisfy</span>
+<span class="sd"> the task's trigger rule, and considers depends_on_past and the previous run's state.</span>
+
+<span class="sd"> :param flag_upstream_failed: This is a hack to generate</span>
+<span class="sd"> the upstream_failed state creation while checking to see</span>
+<span class="sd"> whether the task instance is runnable. It was the shortest</span>
+<span class="sd"> path to add the feature</span>
+<span class="sd"> :type flag_upstream_failed: boolean</span>
+<span class="sd"> :param ignore_depends_on_past: if True, ignores depends_on_past</span>
+<span class="sd"> dependencies. Defaults to False.</span>
+<span class="sd"> :type ignore_depends_on_past: boolean</span>
+<span class="sd"> :param verbose: verbose provides more logging in the case where the</span>
+<span class="sd"> task instance is evaluated as a check right before being executed.</span>
+<span class="sd"> In the case of the scheduler evaluating the dependencies, this</span>
+<span class="sd"> logging would be way too verbose.</span>
+<span class="sd"> :type verbose: boolean</span>
+<span class="sd"> """</span>
+ <span class="n">TI</span> <span class="o">=</span> <span class="n">TaskInstance</span>
+ <span class="n">TR</span> <span class="o">=</span> <span class="n">TriggerRule</span>
+
+ <span class="n">task</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">task</span>
+
+ <span class="c1"># Checking that the depends_on_past is fulfilled</span>
+ <span class="k">if</span> <span class="p">(</span><span class="n">task</span><span class="o">.</span><span class="n">depends_on_past</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">ignore_depends_on_past</span> <span class="ow">and</span>
+ <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span> <span class="o">==</span> <span class="n">task</span><span class="o">.</span><span class="n">start_date</span><span class="p">):</span>
+ <span class="n">previous_ti</span> <span class="o">=</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">TI</span><span class="p">)</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">dag_id</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span><span class="p">,</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">task_id</span> <span class="o">==</span> <span class="n">task</span><span class="o">.</span><span class="n">task_id</span><span class="p">,</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">execution_date</span> <span class="o">==</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">task</span><span class="o">.</span><span class="n">dag</span><span class="o">.</span><span class="n">previous_schedule</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span><span class="p">),</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">state</span><span class="o">.</span><span class="n">in_</span><span class="p">({</span><span class="n">State</span><span class="o">.</span><span class="n">SUCCESS</span><span class="p">,</span> <span class="n">State</span><span class="o">.</span><span class="n">SKIPPED</span><span class="p">}),</span>
+ <span class="p">)</span><span class="o">.</span><span class="n">first</span><span class="p">()</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">previous_ti</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">"depends_on_past not satisfied"</span><span class="p">)</span>
+ <span class="k">return</span> <span class="bp">False</span>
+
+ <span class="c1"># Applying wait_for_downstream</span>
+ <span class="n">previous_ti</span><span class="o">.</span><span class="n">task</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">task</span>
+ <span class="k">if</span> <span class="n">task</span><span class="o">.</span><span class="n">wait_for_downstream</span> <span class="ow">and</span> <span class="ow">not</span> \
+ <span class="n">previous_ti</span><span class="o">.</span><span class="n">are_dependents_done</span><span class="p">(</span><span class="n">session</span><span class="o">=</span><span class="n">session</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">"wait_for_downstream not satisfied"</span><span class="p">)</span>
+ <span class="k">return</span> <span class="bp">False</span>
+
+ <span class="c1"># Checking that all upstream dependencies have succeeded</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">task</span><span class="o">.</span><span class="n">upstream_list</span> <span class="ow">or</span> <span class="n">task</span><span class="o">.</span><span class="n">trigger_rule</span> <span class="o">==</span> <span class="n">TR</span><span class="o">.</span><span class="n">DUMMY</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">True</span>
+
+ <span class="c1"># todo: this query becomes quite expensive with dags that have</span>
+ <span class="c1"># many tasks. It should be refactored to let the task report</span>
+ <span class="c1"># to the dag run and get the aggregates from there</span>
+ <span class="n">qry</span> <span class="o">=</span> <span class="p">(</span>
+ <span class="n">session</span>
+ <span class="o">.</span><span class="n">query</span><span class="p">(</span>
+ <span class="n">func</span><span class="o">.</span><span class="n">coalesce</span><span class="p">(</span><span class="n">func</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span>
+ <span class="n">case</span><span class="p">([(</span><span class="n">TI</span><span class="o">.</span><span class="n">state</span> <span class="o">==</span> <span class="n">State</span><span class="o">.</span><span class="n">SUCCESS</span><span class="p">,</span> <span class="mi">1</span><span class="p">)],</span> <span class="n">else_</span><span class="o">=</span><span class="mi">0</span><span class="p">)),</span> <span class="mi">0</span><span class="p">),</span>
+ <span class="n">func</span><span class="o">.</span><span class="n">coalesce</span><span class="p">(</span><span class="n">func</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span>
+ <span class="n">case</span><span class="p">([(</span><span class="n">TI</span><span class="o">.</span><span class="n">state</span> <span class="o">==</span> <span class="n">State</span><span class="o">.</span><span class="n">SKIPPED</span><span class="p">,</span> <span class="mi">1</span><span class="p">)],</span> <span class="n">else_</span><span class="o">=</span><span class="mi">0</span><span class="p">)),</span> <span class="mi">0</span><span class="p">),</span>
+ <span class="n">func</span><span class="o">.</span><span class="n">coalesce</span><span class="p">(</span><span class="n">func</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span>
+ <span class="n">case</span><span class="p">([(</span><span class="n">TI</span><span class="o">.</span><span class="n">state</span> <span class="o">==</span> <span class="n">State</span><span class="o">.</span><span class="n">FAILED</span><span class="p">,</span> <span class="mi">1</span><span class="p">)],</span> <span class="n">else_</span><span class="o">=</span><span class="mi">0</span><span class="p">)),</span> <span class="mi">0</span><span class="p">),</span>
+ <span class="n">func</span><span class="o">.</span><span class="n">coalesce</span><span class="p">(</span><span class="n">func</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span>
+ <span class="n">case</span><span class="p">([(</span><span class="n">TI</span><span class="o">.</span><span class="n">state</span> <span class="o">==</span> <span class="n">State</span><span class="o">.</span><span class="n">UPSTREAM_FAILED</span><span class="p">,</span> <span class="mi">1</span><span class="p">)],</span> <span class="n">else_</span><span class="o">=</span><span class="mi">0</span><span class="p">)),</span> <span class="mi">0</span><span class="p">),</span>
+ <span class="n">func</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="n">TI</span><span class="o">.</span><span class="n">task_id</span><span class="p">),</span>
+ <span class="p">)</span>
+ <span class="o">.</span><span class="n">filter</span><span class="p">(</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">dag_id</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span><span class="p">,</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">task_id</span><span class="o">.</span><span class="n">in_</span><span class="p">(</span><span class="n">task</span><span class="o">.</span><span class="n">upstream_task_ids</span><span class="p">),</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">execution_date</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span><span class="p">,</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">state</span><span class="o">.</span><span class="n">in_</span><span class="p">([</span>
+ <span class="n">State</span><span class="o">.</span><span class="n">SUCCESS</span><span class="p">,</span> <span class="n">State</span><span class="o">.</span><span class="n">FAILED</span><span class="p">,</span>
+ <span class="n">State</span><span class="o">.</span><span class="n">UPSTREAM_FAILED</span><span class="p">,</span> <span class="n">State</span><span class="o">.</span><span class="n">SKIPPED</span><span class="p">]),</span>
+ <span class="p">)</span>
+ <span class="p">)</span>
+
+ <span class="n">successes</span><span class="p">,</span> <span class="n">skipped</span><span class="p">,</span> <span class="n">failed</span><span class="p">,</span> <span class="n">upstream_failed</span><span class="p">,</span> <span class="n">done</span> <span class="o">=</span> <span class="n">qry</span><span class="o">.</span><span class="n">first</span><span class="p">()</span>
+
+ <span class="n">satisfied</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">evaluate_trigger_rule</span><span class="p">(</span>
+ <span class="n">session</span><span class="o">=</span><span class="n">session</span><span class="p">,</span> <span class="n">successes</span><span class="o">=</span><span class="n">successes</span><span class="p">,</span> <span class="n">skipped</span><span class="o">=</span><span class="n">skipped</span><span class="p">,</span>
+ <span class="n">failed</span><span class="o">=</span><span class="n">failed</span><span class="p">,</span> <span class="n">upstream_failed</span><span class="o">=</span><span class="n">upstream_failed</span><span class="p">,</span> <span class="n">done</span><span class="o">=</span><span class="n">done</span><span class="p">,</span>
+ <span class="n">flag_upstream_failed</span><span class="o">=</span><span class="n">flag_upstream_failed</span><span class="p">)</span>
+
+ <span class="k">if</span> <span class="n">verbose</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">satisfied</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">"Trigger rule `{}` not satisfied"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">task</span><span class="o">.</span><span class="n">trigger_rule</span><span class="p">))</span>
+ <span class="k">return</span> <span class="n">satisfied</span></div>
+
+ <span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">(</span>
+ <span class="s2">"&lt;TaskInstance: {ti.dag_id}.{ti.task_id} "</span>
+ <span class="s2">"{ti.execution_date} [{ti.state}]>"</span>
+ <span class="p">)</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">ti</span><span class="o">=</span><span class="bp">self</span><span class="p">)</span>
+
+<div class="viewcode-block" id="TaskInstance.ready_for_retry"><a class="viewcode-back" href="../../code.html#airflow.models.TaskInstance.ready_for_retry">[docs]</a> <span class="k">def</span> <span class="nf">ready_for_retry</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Checks whether the task instance is in the UP_FOR_RETRY state and</span>
+<span class="sd"> its retry delay has elapsed since its end date.</span>
+<span class="sd"> """</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">==</span> <span class="n">State</span><span class="o">.</span><span class="n">UP_FOR_RETRY</span> <span class="ow">and</span> \
+ <span class="bp">self</span><span class="o">.</span><span class="n">end_date</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">task</span><span class="o">.</span><span class="n">retry_delay</span> <span class="o"><</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span></div>
+
+ <span class="nd">@provide_session</span>
+<div class="viewcode-block" id="TaskInstance.pool_full"><a class="viewcode-back" href="../../code.html#airflow.models.TaskInstance.pool_full">[docs]</a> <span class="k">def</span> <span class="nf">pool_full</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">session</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns a boolean as to whether the slot pool is full, i.e. has no</span>
+<span class="sd"> room for this task to run. Returns False when the task has no pool.</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">task</span><span class="o">.</span><span class="n">pool</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">False</span>
+
+ <span class="n">pool</span> <span class="o">=</span> <span class="p">(</span>
+ <span class="n">session</span>
+ <span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">Pool</span><span class="p">)</span>
+ <span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">Pool</span><span class="o">.</span><span class="n">pool</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">task</span><span class="o">.</span><span class="n">pool</span><span class="p">)</span>
+ <span class="o">.</span><span class="n">first</span><span class="p">()</span>
+ <span class="p">)</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">pool</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
+ <span class="s2">"Task specified a pool ({}) but the pool "</span>
+ <span class="s2">"doesn't exist!"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">task</span><span class="o">.</span><span class="n">pool</span><span class="p">))</span>
+ <span class="n">open_slots</span> <span class="o">=</span> <span class="n">pool</span><span class="o">.</span><span class="n">open_slots</span><span class="p">(</span><span class="n">session</span><span class="o">=</span><span class="n">session</span><span class="p">)</span>
+
+ <span class="k">return</span> <span class="n">open_slots</span> <span class="o"><=</span> <span class="mi">0</span></div>
+
+ <span class="nd">@provide_session</span>
+<div class="viewcode-block" id="TaskInstance.run"><a class="viewcode-back" href="../../code.html#airflow.models.TaskInstance.run">[docs]</a> <span class="k">def</span> <span class="nf">run</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span>
+ <span class="n">verbose</span><span class="o">=</span><span class="bp">True</span><span class="p">,</span>
+ <span class="n">ignore_dependencies</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span> <span class="c1"># Doesn't check for deps, just runs</span>
+ <span class="n">ignore_depends_on_past</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span> <span class="c1"># Ignore depends_on_past but respect</span>
+ <span class="c1"># other deps</span>
+ <span class="n">force</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span> <span class="c1"># Disregards previous successes</span>
+ <span class="n">mark_success</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span> <span class="c1"># Don't run the task, act as if it succeeded</span>
+ <span class="n">test_mode</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span> <span class="c1"># Doesn't record success or failure in the DB</span>
+ <span class="n">job_id</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">pool</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">session</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Runs the task instance.</span>
+<span class="sd"> """</span>
+ <span class="n">task</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">task</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">pool</span> <span class="o">=</span> <span class="n">pool</span> <span class="ow">or</span> <span class="n">task</span><span class="o">.</span><span class="n">pool</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">test_mode</span> <span class="o">=</span> <span class="n">test_mode</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">force</span> <span class="o">=</span> <span class="n">force</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">refresh_from_db</span><span class="p">(</span><span class="n">session</span><span class="o">=</span><span class="n">session</span><span class="p">,</span> <span class="n">lock_for_update</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">clear_xcom_data</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">job_id</span> <span class="o">=</span> <span class="n">job_id</span>
+ <span class="n">iso</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span><span class="o">.</span><span class="n">isoformat</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">hostname</span> <span class="o">=</span> <span class="n">socket</span><span class="o">.</span><span class="n">getfqdn</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">operator</span> <span class="o">=</span> <span class="n">task</span><span class="o">.</span><span class="n">__class__</span><span class="o">.</span><span class="n">__name__</span>
+
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">==</span> <span class="n">State</span><span class="o">.</span><span class="n">RUNNING</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">"Another instance is running, skipping."</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">==</span> <span class="n">State</span><span class="o">.</span><span class="n">REMOVED</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"Task {} was removed from the dag"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span>
+ <span class="k">elif</span> <span class="ow">not</span> <span class="n">force</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">==</span> <span class="n">State</span><span class="o">.</span><span class="n">SUCCESS</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
+ <span class="s2">"Task {self} previously succeeded"</span>
+ <span class="s2">" on {self.end_date}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">())</span>
+ <span class="p">)</span>
+ <span class="n">Stats</span><span class="o">.</span><span class="n">incr</span><span class="p">(</span><span class="s1">'previously_succeeded'</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="p">(</span>
+ <span class="ow">not</span> <span class="n">ignore_dependencies</span> <span class="ow">and</span>
+ <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">are_dependencies_met</span><span class="p">(</span>
+ <span class="n">session</span><span class="o">=</span><span class="n">session</span><span class="p">,</span>
+ <span class="n">ignore_depends_on_past</span><span class="o">=</span><span class="n">ignore_depends_on_past</span><span class="p">,</span>
+ <span class="n">verbose</span><span class="o">=</span><span class="bp">True</span><span class="p">)):</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">"Dependencies not met yet"</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="p">(</span>
+ <span class="c1"># todo: move this to the scheduler</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">==</span> <span class="n">State</span><span class="o">.</span><span class="n">UP_FOR_RETRY</span> <span class="ow">and</span>
+ <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">ready_for_retry</span><span class="p">()):</span>
+ <span class="n">next_run</span> <span class="o">=</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">end_date</span> <span class="o">+</span> <span class="n">task</span><span class="o">.</span><span class="n">retry_delay</span><span class="p">)</span><span class="o">.</span><span class="n">isoformat</span><span class="p">()</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
+ <span class="s2">"Not ready for retry yet. "</span> <span class="o">+</span>
+ <span class="s2">"Next run after {0}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">next_run</span><span class="p">)</span>
+ <span class="p">)</span>
+ <span class="k">elif</span> <span class="n">force</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="ow">in</span> <span class="n">State</span><span class="o">.</span><span class="n">runnable</span><span class="p">():</span>
+ <span class="n">HR</span> <span class="o">=</span> <span class="s2">"</span><span class="se">\n</span><span class="s2">"</span> <span class="o">+</span> <span class="p">(</span><span class="s2">"-"</span> <span class="o">*</span> <span class="mi">80</span><span class="p">)</span> <span class="o">+</span> <span class="s2">"</span><span class="se">\n</span><span class="s2">"</span> <span class="c1"># Line break</span>
+
+ <span class="c1"># For reporting purposes, we report based on 1-indexed,</span>
+ <span class="c1"># not 0-indexed lists (i.e. Attempt 1 instead of</span>
+ <span class="c1"># Attempt 0 for the first attempt).</span>
+ <span class="n">msg</span> <span class="o">=</span> <span class="s2">"Starting attempt {attempt} of {total}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
+ <span class="n">attempt</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">try_number</span> <span class="o">%</span> <span class="p">(</span><span class="n">task</span><span class="o">.</span><span class="n">retries</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span>
+ <span class="n">total</span><span class="o">=</span><span class="n">task</span><span class="o">.</span><span class="n">retries</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">start_date</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
+
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">mark_success</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">!=</span> <span class="n">State</span><span class="o">.</span><span class="n">QUEUED</span> <span class="ow">and</span> <span class="p">(</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">pool</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">task</span><span class="o">.</span><span class="n">dag</span><span class="o">.</span><span class="n">concurrency_reached</span><span class="p">):</span>
+ <span class="c1"># If a pool is set for this task, or the DAG reports</span>
+ <span class="c1"># concurrency_reached, mark the task instance as QUEUED</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="n">State</span><span class="o">.</span><span class="n">QUEUED</span>
+ <span class="n">msg</span> <span class="o">=</span> <span class="s2">"Queuing attempt {attempt} of {total}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
+ <span class="n">attempt</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">try_number</span> <span class="o">%</span> <span class="p">(</span><span class="n">task</span><span class="o">.</span><span class="n">retries</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span>
+ <span class="n">total</span><span class="o">=</span><span class="n">task</span><span class="o">.</span><span class="n">retries</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="n">HR</span> <span class="o">+</span> <span class="n">msg</span> <span class="o">+</span> <span class="n">HR</span><span class="p">)</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">queued_dttm</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">merge</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"Queuing into pool {}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">pool</span><span class="p">))</span>
+ <span class="k">return</span>
+
+ <span class="c1"># print status message</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="n">HR</span> <span class="o">+</span> <span class="n">msg</span> <span class="o">+</span> <span class="n">HR</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">try_number</span> <span class="o">+=</span> <span class="mi">1</span>
+
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">test_mode</span><span class="p">:</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">Log</span><span class="p">(</span><span class="n">State</span><span class="o">.</span><span class="n">RUNNING</span><span class="p">,</span> <span class="bp">self</span><span class="p">))</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="n">State</span><span class="o">.</span><span class="n">RUNNING</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">end_date</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">test_mode</span><span class="p">:</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">merge</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+
+ <span class="c1"># Closing all pooled connections to prevent</span>
+ <span class="c1"># "max number of connections reached"</span>
+ <span class="n">settings</span><span class="o">.</span><span class="n">engine</span><span class="o">.</span><span class="n">dispose</span><span class="p">()</span>
+ <span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">mark_success</span><span class="p">:</span>
+ <span class="n">msg</span> <span class="o">=</span> <span class="s2">"Marking success for "</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">msg</span> <span class="o">=</span> <span class="s2">"Executing "</span>
+ <span class="n">msg</span> <span class="o">+=</span> <span class="s2">"{self.task} on {self.execution_date}"</span>
+
+ <span class="n">context</span> <span class="o">=</span> <span class="p">{}</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="n">msg</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">=</span><span class="bp">self</span><span class="p">))</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">mark_success</span><span class="p">:</span>
+ <span class="n">context</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_template_context</span><span class="p">()</span>
+
+ <span class="n">task_copy</span> <span class="o">=</span> <span class="n">copy</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">task</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">task</span> <span class="o">=</span> <span class="n">task_copy</span>
+
+ <span class="k">def</span> <span class="nf">signal_handler</span><span class="p">(</span><span class="n">signum</span><span class="p">,</span> <span class="n">frame</span><span class="p">):</span>
+ <span class="sd">'''Setting kill signal handler'''</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s2">"Killing subprocess"</span><span class="p">)</span>
+ <span class="n">task_copy</span><span class="o">.</span><span class="n">on_kill</span><span class="p">()</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span><span class="s2">"Task received SIGTERM signal"</span><span class="p">)</span>
+ <span class="n">signal</span><span class="o">.</span><span class="n">signal</span><span class="p">(</span><span class="n">signal</span><span class="o">.</span><span class="n">SIGTERM</span><span class="p">,</span> <span class="n">signal_handler</span><span class="p">)</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">render_templates</span><span class="p">()</span>
+ <span class="n">task_copy</span><span class="o">.</span><span class="n">pre_execute</span><span class="p">(</span><span class="n">context</span><span class="o">=</span><span class="n">context</span><span class="p">)</span>
+
+ <span class="c1"># If a timeout is specified for the task, make it fail</span>
+ <span class="c1"># if it runs beyond that timeout</span>
+ <span class="n">result</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="k">if</span> <span class="n">task_copy</span><span class="o">.</span><span class="n">execution_timeout</span><span class="p">:</span>
+ <span class="k">with</span> <span class="n">timeout</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span>
+ <span class="n">task_copy</span><span class="o">.</span><span class="n">execution_timeout</span><span class="o">.</span><span class="n">total_seconds</span><span class="p">())):</span>
+ <span class="n">result</span> <span class="o">=</span> <span class="n">task_copy</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">context</span><span class="o">=</span><span class="n">context</span><span class="p">)</span>
+
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">result</span> <span class="o">=</span> <span class="n">task_copy</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">context</span><span class="o">=</span><span class="n">context</span><span class="p">)</span>
+
+ <span class="c1"># If the task returns a result, push an XCom containing it</span>
+ <span class="k">if</span> <span class="n">result</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">xcom_push</span><span class="p">(</span><span class="n">key</span><span class="o">=</span><span class="n">XCOM_RETURN_KEY</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="n">result</span><span class="p">)</span>
+
+ <span class="n">task_copy</span><span class="o">.</span><span class="n">post_execute</span><span class="p">(</span><span class="n">context</span><span class="o">=</span><span class="n">context</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="n">State</span><span class="o">.</span><span class="n">SUCCESS</span>
+ <span class="k">except</span> <span class="n">AirflowSkipException</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="n">State</span><span class="o">.</span><span class="n">SKIPPED</span>
+ <span class="k">except</span> <span class="p">(</span><span class="ne">Exception</span><span class="p">,</span> <span class="ne">KeyboardInterrupt</span><span class="p">)</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">handle_failure</span><span class="p">(</span><span class="n">e</span><span class="p">,</span> <span class="n">test_mode</span><span class="p">,</span> <span class="n">context</span><span class="p">)</span>
+ <span class="k">raise</span>
+
+ <span class="c1"># Recording the final state (SUCCESS or SKIPPED)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">end_date</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">set_duration</span><span class="p">()</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">test_mode</span><span class="p">:</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">Log</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">state</span><span class="p">,</span> <span class="bp">self</span><span class="p">))</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">merge</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+
+ <span class="c1"># Success callback</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">task</span><span class="o">.</span><span class="n">on_success_callback</span><span class="p">:</span>
+ <span class="n">task</span><span class="o">.</span><span class="n">on_success_callback</span><span class="p">(</span><span class="n">context</span><span class="p">)</span>
+ <span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e3</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s2">"Failed when executing success callback"</span><span class="p">)</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">exception</span><span class="p">(</span><span class="n">e3</span><span class="p">)</span>
+
+ <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span></div>
+
+ <span class="k">def</span> <span class="nf">dry_run</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="n">task</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">task</span>
+ <span class="n">task_copy</span> <span class="o">=</span> <span class="n">copy</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">task</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">task</span> <span class="o">=</span> <span class="n">task_copy</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">render_templates</span><span class="p">()</span>
+ <span class="n">task_copy</span><span class="o">.</span><span class="n">dry_run</span><span class="p">()</span>
+
+ <span class="k">def</span> <span class="nf">handle_failure</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">error</span><span class="p">,</span> <span class="n">test_mode</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span> <span class="n">context</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">exception</span><span class="p">(</span><span class="n">error</span><span class="p">)</span>
+ <span class="n">task</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">task</span>
+ <span class="n">session</span> <span class="o">=</span> <span class="n">settings</span><span class="o">.</span><span class="n">Session</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">end_date</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">set_duration</span><span class="p">()</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">test_mode</span><span class="p">:</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">Log</span><span class="p">(</span><span class="n">State</span><span class="o">.</span><span class="n">FAILED</span><span class="p">,</span> <span class="bp">self</span><span class="p">))</span>
+
+ <span class="c1"># Let's go deeper</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">task</span><span class="o">.</span><span class="n">retries</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">try_number</span> <span class="o">%</span> <span class="p">(</span><span class="n">task</span><span class="o">.</span><span class="n">retries</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="n">State</span><span class="o">.</span><span class="n">UP_FOR_RETRY</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Marking task as UP_FOR_RETRY'</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">task</span><span class="o">.</span><span class="n">email_on_retry</span> <span class="ow">and</span> <span class="n">task</span><span class="o">.</span><span class="n">email</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">email_alert</span><span class="p">(</span><span class="n">error</span><span class="p">,</span> <span class="n">is_retry</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="n">State</span><span class="o">.</span><span class="n">FAILED</span>
+ <span class="k">if</span> <span class="n">task</span><span class="o">.</span><span class="n">retries</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'All retries failed; marking task as FAILED'</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Marking task as FAILED.'</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">task</span><span class="o">.</span><span class="n">email_on_failure</span> <span class="ow">and</span> <span class="n">task</span><span class="o">.</span><span class="n">email</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">email_alert</span><span class="p">(</span><span class="n">error</span><span class="p">,</span> <span class="n">is_retry</span><span class="o">=</span><span class="bp">False</span><span class="p">)</span>
+ <span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e2</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">error</span><span class="p">(</span>
+ <span class="s1">'Failed to send email to: '</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">task</span><span class="o">.</span><span class="n">email</span><span class="p">))</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">exception</span><span class="p">(</span><span class="n">e2</span><span class="p">)</span>
+
+ <span class="c1"># Handling callbacks pessimistically</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">==</span> <span class="n">State</span><span class="o">.</span><span class="n">UP_FOR_RETRY</span> <span class="ow">and</span> <span class="n">task</span><span class="o">.</span><span class="n">on_retry_callback</span><span class="p">:</span>
+ <span class="n">task</span><span class="o">.</span><span class="n">on_retry_callback</span><span class="p">(</span><span class="n">context</span><span class="p">)</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">==</span> <span class="n">State</span><span class="o">.</span><span class="n">FAILED</span> <span class="ow">and</span> <span class="n">task</span><span class="o">.</span><span class="n">on_failure_callback</span><span class="p">:</span>
+ <span class="n">task</span><span class="o">.</span><span class="n">on_failure_callback</span><span class="p">(</span><span class="n">context</span><span class="p">)</span>
+ <span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e3</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s2">"Failed at executing callback"</span><span class="p">)</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">exception</span><span class="p">(</span><span class="n">e3</span><span class="p">)</span>
+
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">test_mode</span><span class="p">:</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">merge</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">error</span><span class="p">))</span>
+
+ <span class="nd">@provide_session</span>
+ <span class="k">def</span> <span class="nf">get_template_context</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">session</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="n">task</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">task</span>
+ <span class="kn">from</span> <span class="nn">airflow</span> <span class="kn">import</span> <span class="n">macros</span>
+ <span class="n">tables</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="k">if</span> <span class="s1">'tables'</span> <span class="ow">in</span> <span class="n">task</span><span class="o">.</span><span class="n">params</span><span class="p">:</span>
+ <span class="n">tables</span> <span class="o">=</span> <span class="n">task</span><span class="o">.</span><span class="n">params</span><span class="p">[</span><span class="s1">'tables'</span><span class="p">]</span>
+
+ <span class="n">ds</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span><span class="o">.</span><span class="n">isoformat</span><span class="p">()[:</span><span class="mi">10</span><span class="p">]</span>
+ <span class="n">ts</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span><span class="o">.</span><span class="n">isoformat</span><span class="p">()</span>
+ <span class="n">yesterday_ds</span> <span class="o">=</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span> <span class="o">-</span> <span class="n">timedelta</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span><span class="o">.</span><span class="n">isoformat</span><span class="p">()[:</span><span class="mi">10</span><span class="p">]</span>
+ <span class="n">tomorrow_ds</span> <span class="o">=</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span> <span class="o">+</span> <span class="n">timedelta</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span><span class="o">.</span><span class="n">isoformat</span><span class="p">()[:</span><span class="mi">10</span><span class="p">]</span>
+
+ <span class="n">ds_nodash</span> <span class="o">=</span> <span class="n">ds</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'-'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span>
+ <span class="n">ts_nodash</span> <span class="o">=</span> <span class="n">ts</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'-'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">':'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span>
+ <span class="n">yesterday_ds_nodash</span> <span class="o">=</span> <span class="n">yesterday_ds</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'-'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span>
+ <span class="n">tomorrow_ds_nodash</span> <span class="o">=</span> <span class="n">tomorrow_ds</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'-'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span>
+
+ <span class="n">ti_key_str</span> <span class="o">=</span> <span class="s2">"{task.dag_id}__{task.task_id}__{ds_nodash}"</span>
+ <span class="n">ti_key_str</span> <span class="o">=</span> <span class="n">ti_key_str</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">())</span>
+
+ <span class="n">params</span> <span class="o">=</span> <span class="p">{}</span>
+ <span class="n">run_id</span> <span class="o">=</span> <span class="s1">''</span>
+ <span class="n">dag_run</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">task</span><span class="p">,</span> <span class="s1">'dag'</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">task</span><span class="o">.</span><span class="n">dag</span><span class="o">.</span><span class="n">params</span><span class="p">:</span>
+ <span class="n">params</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">task</span><span class="o">.</span><span class="n">dag</span><span class="o">.</span><span class="n">params</span><span class="p">)</span>
+ <span class="n">dag_run</span> <span class="o">=</span> <span class="p">(</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">DagRun</span><span class="p">)</span>
+ <span class="o">.</span><span class="n">filter_by</span><span class="p">(</span>
+ <span class="n">dag_id</span><span class="o">=</span><span class="n">task</span><span class="o">.</span><span class="n">dag</span><span class="o">.</span><span class="n">dag_id</span><span class="p">,</span>
+ <span class="n">execution_date</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span><span class="p">)</span>
+ <span class="o">.</span><span class="n">first</span><span class="p">()</span>
+ <span class="p">)</span>
+ <span class="n">run_id</span> <span class="o">=</span> <span class="n">dag_run</span><span class="o">.</span><span class="n">run_id</span> <span class="k">if</span> <span class="n">dag_run</span> <span class="k">else</span> <span class="bp">None</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">expunge_all</span><span class="p">()</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+
+ <span class="k">if</span> <span class="n">task</span><span class="o">.</span><span class="n">params</span><span class="p">:</span>
+ <span class="n">params</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">task</span><span class="o">.</span><span class="n">params</span><span class="p">)</span>
+
+ <span class="k">return</span> <span class="p">{</span>
+ <span class="s1">'dag'</span><span class="p">:</span> <span class="n">task</span><span class="o">.</span><span class="n">dag</span><span class="p">,</span>
+ <span class="s1">'ds'</span><span class="p">:</span> <span class="n">ds</span><span class="p">,</span>
+ <span class="s1">'ds_nodash'</span><span class="p">:</span> <span class="n">ds_nodash</span><span class="p">,</span>
+ <span class="s1">'ts'</span><span class="p">:</span> <span class="n">ts</span><span class="p">,</span>
+ <span class="s1">'ts_nodash'</span><span class="p">:</span> <span class="n">ts_nodash</span><span class="p">,</span>
+ <span class="s1">'yesterday_ds'</span><span class="p">:</span> <span class="n">yesterday_ds</span><span class="p">,</span>
+ <span class="s1">'yesterday_ds_nodash'</span><span class="p">:</span> <span class="n">yesterday_ds_nodash</span><span class="p">,</span>
+ <span class="s1">'tomorrow_ds'</span><span class="p">:</span> <span class="n">tomorrow_ds</span><span class="p">,</span>
+ <span class="s1">'tomorrow_ds_nodash'</span><span class="p">:</span> <span class="n">tomorrow_ds_nodash</span><span class="p">,</span>
+ <span class="s1">'END_DATE'</span><span class="p">:</span> <span class="n">ds</span><span class="p">,</span>
+ <span class="s1">'end_date'</span><span class="p">:</span> <span class="n">ds</span><span class="p">,</span>
+ <span class="s1">'dag_run'</span><span class="p">:</span> <span class="n">dag_run</span><span class="p">,</span>
+ <span class="s1">'run_id'</span><span class="p">:</span> <span class="n">run_id</span><span class="p">,</span>
+ <span class="s1">'execution_date'</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span><span class="p">,</span>
+ <span class="s1">'latest_date'</span><span class="p">:</span> <span class="n">ds</span><span class="p">,</span>
+ <span class="s1">'macros'</span><span class="p">:</span> <span class="n">macros</span><span class="p">,</span>
+ <span class="s1">'params'</span><span class="p">:</span> <span class="n">params</span><span class="p">,</span>
+ <span class="s1">'tables'</span><span class="p">:</span> <span class="n">tables</span><span class="p">,</span>
+ <span class="s1">'task'</span><span class="p">:</span> <span class="n">task</span><span class="p">,</span>
+ <span class="s1">'task_instance'</span><span class="p">:</span> <span class="bp">self</span><span class="p">,</span>
+ <span class="s1">'ti'</span><span class="p">:</span> <span class="bp">self</span><span class="p">,</span>
+ <span class="s1">'task_instance_key_str'</span><span class="p">:</span> <span class="n">ti_key_str</span><span class="p">,</span>
+ <span class="s1">'conf'</span><span class="p">:</span> <span class="n">configuration</span><span class="p">,</span>
+ <span class="s1">'test_mode'</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">test_mode</span><span class="p">,</span>
+ <span class="p">}</span>
+
+ <span class="k">def</span> <span class="nf">render_templates</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="n">task</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">task</span>
+ <span class="n">jinja_context</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_template_context</span><span class="p">()</span>
+ <span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">'task'</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">task</span><span class="p">,</span> <span class="s1">'dag'</span><span class="p">):</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">task</span><span class="o">.</span><span class="n">dag</span><span class="o">.</span><span class="n">user_defined_macros</span><span class="p">:</span>
+ <span class="n">jinja_context</span><span class="o">.</span><span class="n">update</span><span class="p">(</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">task</span><span class="o">.</span><span class="n">dag</span><span class="o">.</span><span class="n">user_defined_macros</span><span class="p">)</span>
+
+ <span class="n">rt</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">task</span><span class="o">.</span><span class="n">render_template</span> <span class="c1"># shortcut to method</span>
+ <span class="k">for</span> <span class="n">attr</span> <span class="ow">in</span> <span class="n">task</span><span class="o">.</span><span class="n">__class__</span><span class="o">.</span><span class="n">template_fields</span><span class="p">:</span>
+ <span class="n">content</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">task</span><span class="p">,</span> <span class="n">attr</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">content</span><span class="p">:</span>
+ <span class="n">rendered_content</span> <span class="o">=</span> <span class="n">rt</span><span class="p">(</span><span class="n">attr</span><span class="p">,</span> <span class="n">content</span><span class="p">,</span> <span class="n">jinja_context</span><span class="p">)</span>
+ <span class="nb">setattr</span><span class="p">(</span><span class="n">task</span><span class="p">,</span> <span class="n">attr</span><span class="p">,</span> <span class="n">rendered_content</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">email_alert</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">exception</span><span class="p">,</span> <span class="n">is_retry</span><span class="o">=</span><span class="bp">False</span><span class="p">):</span>
+ <span class="n">task</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">task</span>
+ <span class="n">title</span> <span class="o">=</span> <span class="s2">"Airflow alert: {self}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">())</span>
+ <span class="n">exception</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">exception</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="se">\n</span><span class="s1">'</span><span class="p">,</span> <span class="s1">'<br>'</span><span class="p">)</span>
+ <span class="n">try_</span> <span class="o">=</span> <span class="n">task</span><span class="o">.</span><span class="n">retries</span> <span class="o">+</span> <span class="mi">1</span>
+ <span class="n">body</span> <span class="o">=</span> <span class="p">(</span>
+ <span class="s2">"Try {self.try_number} out of {try_}<br>"</span>
+ <span class="s2">"Exception:<br>{exception}<br>"</span>
+ <span class="s2">"Log: <a href='{self.log_url}'>Link</a><br>"</span>
+ <span class="s2">"Host: {self.hostname}<br>"</span>
+ <span class="s2">"Log file: {self.log_filepath}<br>"</span>
+ <span class="s2">"Mark success: <a href='{self.mark_success_url}'>Link</a><br>"</span>
+ <span class="p">)</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">())</span>
+ <span class="n">send_email</span><span class="p">(</span><span class="n">task</span><span class="o">.</span><span class="n">email</span><span class="p">,</span> <span class="n">title</span><span class="p">,</span> <span class="n">body</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">set_duration</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">end_date</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">start_date</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">duration</span> <span class="o">=</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">end_date</span> <span class="o">-</span> <span class="bp">self</span><span class="o">.</span><span class="n">start_date</span><span class="p">)</span><span class="o">.</span><span class="n">total_seconds</span><span class="p">()</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">duration</span> <span class="o">=</span> <span class="bp">None</span>
+
+<div class="viewcode-block" id="TaskInstance.xcom_push"><a class="viewcode-back" href="../../code.html#airflow.models.TaskInstance.xcom_push">[docs]</a> <span class="k">def</span> <span class="nf">xcom_push</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span>
+ <span class="n">key</span><span class="p">,</span>
+ <span class="n">value</span><span class="p">,</span>
+ <span class="n">execution_date</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Make an XCom available for tasks to pull.</span>
+
+<span class="sd"> :param key: A key for the XCom</span>
+<span class="sd"> :type key: string</span>
+<span class="sd"> :param value: A value for the XCom. The value is pickled and stored</span>
+<span class="sd"> in the database.</span>
+<span class="sd"> :type value: any pickleable object</span>
+<span class="sd"> :param execution_date: if provided, the XCom will not be visible until</span>
+<span class="sd"> this date. This can be used, for example, to send a message to a</span>
+<span class="sd"> task on a future date without it being immediately visible.</span>
+<span class="sd"> :type execution_date: datetime</span>
+<span class="sd"> """</span>
+
+ <span class="k">if</span> <span class="n">execution_date</span> <span class="ow">and</span> <span class="n">execution_date</span> <span class="o"><</span> <span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
+ <span class="s1">'execution_date can not be in the past (current '</span>
+ <span class="s1">'execution_date is {}; received {})'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span><span class="p">,</span> <span class="n">execution_date</span><span class="p">))</span>
+
+ <span class="n">XCom</span><span class="o">.</span><span class="n">set</span><span class="p">(</span>
+ <span class="n">key</span><span class="o">=</span><span class="n">key</span><span class="p">,</span>
+ <span class="n">value</span><span class="o">=</span><span class="n">value</span><span class="p">,</span>
+ <span class="n">task_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">task_id</span><span class="p">,</span>
+ <span class="n">dag_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span><span class="p">,</span>
+ <span class="n">execution_date</span><span class="o">=</span><span class="n">execution_date</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="TaskInstance.xcom_pull"><a class="viewcode-back" href="../../code.html#airflow.models.TaskInstance.xcom_pull">[docs]</a> <span class="k">def</span> <span class="nf">xcom_pull</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span>
+ <span class="n">task_ids</span><span class="p">,</span>
+ <span class="n">dag_id</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">key</span><span class="o">=</span><span class="n">XCOM_RETURN_KEY</span><span class="p">,</span>
+ <span class="n">include_prior_dates</span><span class="o">=</span><span class="bp">False</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Pull XComs that optionally meet certain criteria.</span>
+
+<span class="sd"> The default value for `key` limits the search to XComs</span>
+<span class="sd"> that were returned by other tasks (as opposed to those that were pushed</span>
+<span class="sd"> manually). To remove this filter, pass key=None (or any desired value).</span>
+
+<span class="sd"> If a single task_id string is provided, the result is the value of the</span>
+<span class="sd"> most recent matching XCom from that task_id. If multiple task_ids are</span>
+<span class="sd"> provided, a tuple of matching values is returned. None is returned</span>
+<span class="sd"> whenever no matches are found.</span>
+
+<span class="sd"> :param key: A key for the XCom. If provided, only XComs with matching</span>
+<span class="sd"> keys will be returned. The default key is 'return_value', also</span>
+<span class="sd"> available as a constant XCOM_RETURN_KEY. This key is automatically</span>
+<span class="sd"> given to XComs returned by tasks (as opposed to being pushed</span>
+<span class="sd"> manually). To remove the filter, pass key=None.</span>
+<span class="sd"> :type key: string</span>
+<span class="sd"> :param task_ids: Only XComs from tasks with matching ids will be</span>
+<span class="sd"> pulled. Can pass None to remove the filter.</span>
+<span class="sd"> :type task_ids: string or iterable of strings (representing task_ids)</span>
+<span class="sd"> :param dag_id: If provided, only pulls XComs from this DAG.</span>
+<span class="sd"> If None (default), the DAG of the calling task is used.</span>
+<span class="sd"> :type dag_id: string</span>
+<span class="sd"> :param include_prior_dates: If False, only XComs from the current</span>
+<span class="sd"> execution_date are returned. If True, XComs from previous dates</span>
+<span class="sd"> are returned as well.</span>
+<span class="sd"> :type include_prior_dates: bool</span>
+<span class="sd"> """</span>
+
+ <span class="k">if</span> <span class="n">dag_id</span> <span class="ow">is</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">dag_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span>
+
+ <span class="n">pull_fn</span> <span class="o">=</span> <span class="n">functools</span><span class="o">.</span><span class="n">partial</span><span class="p">(</span>
+ <span class="n">XCom</span><span class="o">.</span><span class="n">get_one</span><span class="p">,</span>
+ <span class="n">execution_date</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span><span class="p">,</span>
+ <span class="n">key</span><span class="o">=</span><span class="n">key</span><span class="p">,</span>
+ <span class="n">dag_id</span><span class="o">=</span><span class="n">dag_id</span><span class="p">,</span>
+ <span class="n">include_prior_dates</span><span class="o">=</span><span class="n">include_prior_dates</span><span class="p">)</span>
+
+ <span class="k">if</span> <span class="n">is_container</span><span class="p">(</span><span class="n">task_ids</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">tuple</span><span class="p">(</span><span class="n">pull_fn</span><span class="p">(</span><span class="n">task_id</span><span class="o">=</span><span class="n">t</span><span class="p">)</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">task_ids</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">pull_fn</span><span class="p">(</span><span class="n">task_id</span><span class="o">=</span><span class="n">task_ids</span><span class="p">)</span></div></div>
+
+
+<span class="k">class</span> <span class="nc">Log</span><span class="p">(</span><span class="n">Base</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Used to actively log events to the database</span>
+<span class="sd"> """</span>
+
+ <span class="n">__tablename__</span> <span class="o">=</span> <span class="s2">"log"</span>
+
+ <span class="nb">id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Integer</span><span class="p">,</span> <span class="n">primary_key</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="n">dttm</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">DateTime</span><span class="p">)</span>
+ <span class="n">dag_id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="n">ID_LEN</span><span class="p">))</span>
+ <span class="n">task_id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="n">ID_LEN</span><span class="p">))</span>
+ <span class="n">event</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="mi">30</span><span class="p">))</span>
+ <span class="n">execution_date</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">DateTime</span><span class="p">)</span>
+ <span class="n">owner</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="mi">500</span><span class="p">))</span>
+ <span class="n">extra</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Text</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">event</span><span class="p">,</span> <span class="n">task_instance</span><span class="p">,</span> <span class="n">owner</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">extra</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">dttm</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">event</span> <span class="o">=</span> <span class="n">event</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">extra</span> <span class="o">=</span> <span class="n">extra</span>
+
+ <span class="n">task_owner</span> <span class="o">=</span> <span class="bp">None</span>
+
+ <span class="k">if</span> <span class="n">task_instance</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span> <span class="o">=</span> <span class="n">task_instance</span><span class="o">.</span><span class="n">dag_id</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">task_id</span> <span class="o">=</span> <span class="n">task_instance</span><span class="o">.</span><span class="n">task_id</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span> <span class="o">=</span> <span class="n">task_instance</span><span class="o">.</span><span class="n">execution_date</span>
+ <span class="n">task_owner</span> <span class="o">=</span> <span class="n">task_instance</span><span class="o">.</span><span class="n">task</span><span class="o">.</span><span class="n">owner</span>
+
+ <span class="k">if</span> <span class="s1">'task_id'</span> <span class="ow">in</span> <span class="n">kwargs</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">task_id</span> <span class="o">=</span> <span class="n">kwargs</span><span class="p">[</span><span class="s1">'task_id'</span><span class="p">]</span>
+ <span class="k">if</span> <span class="s1">'dag_id'</span> <span class="ow">in</span> <span class="n">kwargs</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span> <span class="o">=</span> <span class="n">kwargs</span><span class="p">[</span><span class="s1">'dag_id'</span><span class="p">]</span>
+ <span class="k">if</span> <span class="s1">'execution_date'</span> <span class="ow">in</span> <span class="n">kwargs</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">kwargs</span><span class="p">[</span><span class="s1">'execution_date'</span><span class="p">]:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span> <span class="o">=</span> <span class="n">kwargs</span><span class="p">[</span><span class="s1">'execution_date'</span><span class="p">]</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">owner</span> <span class="o">=</span> <span class="n">owner</span> <span class="ow">or</span> <span class="n">task_owner</span>
+
+
+<span class="nd">@functools.total_ordering</span>
+<div class="viewcode-block" id="BaseOperator"><a class="viewcode-back" href="../../code.html#airflow.models.BaseOperator">[docs]</a><span class="k">class</span> <span class="nc">BaseOperator</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Abstract base class for all operators. Since operators create objects that</span>
+<span class="sd"> become nodes in the dag, BaseOperator contains many recursive methods for</span>
+<span class="sd"> dag crawling behavior. To derive this class, you are expected to override</span>
+<span class="sd"> the constructor as well as the 'execute' method.</span>
+
+<span class="sd"> Operators derived from this class should perform or trigger certain tasks</span>
+<span class="sd"> synchronously (wait for completion). Example of operators could be an</span>
+<span class="sd"> operator that runs a Pig job (PigOperator), a sensor operator that</span>
+<span class="sd"> waits for a partition to land in Hive (HiveSensorOperator), or one that</span>
+<span class="sd"> moves data from Hive to MySQL (Hive2MySqlOperator). Instances of these</span>
+<span class="sd"> operators (tasks) target specific operations, running specific scripts,</span>
+<span class="sd"> functions or data transfers.</span>
+
+<span class="sd"> This class is abstract and shouldn't be instantiated. Instantiating a</span>
+<span class="sd"> class derived from this one results in the creation of a task object,</span>
+<span class="sd"> which ultimately becomes a node in DAG objects. Task dependencies should</span>
+<span class="sd"> be set by using the set_upstream and/or set_downstream methods.</span>
+
+<span class="sd"> Note that this class is derived from SQLAlchemy's Base class, which</span>
+<span class="sd"> allows us to push metadata regarding tasks to the database. Classes</span>
+<span class="sd"> deriving from this one need to implement the polymorphic specificities documented in</span>
+<span class="sd"> SQLAlchemy. This should become clear while reading the code for other</span>
+<span class="sd"> operators.</span>
+
+<span class="sd"> :param task_id: a unique, meaningful id for the task</span>
+<span class="sd"> :type task_id: string</span>
+<span class="sd"> :param owner: the owner of the task, using the unix username is recommended</span>
+<span class="sd"> :type owner: string</span>
+<span class="sd"> :param retries: the number of retries that should be performed before</span>
+<span class="sd"> failing the task</span>
+<span class="sd"> :type retries: int</span>
+<span class="sd"> :param retry_delay: delay between retries</span>
+<span class="sd"> :type retry_delay: timedelta</span>
+<span class="sd"> :param start_date: The ``start_date`` for the task, determines</span>
+<span class="sd"> the ``execution_date`` for the first task instance. The best practice</span>
+<span class="sd"> is to have the start_date rounded</span>
+<span class="sd"> to your DAG's ``schedule_interval``. Daily jobs have their start_date</span>
+<span class="sd"> some day at 00:00:00, hourly jobs have their start_date at 00:00</span>
+<span class="sd"> of a specific hour. Note that Airflow simply looks at the latest</span>
+<span class="sd"> ``execution_date`` and adds the ``schedule_interval`` to determine</span>
+<span class="sd"> the next ``execution_date``. It is also very important</span>
+<span class="sd"> to note that different tasks' dependencies</span>
+<span class="sd"> need to line up in time. If task A depends on task B and their</span>
+<span class="sd"> start_date are offset in a way that their execution_date don't line</span>
+<span class="sd"> up, A's dependencies will never be met. If you are looking to delay</span>
+<span class="sd"> a task, for example running a daily task at 2AM, look into the</span>
+<span class="sd"> ``TimeSensor`` and ``TimeDeltaSensor``. We advise against using</span>
+<span class="sd"> dynamic ``start_date`` and recommend using fixed ones. Read the</span>
+<span class="sd"> FAQ entry about start_date for more information.</span>
+<span class="sd"> :type start_date: datetime</span>
+<span class="sd"> :param end_date: if specified, the scheduler won't go beyond this date</span>
+<span class="sd"> :type end_date: datetime</span>
+<span class="sd"> :param depends_on_past: when set to true, task instances will run</span>
+<span class="sd"> sequentially while relying on the previous task's schedule to</span>
+<span class="sd"> succeed. The task instance for the start_date is allowed to run.</span>
+<span class="sd"> :type depends_on_past: bool</span>
+<span class="sd"> :param wait_for_downstream: when set to true, an instance of task</span>
+<span class="sd"> X will wait for tasks immediately downstream of the previous instance</span>
+<span class="sd"> of task X to finish successfully before it runs. This is useful if the</span>
+<span class="sd"> different instances of a task X alter the same asset, and this asset</span>
+<span class="sd"> is used by tasks downstream of task X. Note that depends_on_past</span>
+<span class="sd"> is forced to True wherever wait_for_downstream is used.</span>
+<span class="sd"> :type wait_for_downstream: bool</span>
+<span class="sd"> :param queue: which queue to target when running this job. Not</span>
+<span class="sd"> all executors implement queue management, the CeleryExecutor</span>
+<span class="sd"> does support targeting specific queues.</span>
+<span class="sd"> :type queue: str</span>
+<span class="sd"> :param dag: a reference to the dag the task is attached to (if any)</span>
+<span class="sd"> :type dag: DAG</span>
+<span class="sd"> :param priority_weight: priority weight of this task against other tasks.</span>
+<span class="sd"> This allows the executor to trigger higher priority tasks before</span>
+<span class="sd"> others when things get backed up.</span>
+<span class="sd"> :type priority_weight: int</span>
+<span class="sd"> :param pool: the slot pool this task should run in, slot pools are a</span>
+<span class="sd"> way to limit concurrency for certain tasks</span>
+<span class="sd"> :type pool: str</span>
+<span class="sd"> :param sla: time by which the job is expected to succeed. Note that</span>
+<span class="sd"> this represents the ``timedelta`` after the period is closed. For</span>
+<span class="sd"> example if you set an SLA of 1 hour, the scheduler would send an email</span>
+<span class="sd"> soon after 1:00AM on the ``2016-01-02`` if the ``2016-01-01`` instance</span>
+<span class="sd"> has not succeeded yet.</span>
+<span class="sd"> The scheduler pays special attention to jobs with an SLA and</span>
+<span class="sd"> sends alert</span>
+<span class="sd"> emails for sla misses. SLA misses are also recorded in the database</span>
+<span class="sd"> for future reference. All tasks that share the same SLA time</span>
+<span class="sd"> get bundled in a single email, sent soon after that time. SLA</span>
+<span class="sd"> notifications are sent once and only once for each task instance.</span>
+<span class="sd"> :type sla: datetime.timedelta</span>
+<span class="sd"> :param execution_timeout: max time allowed for the execution of</span>
+<span class="sd"> this task instance, if it goes beyond it will raise and fail.</span>
+<span class="sd"> :type execution_timeout: datetime.timedelta</span>
+<span class="sd"> :param on_failure_callback: a function to be called when a task instance</span>
+<span class="sd"> of this task fails. A context dictionary is passed as a single</span>
+<span class="sd"> parameter to this function. Context contains references to related</span>
+<span class="sd"> objects to the task instance and is documented under the macros</span>
+<span class="sd"> section of the API.</span>
+<span class="sd"> :type on_failure_callback: callable</span>
+<span class="sd"> :param on_retry_callback: much like the ``on_failure_callback`` except</span>
+<span class="sd"> that it is executed when retries occur.</span>
+<span class="sd"> :param on_success_callback: much like the ``on_failure_callback`` except</span>
+<span class="sd"> that it is executed when the task succeeds.</span>
+<span class="sd"> :type on_success_callback: callable</span>
+<span class="sd"> :param trigger_rule: defines the rule by which dependencies are applied</span>
+<span class="sd"> for the task to get triggered. Options are:</span>
+<span class="sd"> ``{ all_success | all_failed | all_done | one_success |</span>
+<span class="sd"> one_failed | dummy}``</span>
+<span class="sd"> default is ``all_success``. Options can be set as string or</span>
+<span class="sd"> using the constants defined in the static class</span>
+<span class="sd"> ``airflow.utils.TriggerRule``</span>
+<span class="sd"> :type trigger_rule: str</span>
+<span class="sd"> """</span>
+
+ <span class="c1"># For derived classes to define which fields will get jinjaified</span>
+ <span class="n">template_fields</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="c1"># Defines which file extensions to look for in the templated fields</span>
+ <span class="n">template_ext</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="c1"># Defines the color in the UI</span>
+ <span class="n">ui_color</span> <span class="o">=</span> <span class="s1">'#fff'</span>
+ <span class="n">ui_fgcolor</span> <span class="o">=</span> <span class="s1">'#000'</span>
+
+ <span class="nd">@apply_defaults</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span>
+ <span class="n">task_id</span><span class="p">,</span>
+ <span class="n">owner</span><span class="o">=</span><span class="n">configuration</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'operators'</span><span class="p">,</span> <span class="s1">'DEFAULT_OWNER'</span><span class="p">),</span>
+ <span class="n">email</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">email_on_retry</span><span class="o">=</span><span class="bp">True</span><span class="p">,</span>
+ <span class="n">email_on_failure</span><span class="o">=</span><span class="bp">True</span><span class="p">,</span>
+ <span class="n">retries</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
+ <span class="n">retry_delay</span><span class="o">=</span><span class="n">timedelta</span><span class="p">(</span><span class="n">seconds</span><span class="o">=</span><span class="mi">300</span><span class="p">),</span>
+ <span class="n">start_date</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">end_date</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">schedule_interval</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="c1"># not hooked as of now</span>
+ <span class="n">depends_on_past</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">wait_for_downstream</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">dag</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">params</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">default_args</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">adhoc</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">priority_weight</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
+ <span class="n">queue</span><span class="o">=</span><span class="n">configuration</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'celery'</span><span class="p">,</span> <span class="s1">'default_queue'</span><span class="p">),</span>
+ <span class="n">pool</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">sla</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">execution_timeout</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">on_failure_callback</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">on_success_callback</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">on_retry_callback</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">trigger_rule</span><span class="o">=</span><span class="n">TriggerRule</span><span class="o">.</span><span class="n">ALL_SUCCESS</span><span class="p">,</span>
+ <span class="o">*</span><span class="n">args</span><span class="p">,</span>
+ <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+
+ <span class="k">if</span> <span class="n">args</span> <span class="ow">or</span> <span class="n">kwargs</span><span class="p">:</span>
+ <span class="c1"># TODO remove *args and **kwargs in Airflow 2.0</span>
+ <span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span>
+ <span class="s1">'Invalid arguments were passed to {c}. Support for '</span>
+ <span class="s1">'passing such arguments will be dropped in Airflow 2.0. '</span>
+ <span class="s1">'Invalid arguments were:'</span>
+ <span class="s1">'</span><span class="se">\n</span><span class="s1">*args: {a}</span><span class="se">\n</span><span class="s1">**kwargs: {k}'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
+ <span class="n">c</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">__class__</span><span class="o">.</span><span class="n">__name__</span><span class="p">,</span> <span class="n">a</span><span class="o">=</span><span class="n">args</span><span class="p">,</span> <span class="n">k</span><span class="o">=</span><span class="n">kwargs</span><span class="p">),</span>
+ <span class="n">category</span><span class="o">=</span><span class="ne">PendingDeprecationWarning</span>
+ <span class="p">)</span>
+
+ <span class="n">validate_key</span><span class="p">(</span><span class="n">task_id</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">task_id</span> <span class="o">=</span> <span class="n">task_id</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">owner</span> <span class="o">=</span> <span class="n">owner</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">email</span> <span class="o">=</span> <span class="n">email</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">email_on_retry</span> <span class="o">=</span> <span class="n">email_on_retry</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">email_on_failure</span> <span class="o">=</span> <span class="n">email_on_failure</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">start_date</span> <span class="o">=</span> <span class="n">start_date</span>
+ <span class="k">if</span> <span class="n">start_date</span> <span class="ow">and</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">start_date</span><span class="p">,</span> <span class="n">datetime</span><span class="p">):</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span>
+ <span class="s2">"start_date for {} isn't datetime.datetime"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">end_date</span> <span class="o">=</span> <span class="n">end_date</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">TriggerRule</span><span class="o">.</span><span class="n">is_valid</span><span class="p">(</span><span class="n">trigger_rule</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span>
+ <span class="s2">"The trigger_rule must be one of {all_triggers},"</span>
+ <span class="s2">"'{d}.{t}'; received '{tr}'."</span>
+ <span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">all_triggers</span><span class="o">=</span><span class="n">TriggerRule</span><span class="o">.</span><span class="n">all_triggers</span><span class="p">,</span>
+ <span class="n">d</span><span class="o">=</span><span class="n">dag</span><span class="o">.</span><span class="n">dag_id</span><span class="p">,</span> <span class="n">t</span><span class="o">=</span><span class="n">task_id</span><span class="p">,</span> <span class="n">tr</span> <span class="o">=</span> <span class="n">trigger_rule</span><span class="p">))</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">trigger_rule</span> <span class="o">=</span> <span class="n">trigger_rule</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">depends_on_past</span> <span class="o">=</span> <span class="n">depends_on_past</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">wait_for_downstream</span> <span class="o">=</span> <span class="n">wait_for_downstream</span>
+ <span class="k">if</span> <span class="n">wait_for_downstream</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">depends_on_past</span> <span class="o">=</span> <span class="bp">True</span>
+
+ <span class="k">if</span> <span class="n">schedule_interval</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span>
+ <span class="s2">"schedule_interval is used for {}, though it has "</span>
+ <span class="s2">"been deprecated as a task parameter, you need to "</span>
+ <span class="s2">"specify it as a DAG parameter instead"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_schedule_interval</span> <span class="o">=</span> <span class="n">schedule_interval</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">retries</span> <span class="o">=</span> <span class="n">retries</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">queue</span> <span class="o">=</span> <span class="n">queue</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">pool</span> <span class="o">=</span> <span class="n">pool</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">sla</span> <span class="o">=</span> <span class="n">sla</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">execution_timeout</span> <span class="o">=</span> <span class="n">execution_timeout</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">on_failure_callback</span> <span class="o">=</span> <span class="n">on_failure_callback</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">on_success_callback</span> <span class="o">=</span> <span class="n">on_success_callback</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">on_retry_callback</span> <span class="o">=</span> <span class="n">on_retry_callback</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">retry_delay</span><span class="p">,</span> <span class="n">timedelta</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">retry_delay</span> <span class="o">=</span> <span class="n">retry_delay</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"retry_delay isn't timedelta object, assuming secs"</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">retry_delay</span> <span class="o">=</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">seconds</span><span class="o">=</span><span class="n">retry_delay</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">params</span> <span class="o">=</span> <span class="n">params</span> <span class="ow">or</span> <span class="p">{}</span> <span class="c1"># Available in templates!</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">adhoc</span> <span class="o">=</span> <span class="n">adhoc</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">priority_weight</span> <span class="o">=</span> <span class="n">priority_weight</span>
+
+ <span class="c1"># Private attributes</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_upstream_task_ids</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_downstream_task_ids</span> <span class="o">=</span> <span class="p">[]</span>
+
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">dag</span> <span class="ow">and</span> <span class="n">_CONTEXT_MANAGER_DAG</span><span class="p">:</span>
+ <span class="n">dag</span> <span class="o">=</span> <span class="n">_CONTEXT_MANAGER_DAG</span>
+ <span class="k">if</span> <span class="n">dag</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">dag</span> <span class="o">=</span> <span class="n">dag</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">_comps</span> <span class="o">=</span> <span class="p">{</span>
+ <span class="s1">'task_id'</span><span class="p">,</span>
+ <span class="s1">'dag_id'</span><span class="p">,</span>
+ <span class="s1">'owner'</span><span class="p">,</span>
+ <span class="s1">'email'</span><span class="p">,</span>
+ <span class="s1">'email_on_retry'</span><span class="p">,</span>
+ <span class="s1">'retry_delay'</span><span class="p">,</span>
+ <span class="s1">'start_date'</span><span class="p">,</span>
+ <span class="s1">'schedule_interval'</span><span class="p">,</span>
+ <span class="s1">'depends_on_past'</span><span class="p">,</span>
+ <span class="s1">'wait_for_downstream'</span><span class="p">,</span>
+ <span class="s1">'adhoc'</span><span class="p">,</span>
+ <span class="s1">'priority_weight'</span><span class="p">,</span>
+ <span class="s1">'sla'</span><span class="p">,</span>
+ <span class="s1">'execution_timeout'</span><span class="p">,</span>
+ <span class="s1">'on_failure_callback'</span><span class="p">,</span>
+ <span class="s1">'on_success_callback'</span><span class="p">,</span>
+ <span class="s1">'on_retry_callback'</span><span class="p">,</span>
+ <span class="p">}</span>
+
+ <span class="k">def</span> <span class="nf">__eq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">(</span>
+ <span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">==</span> <span class="nb">type</span><span class="p">(</span><span class="n">other</span><span class="p">)</span> <span class="ow">and</span>
+ <span class="nb">all</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">__dict__</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">c</span><span class="p">,</span> <span class="bp">None</span><span class="p">)</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">__dict__</span><span class="o">.</span><span class="n">get</span><span class [...]
+ <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_comps</span><span class="p">))</span>
+
+ <span class="k">def</span> <span class="nf">__neq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
+ <span class="k">return</span> <span class="ow">not</span> <span class="bp">self</span> <span class="o">==</span> <span class="n">other</span>
+
+ <span class="k">def</span> <span class="nf">__lt__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">task_id</span> <span class="o"><</span> <span class="n">other</span><span class="o">.</span><span class="n">task_id</span>
+
+ <span class="k">def</span> <span class="nf">__hash__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="n">hash_components</span> <span class="o">=</span> <span class="p">[</span><span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">)]</span>
+ <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_comps</span><span class="p">:</span>
+ <span class="n">val</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">c</span><span class="p">,</span> <span class="bp">None</span><span class="p">)</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="nb">hash</span><span class="p">(</span><span class="n">val</span><span class="p">)</span>
+ <span class="n">hash_components</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">val</span><span class="p">)</span>
+ <span class="k">except</span> <span class="ne">TypeError</span><span class="p">:</span>
+ <span class="n">hash_components</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">repr</span><span class="p">(</span><span class="n">val</span><span class="p">))</span>
+ <span class="k">return</span> <span class="nb">hash</span><span class="p">(</span><span class="nb">tuple</span><span class="p">(</span><span class="n">hash_components</span><span class="p">))</span>
+
+ <span class="c1"># Composing Operators -----------------------------------------------</span>
+
+ <span class="k">def</span> <span class="nf">__rshift__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Implements Self >> Other == self.set_downstream(other)</span>
+
+<span class="sd"> If "Other" is a DAG, the DAG is assigned to the Operator.</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">DAG</span><span class="p">):</span>
+ <span class="c1"># if this dag is already assigned, do nothing</span>
+ <span class="c1"># otherwise, do normal dag assignment</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">has_dag</span><span class="p">()</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag</span> <span class="ow">is</span> <span class="n">other</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">dag</span> <span class="o">=</span> <span class="n">other</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">set_downstream</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">other</span>
+
+ <span class="k">def</span> <span class="nf">__lshift__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Implements Self << Other == self.set_upstream(other)</span>
+
+<span class="sd"> If "Other" is a DAG, the DAG is assigned to the Operator.</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">DAG</span><span class="p">):</span>
+ <span class="c1"># if this dag is already assigned, do nothing</span>
+ <span class="c1"># otherwise, do normal dag assignment</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">has_dag</span><span class="p">()</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag</span> <span class="ow">is</span> <span class="n">other</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">dag</span> <span class="o">=</span> <span class="n">other</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">set_upstream</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">other</span>
+
+ <span class="k">def</span> <span class="nf">__rrshift__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Called for [DAG] >> [Operator] because DAGs don't have</span>
+<span class="sd"> __rshift__ operators.</span>
+<span class="sd"> """</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">__lshift__</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
+ <span class="k">return</span> <span class="bp">self</span>
+
+ <span class="k">def</span> <span class="nf">__rlshift__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Called for [DAG] << [Operator] because DAGs don't have</span>
+<span class="sd"> __lshift__ operators.</span>
+<span class="sd"> """</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">__rshift__</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
+ <span class="k">return</span> <span class="bp">self</span>
+
+ <span class="c1"># /Composing Operators ---------------------------------------------</span>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">dag</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns the Operator's DAG if set, otherwise raises an error</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">has_dag</span><span class="p">():</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dag</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span>
+ <span class="s1">'Operator {} has not been assigned to a DAG yet'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span>
+
+ <span class="nd">@dag.setter</span>
+ <span class="k">def</span> <span class="nf">dag</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dag</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Operators can be assigned to one DAG, one time. Repeat assignments to</span>
+<span class="sd"> that same DAG are ok.</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">dag</span><span class="p">,</span> <span class="n">DAG</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
+ <span class="s1">'Expected DAG; received {}'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">dag</span><span class="o">.</span><span class="n">__class__</span><span class="o">.</span><span class="n">__name__</span><span class="p">))</span>
+ <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">has_dag</span><span class="p">()</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag</span> <span class="ow">is</span> <span class="ow">not</span> <span class="n">dag</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span>
+ <span class="s2">"The DAG assigned to {} can not be changed."</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span>
+ <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">task_id</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">dag</span><span class="o">.</span><span class="n">task_dict</span><span class="p">:</span>
+ <span class="n">dag</span><span class="o">.</span><span class="n">add_task</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">_dag</span> <span class="o">=</span> <span class="n">dag</span>
+
+<div class="viewcode-block" id="BaseOperator.has_dag"><a class="viewcode-back" href="../../code.html#airflow.models.BaseOperator.has_dag">[docs]</a> <span class="k">def</span> <span class="nf">has_dag</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns True if the Operator has been assigned to a DAG.</span>
+<span class="sd"> """</span>
+ <span class="k">return</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">'_dag'</span><span class="p">,</span> <span class="bp">None</span><span class="p">)</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">None</span></div>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">dag_id</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">has_dag</span><span class="p">():</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag</span><span class="o">.</span><span class="n">dag_id</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">return</span> <span class="s1">'adhoc_'</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">owner</span>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">schedule_interval</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> The schedule interval of the DAG always wins over individual tasks so</span>
+<span class="sd"> that tasks within a DAG always line up. The task still needs a</span>
+<span class="sd"> schedule_interval as it may not be attached to a DAG.</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">has_dag</span><span class="p">():</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag</span><span class="o">.</span><span class="n">_schedule_interval</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_schedule_interval</span>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">priority_weight_total</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">sum</span><span class="p">([</span>
+ <span class="n">t</span><span class="o">.</span><span class="n">priority_weight</span>
+ <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_flat_relatives</span><span class="p">(</span><span class="n">upstream</span><span class="o">=</span><span class="bp">False</span><span class="p">)</span>
+ <span class="p">])</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">priority_weight</span>
+
+<div class="viewcode-block" id="BaseOperator.pre_execute"><a class="viewcode-back" href="../../code.html#airflow.models.BaseOperator.pre_execute">[docs]</a> <span class="k">def</span> <span class="nf">pre_execute</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> This is triggered right before self.execute, it's mostly a hook</span>
+<span class="sd"> for people deriving operators.</span>
+<span class="sd"> """</span>
+ <span class="k">pass</span></div>
+
+<div class="viewcode-block" id="BaseOperator.execute"><a class="viewcode-back" href="../../code.html#airflow.models.BaseOperator.execute">[docs]</a> <span class="k">def</span> <span class="nf">execute</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> This is the main method to derive when creating an operator.</span>
+<span class="sd"> Context is the same dictionary used as when rendering jinja templates.</span>
+
+<span class="sd"> Refer to get_template_context for more context.</span>
+<span class="sd"> """</span>
+ <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">()</span></div>
+
+<div class="viewcode-block" id="BaseOperator.post_execute"><a class="viewcode-back" href="../../code.html#airflow.models.BaseOperator.post_execute">[docs]</a> <span class="k">def</span> <span class="nf">post_execute</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> This is triggered right after self.execute, it's mostly a hook</span>
+<span class="sd"> for people deriving operators.</span>
+<span class="sd"> """</span>
+ <span class="k">pass</span></div>
+
+<div class="viewcode-block" id="BaseOperator.on_kill"><a class="viewcode-back" href="../../code.html#airflow.models.BaseOperator.on_kill">[docs]</a> <span class="k">def</span> <span class="nf">on_kill</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">'''</span>
+<span class="sd"> Override this method to cleanup subprocesses when a task instance</span>
+<span class="sd"> gets killed. Any use of the threading, subprocess or multiprocessing</span>
+<span class="sd"> module within an operator needs to be cleaned up or it will leave</span>
+<span class="sd"> ghost processes behind.</span>
+<span class="sd"> '''</span>
+ <span class="k">pass</span></div>
+
+ <span class="k">def</span> <span class="nf">__deepcopy__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">memo</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Hack sorting double chained task lists by task_id to avoid hitting</span>
+<span class="sd"> max_depth on deepcopy operations.</span>
+<span class="sd"> """</span>
+ <span class="n">sys</span><span class="o">.</span><span class="n">setrecursionlimit</span><span class="p">(</span><span class="mi">5000</span><span class="p">)</span> <span class="c1"># TODO fix this in a better way</span>
+ <span class="n">cls</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">__class__</span>
+ <span class="n">result</span> <span class="o">=</span> <span class="n">cls</span><span class="o">.</span><span class="n">__new__</span><span class="p">(</span><span class="n">cls</span><span class="p">)</span>
+ <span class="n">memo</span><span class="p">[</span><span class="nb">id</span><span class="p">(</span><span class="bp">self</span><span class="p">)]</span> <span class="o">=</span> <span class="n">result</span>
+
+ <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="nb">list</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">__dict__</span><span class="o">.</span><span class="n">items</span><span class="p">()):</span>
+ <span class="k">if</span> <span class="n">k</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">(</span><span class="s1">'user_defined_macros'</span><span class="p">,</span> <span class="s1">'params'</span><span class="p">):</span>
+ <span class="nb">setattr</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="n">k</span><span class="p">,</span> <span class="n">copy</span><span class="o">.</span><span class="n">deepcopy</span><span class="p">(</span><span class="n">v</span><span class="p">,</span> <span class="n">memo</span><span class="p">))</span>
+ <span class="n">result</span><span class="o">.</span><span class="n">params</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span>
+ <span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">'user_defined_macros'</span><span class="p">):</span>
+ <span class="n">result</span><span class="o">.</span><span class="n">user_defined_macros</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">user_defined_macros</span>
+ <span class="k">return</span> <span class="n">result</span>
+
+<div class="viewcode-block" id="BaseOperator.render_template_from_field"><a class="viewcode-back" href="../../code.html#airflow.models.BaseOperator.render_template_from_field">[docs]</a> <span class="k">def</span> <span class="nf">render_template_from_field</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">attr</span><span class="p">,</span> <span class="n">content</span><span class="p">,</span> <span class="n">context</span><span class [...]
+ <span class="sd">'''</span>
+<span class="sd"> Renders a template from a field. If the field is a string, it will</span>
+<span class="sd"> simply render the string and return the result. If it is a collection or</span>
+<span class="sd"> nested set of collections, it will traverse the structure and render</span>
+<span class="sd"> all strings in it.</span>
+<span class="sd"> '''</span>
+ <span class="n">rt</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">render_template</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">content</span><span class="p">,</span> <span class="n">six</span><span class="o">.</span><span class="n">string_types</span><span class="p">):</span>
+ <span class="n">result</span> <span class="o">=</span> <span class="n">jinja_env</span><span class="o">.</span><span class="n">from_string</span><span class="p">(</span><span class="n">content</span><span class="p">)</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="o">**</span><span class="n">context</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">content</span><span class="p">,</span> <span class="p">(</span><span class="nb">list</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">)):</span>
+ <span class="n">result</span> <span class="o">=</span> <span class="p">[</span><span class="n">rt</span><span class="p">(</span><span class="n">attr</span><span class="p">,</span> <span class="n">e</span><span class="p">,</span> <span class="n">context</span><span class="p">)</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">content</span><span class="p">]</span>
+ <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">content</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
+ <span class="n">result</span> <span class="o">=</span> <span class="p">{</span>
+ <span class="n">k</span><span class="p">:</span> <span class="n">rt</span><span class="p">(</span><span class="s2">"{}[{}]"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">attr</span><span class="p">,</span> <span class="n">k</span><span class="p">),</span> <span class="n">v</span><span class="p">,</span> <span class="n">context</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="nb">list</span><span class="p">(</span><span class="n">content</span><span class="o">.</span><span class="n">items</span><span class="p">())}</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">param_type</span> <span class="o">=</span> <span class="nb">type</span><span class="p">(</span><span class="n">content</span><span class="p">)</span>
+ <span class="n">msg</span> <span class="o">=</span> <span class="p">(</span>
+ <span class="s2">"Type '{param_type}' used for parameter '{attr}' is "</span>
+ <span class="s2">"not supported for templating"</span><span class="p">)</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">())</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span><span class="n">msg</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">result</span></div>
+
+<div class="viewcode-block" id="BaseOperator.render_template"><a class="viewcode-back" href="../../code.html#airflow.models.BaseOperator.render_template">[docs]</a> <span class="k">def</span> <span class="nf">render_template</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">attr</span><span class="p">,</span> <span class="n">content</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+ <span class="sd">'''</span>
+<span class="sd"> Renders a template either from a file or directly in a field, and returns</span>
+<span class="sd"> the rendered result.</span>
+<span class="sd"> '''</span>
+ <span class="n">jinja_env</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag</span><span class="o">.</span><span class="n">get_template_env</span><span class="p">()</span> \
+ <span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">'dag'</span><span class="p">)</span> \
+ <span class="k">else</span> <span class="n">jinja2</span><span class="o">.</span><span class="n">Environment</span><span class="p">(</span><span class="n">cache_size</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+
+ <span class="n">exts</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">__class__</span><span class="o">.</span><span class="n">template_ext</span>
+ <span class="k">if</span> <span class="p">(</span>
+ <span class="nb">isinstance</span><span class="p">(</span><span class="n">content</span><span class="p">,</span> <span class="n">six</span><span class="o">.</span><span class="n">string_types</span><span class="p">)</span> <span class="ow">and</span>
+ <span class="nb">any</span><span class="p">([</span><span class="n">content</span><span class="o">.</span><span class="n">endswith</span><span class="p">(</span><span class="n">ext</span><span class="p">)</span> <span class="k">for</span> <span class="n">ext</span> <span class="ow">in</span> <span class="n">exts</span><span class="p">])):</span>
+ <span class="k">return</span> <span class="n">jinja_env</span><span class="o">.</span><span class="n">get_template</span><span class="p">(</span><span class="n">content</span><span class="p">)</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="o">**</span><span class="n">context</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">render_template_from_field</span><span class="p">(</span><span class="n">attr</span><span class="p">,</span> <span class="n">content</span><span class="p">,</span> <span class="n">context</span><span class="p">,</span> <span class="n">jinja_env</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="BaseOperator.prepare_template"><a class="viewcode-back" href="../../code.html#airflow.models.BaseOperator.prepare_template">[docs]</a> <span class="k">def</span> <span class="nf">prepare_template</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">'''</span>
+<span class="sd"> Hook that is triggered after the templated fields get replaced</span>
+<span class="sd"> by their content. If you need your operator to alter the</span>
+<span class="sd"> content of the file before the template is rendered,</span>
+<span class="sd"> it should override this method to do so.</span>
+<span class="sd"> '''</span>
+ <span class="k">pass</span></div>
+
+ <span class="k">def</span> <span class="nf">resolve_template_files</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="c1"># Getting the content of files for template_field / template_ext</span>
+ <span class="k">for</span> <span class="n">attr</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">template_fields</span><span class="p">:</span>
+ <span class="n">content</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">attr</span><span class="p">)</span>
+ <span class="k">if</span> <span class="p">(</span><span class="n">content</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">content</span><span class="p">,</span> <span class="n">six</span><span class="o">.</span><span class="n">string_types</span><span class="p">)</span> <span class="ow">and</span>
+ <span class="nb">any</span><span class="p">([</span><span class="n">content</span><span class="o">.</span><span class="n">endswith</span><span class="p">(</span><span class="n">ext</span><span class="p">)</span> <span class="k">for</span> <span class="n">ext</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">template_ext</span><span class="p">])):</span>
+ <span class="n">env</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag</span><span class="o">.</span><span class="n">get_template_env</span><span class="p">()</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="nb">setattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">attr</span><span class="p">,</span> <span class="n">env</span><span class="o">.</span><span class="n">loader</span><span class="o">.</span><span class="n">get_source</span><span class="p">(</span><span class="n">env</span><span class="p">,</span> <span class="n">content</span><span class="p">)[</span><span class="mi">0</span><span class="p">])</span>
+ <span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">exception</span><span class="p">(</span><span class="n">e</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">prepare_template</span><span class="p">()</span>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">upstream_list</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""@property: list of tasks directly upstream"""</span>
+ <span class="k">return</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">dag</span><span class="o">.</span><span class="n">get_task</span><span class="p">(</span><span class="n">tid</span><span class="p">)</span> <span class="k">for</span> <span class="n">tid</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_upstream_task_ids</span><span class="p">]</span>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">upstream_task_ids</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_upstream_task_ids</span>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">downstream_list</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""@property: list of tasks directly downstream"""</span>
+ <span class="k">return</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">dag</span><span class="o">.</span><span class="n">get_task</span><span class="p">(</span><span class="n">tid</span><span class="p">)</span> <span class="k">for</span> <span class="n">tid</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_downstream_task_ids</span><span class="p">]</span>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">downstream_task_ids</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_downstream_task_ids</span>
+
+<div class="viewcode-block" id="BaseOperator.clear"><a class="viewcode-back" href="../../code.html#airflow.models.BaseOperator.clear">[docs]</a> <span class="k">def</span> <span class="nf">clear</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span> <span class="n">start_date</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">end_date</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">upstream</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span> <span class="n">downstream</span><span class="o">=</span><span class="bp">False</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Clears the state of task instances associated with the task, following</span>
+<span class="sd"> the parameters specified.</span>
+<span class="sd"> """</span>
+ <span class="n">session</span> <span class="o">=</span> <span class="n">settings</span><span class="o">.</span><span class="n">Session</span><span class="p">()</span>
+
+ <span class="n">TI</span> <span class="o">=</span> <span class="n">TaskInstance</span>
+ <span class="n">qry</span> <span class="o">=</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">TI</span><span class="p">)</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">TI</span><span class="o">.</span><span class="n">dag_id</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span><span class="p [...]
+
+ <span class="k">if</span> <span class="n">start_date</span><span class="p">:</span>
+ <span class="n">qry</span> <span class="o">=</span> <span class="n">qry</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">TI</span><span class="o">.</span><span class="n">execution_date</span> <span class="o">>=</span> <span class="n">start_date</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">end_date</span><span class="p">:</span>
+ <span class="n">qry</span> <span class="o">=</span> <span class="n">qry</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">TI</span><span class="o">.</span><span class="n">execution_date</span> <span class="o"><=</span> <span class="n">end_date</span><span class="p">)</span>
+
+ <span class="n">tasks</span> <span class="o">=</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">task_id</span><span class="p">]</span>
+
+ <span class="k">if</span> <span class="n">upstream</span><span class="p">:</span>
+ <span class="n">tasks</span> <span class="o">+=</span> <span class="p">[</span>
+ <span class="n">t</span><span class="o">.</span><span class="n">task_id</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_flat_relatives</span><span class="p">(</span><span class="n">upstream</span><span class="o">=</span><span class="bp">True</span><span class="p">)]</span>
+
+ <span class="k">if</span> <span class="n">downstream</span><span class="p">:</span>
+ <span class="n">tasks</span> <span class="o">+=</span> <span class="p">[</span>
+ <span class="n">t</span><span class="o">.</span><span class="n">task_id</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_flat_relatives</span><span class="p">(</span><span class="n">upstream</span><span class="o">=</span><span class="bp">False</span><span class="p">)]</span>
+
+ <span class="n">qry</span> <span class="o">=</span> <span class="n">qry</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">TI</span><span class="o">.</span><span class="n">task_id</span><span class="o">.</span><span class="n">in_</span><span class="p">(</span><span class="n">tasks</span><span class="p">))</span>
+
+ <span class="n">count</span> <span class="o">=</span> <span class="n">qry</span><span class="o">.</span><span class="n">count</span><span class="p">()</span>
+ <span class="n">clear_task_instances</span><span class="p">(</span><span class="n">qry</span><span class="p">,</span> <span class="n">session</span><span class="p">)</span>
+
+ <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+ <span class="k">return</span> <span class="n">count</span></div>
+
+<div class="viewcode-block" id="BaseOperator.get_task_instances"><a class="viewcode-back" href="../../code.html#airflow.models.BaseOperator.get_task_instances">[docs]</a> <span class="k">def</span> <span class="nf">get_task_instances</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">session</span><span class="p">,</span> <span class="n">start_date</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span c [...]
+ <span class="sd">"""</span>
+<span class="sd"> Get a set of task instance related to this task for a specific date</span>
+<span class="sd"> range.</span>
+<span class="sd"> """</span>
+ <span class="n">TI</span> <span class="o">=</span> <span class="n">TaskInstance</span>
+ <span class="n">end_date</span> <span class="o">=</span> <span class="n">end_date</span> <span class="ow">or</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
+ <span class="k">return</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">TI</span><span class="p">)</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">dag_id</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span><span class="p">,</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">task_id</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">task_id</span><span class="p">,</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">execution_date</span> <span class="o">>=</span> <span class="n">start_date</span><span class="p">,</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">execution_date</span> <span class="o"><=</span> <span class="n">end_date</span><span class="p">,</span>
+ <span class="p">)</span><span class="o">.</span><span class="n">order_by</span><span class="p">(</span><span class="n">TI</span><span class="o">.</span><span class="n">execution_date</span><span class="p">)</span><span class="o">.</span><span class="n">all</span><span class="p">()</span></div>
+
+<div class="viewcode-block" id="BaseOperator.get_flat_relatives"><a class="viewcode-back" href="../../code.html#airflow.models.BaseOperator.get_flat_relatives">[docs]</a> <span class="k">def</span> <span class="nf">get_flat_relatives</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">upstream</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span> <span class="n">l</span><span class="o">=</span><span class="bp [...]
+ <span class="sd">"""</span>
+<span class="sd"> Get a flat list of relatives, either upstream or downstream.</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">l</span><span class="p">:</span>
+ <span class="n">l</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_direct_relatives</span><span class="p">(</span><span class="n">upstream</span><span class="p">):</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">is_in</span><span class="p">(</span><span class="n">t</span><span class="p">,</span> <span class="n">l</span><span class="p">):</span>
+ <span class="n">l</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">t</span><span class="p">)</span>
+ <span class="n">t</span><span class="o">.</span><span class="n">get_flat_relatives</span><span class="p">(</span><span class="n">upstream</span><span class="p">,</span> <span class="n">l</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">l</span></div>
+
+<div class="viewcode-block" id="BaseOperator.detect_downstream_cycle"><a class="viewcode-back" href="../../code.html#airflow.models.BaseOperator.detect_downstream_cycle">[docs]</a> <span class="k">def</span> <span class="nf">detect_downstream_cycle</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">task</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> When invoked, this routine will raise an exception if a cycle is</span>
+<span class="sd"> detected downstream from self. It is invoked when tasks are added to</span>
+<span class="sd"> the DAG to detect cycles.</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">task</span><span class="p">:</span>
+ <span class="n">task</span> <span class="o">=</span> <span class="bp">self</span>
+ <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_direct_relatives</span><span class="p">():</span>
+ <span class="k">if</span> <span class="n">task</span> <span class="ow">is</span> <span class="n">t</span><span class="p">:</span>
+ <span class="n">msg</span> <span class="o">=</span> <span class="s2">"Cycle detected in DAG. Faulty task: {0}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">task</span><span class="p">)</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span><span class="n">msg</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">t</span><span class="o">.</span><span class="n">detect_downstream_cycle</span><span class="p">(</span><span class="n">task</span><span class="o">=</span><span class="n">task</span><span class="p">)</span>
+ <span class="k">return</span> <span class="bp">False</span></div>
+
+<div class="viewcode-block" id="BaseOperator.run"><a class="viewcode-back" href="../../code.html#airflow.models.BaseOperator.run">[docs]</a> <span class="k">def</span> <span class="nf">run</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span>
+ <span class="n">start_date</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">end_date</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">ignore_dependencies</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">ignore_first_depends_on_past</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">force</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">mark_success</span><span class="o">=</span><span class="bp">False</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Run a set of task instances for a date range.</span>
+<span class="sd"> """</span>
+ <span class="n">start_date</span> <span class="o">=</span> <span class="n">start_date</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">start_date</span>
+ <span class="n">end_date</span> <span class="o">=</span> <span class="n">end_date</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">end_date</span> <span class="ow">or</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
+
+ <span class="k">for</span> <span class="n">dt</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag</span><span class="o">.</span><span class="n">date_range</span><span class="p">(</span><span class="n">start_date</span><span class="p">,</span> <span class="n">end_date</span><span class="o">=</span><span class="n">end_date</span><span class="p">):</span>
+ <span class="n">TaskInstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dt</span><span class="p">)</span><span class="o">.</span><span class="n">run</span><span class="p">(</span>
+ <span class="n">mark_success</span><span class="o">=</span><span class="n">mark_success</span><span class="p">,</span>
+ <span class="n">ignore_dependencies</span><span class="o">=</span><span class="n">ignore_dependencies</span><span class="p">,</span>
+ <span class="n">ignore_depends_on_past</span><span class="o">=</span><span class="p">(</span>
+ <span class="n">dt</span> <span class="o">==</span> <span class="n">start_date</span> <span class="ow">and</span> <span class="n">ignore_first_depends_on_past</span><span class="p">),</span>
+ <span class="n">force</span><span class="o">=</span><span class="n">force</span><span class="p">,)</span></div>
+
+ <span class="k">def</span> <span class="nf">dry_run</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Dry run'</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">attr</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">template_fields</span><span class="p">:</span>
+ <span class="n">content</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">attr</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">content</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">content</span><span class="p">,</span> <span class="n">six</span><span class="o">.</span><span class="n">string_types</span><span class="p">):</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Rendering template for {0}'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">attr</span><span class="p">))</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="n">content</span><span class="p">)</span>
+
+<div class="viewcode-block" id="BaseOperator.get_direct_relatives"><a class="viewcode-back" href="../../code.html#airflow.models.BaseOperator.get_direct_relatives">[docs]</a> <span class="k">def</span> <span class="nf">get_direct_relatives</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">upstream</span><span class="o">=</span><span class="bp">False</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Get the direct relatives to the current task, upstream or</span>
+<span class="sd"> downstream.</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="n">upstream</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">upstream_list</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">downstream_list</span></div>
+
+ <span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="s2">"<Task({self.__class__.__name__}): {self.task_id}>"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
+ <span class="bp">self</span><span class="o">=</span><span class="bp">self</span><span class="p">)</span>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">task_type</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">__class__</span><span class="o">.</span><span class="n">__name__</span>
+
+ <span class="k">def</span> <span class="nf">append_only_new</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">l</span><span class="p">,</span> <span class="n">item</span><span class="p">):</span>
+ <span class="k">if</span> <span class="nb">any</span><span class="p">([</span><span class="n">item</span> <span class="ow">is</span> <span class="n">t</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">l</span><span class="p">]):</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span>
+ <span class="s1">'Dependency {self}, {item} already registered'</span>
+ <span class="s1">''</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">()))</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">l</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">_set_relatives</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">task_or_task_list</span><span class="p">,</span> <span class="n">upstream</span><span class="o">=</span><span class="bp">False</span><span class="p">):</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">task_list</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">task_or_task_list</span><span class="p">)</span>
+ <span class="k">except</span> <span class="ne">TypeError</span><span class="p">:</span>
+ <span class="n">task_list</span> <span class="o">=</span> <span class="p">[</span><span class="n">task_or_task_list</span><span class="p">]</span>
+
+ <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">task_list</span><span class="p">:</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">t</span><span class="p">,</span> <span class="n">BaseOperator</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span>
+ <span class="s2">"Relationships can only be set between "</span>
+ <span class="s2">"Operators; received {}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">t</span><span class="o">.</span><span class="n">__class__</span><span class="o">.</span><span class="n">__name__</span><span class="p">))</span>
+
+ <span class="c1"># relationships can only be set if the tasks share a single DAG. Tasks</span>
+ <span class="c1"># without a DAG are assigned to that DAG.</span>
+ <span class="n">dags</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="n">t</span><span class="o">.</span><span class="n">dag</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="p">[</span><span class="bp">self</span><span class="p">]</span> <span class="o">+</span> <span class="n">task_list</span> <span class="k">if</span> <span class="n">t</span><span class="o">.</span><span class="n">h [...]
+
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">dags</span><span class="p">)</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span>
+ <span class="s1">'Tried to set relationships between tasks in '</span>
+ <span class="s1">'more than one DAG: {}'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">dags</span><span class="p">))</span>
+ <span class="k">elif</span> <span class="nb">len</span><span class="p">(</span><span class="n">dags</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
+ <span class="n">dag</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">dags</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span>
+ <span class="s2">"Tried to create relationships between tasks that don't have "</span>
+ <span class="s2">"DAGs yet. Set the DAG for at least one "</span>
+ <span class="s2">"task and try again: {}"</span><span class="o">.</span><span class="n">format</span><span class="p">([</span><span class="bp">self</span><span class="p">]</span> <span class="o">+</span> <span class="n">task_list</span><span class="p">))</span>
+
+ <span class="k">if</span> <span class="n">dag</span> <span class="ow">and</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">has_dag</span><span class="p">():</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">dag</span> <span class="o">=</span> <span class="n">dag</span>
+
+ <span class="k">for</span> <span class="n">task</span> <span class="ow">in</span> <span class="n">task_list</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">dag</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">task</span><span class="o">.</span><span class="n">has_dag</span><span class="p">():</span>
+ <span class="n">task</span><span class="o">.</span><span class="n">dag</span> <span class="o">=</span> <span class="n">dag</span>
+ <span class="k">if</span> <span class="n">upstream</span><span class="p">:</span>
+ <span class="n">task</span><span class="o">.</span><span class="n">append_only_new</span><span class="p">(</span><span class="n">task</span><span class="o">.</span><span class="n">_downstream_task_ids</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">task_id</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">append_only_new</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_upstream_task_ids</span><span class="p">,</span> <span class="n">task</span><span class="o">.</span><span class="n">task_id</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">append_only_new</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_downstream_task_ids</span><span class="p">,</span> <span class="n">task</span><span class="o">.</span><span class="n">task_id</span><span class="p">)</span>
+ <span class="n">task</span><span class="o">.</span><span class="n">append_only_new</span><span class="p">(</span><span class="n">task</span><span class="o">.</span><span class="n">_upstream_task_ids</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">task_id</span><span class="p">)</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">detect_downstream_cycle</span><span class="p">()</span>
+
+<div class="viewcode-block" id="BaseOperator.set_downstream"><a class="viewcode-back" href="../../code.html#airflow.models.BaseOperator.set_downstream">[docs]</a> <span class="k">def</span> <span class="nf">set_downstream</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">task_or_task_list</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Set a task, or a task list to be directly downstream from the current</span>
+<span class="sd"> task.</span>
+<span class="sd"> """</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_set_relatives</span><span class="p">(</span><span class="n">task_or_task_list</span><span class="p">,</span> <span class="n">upstream</span><span class="o">=</span><span class="bp">False</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="BaseOperator.set_upstream"><a class="viewcode-back" href="../../code.html#airflow.models.BaseOperator.set_upstream">[docs]</a> <span class="k">def</span> <span class="nf">set_upstream</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">task_or_task_list</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Set a task, or a task list to be directly upstream from the current</span>
+<span class="sd"> task.</span>
+<span class="sd"> """</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_set_relatives</span><span class="p">(</span><span class="n">task_or_task_list</span><span class="p">,</span> <span class="n">upstream</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="BaseOperator.xcom_push"><a class="viewcode-back" href="../../code.html#airflow.models.BaseOperator.xcom_push">[docs]</a> <span class="k">def</span> <span class="nf">xcom_push</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span>
+ <span class="n">context</span><span class="p">,</span>
+ <span class="n">key</span><span class="p">,</span>
+ <span class="n">value</span><span class="p">,</span>
+ <span class="n">execution_date</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> See TaskInstance.xcom_push()</span>
+<span class="sd"> """</span>
+ <span class="n">context</span><span class="p">[</span><span class="s1">'ti'</span><span class="p">]</span><span class="o">.</span><span class="n">xcom_push</span><span class="p">(</span>
+ <span class="n">key</span><span class="o">=</span><span class="n">key</span><span class="p">,</span>
+ <span class="n">value</span><span class="o">=</span><span class="n">value</span><span class="p">,</span>
+ <span class="n">execution_date</span><span class="o">=</span><span class="n">execution_date</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="BaseOperator.xcom_pull"><a class="viewcode-back" href="../../code.html#airflow.models.BaseOperator.xcom_pull">[docs]</a> <span class="k">def</span> <span class="nf">xcom_pull</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span>
+ <span class="n">context</span><span class="p">,</span>
+ <span class="n">task_ids</span><span class="p">,</span>
+ <span class="n">dag_id</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">key</span><span class="o">=</span><span class="n">XCOM_RETURN_KEY</span><span class="p">,</span>
+ <span class="n">include_prior_dates</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> See TaskInstance.xcom_pull()</span>
+<span class="sd"> """</span>
+ <span class="k">return</span> <span class="n">context</span><span class="p">[</span><span class="s1">'ti'</span><span class="p">]</span><span class="o">.</span><span class="n">xcom_pull</span><span class="p">(</span>
+ <span class="n">key</span><span class="o">=</span><span class="n">key</span><span class="p">,</span>
+ <span class="n">task_ids</span><span class="o">=</span><span class="n">task_ids</span><span class="p">,</span>
+ <span class="n">dag_id</span><span class="o">=</span><span class="n">dag_id</span><span class="p">,</span>
+ <span class="n">include_prior_dates</span><span class="o">=</span><span class="n">include_prior_dates</span><span class="p">)</span></div></div>
+
+
+<span class="k">class</span> <span class="nc">DagModel</span><span class="p">(</span><span class="n">Base</span><span class="p">):</span>
+
+ <span class="n">__tablename__</span> <span class="o">=</span> <span class="s2">"dag"</span>
+ <span class="sd">"""</span>
+<span class="sd"> These items are stored in the database for state related information</span>
+<span class="sd"> """</span>
+ <span class="n">dag_id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="n">ID_LEN</span><span class="p">),</span> <span class="n">primary_key</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="c1"># A DAG can be paused from the UI / DB</span>
+ <span class="c1"># Set this default value of is_paused based on a configuration value!</span>
+ <span class="n">is_paused_at_creation</span> <span class="o">=</span> <span class="n">configuration</span><span class="o">.</span><span class="n">getboolean</span><span class="p">(</span><span class="s1">'core'</span><span class="p">,</span> <span class="s1">'dags_are_paused_at_creation'</span><span class="p">)</span>
+ <span class="n">is_paused</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Boolean</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="n">is_paused_at_creation</span><span class="p">)</span>
+ <span class="c1"># Whether the DAG is a subdag</span>
+ <span class="n">is_subdag</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Boolean</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="bp">False</span><span class="p">)</span>
+ <span class="c1"># Whether that DAG was seen on the last DagBag load</span>
+ <span class="n">is_active</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Boolean</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="bp">False</span><span class="p">)</span>
+ <span class="c1"># Last time the scheduler started</span>
+ <span class="n">last_scheduler_run</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">DateTime</span><span class="p">)</span>
+ <span class="c1"># Last time this DAG was pickled</span>
+ <span class="n">last_pickled</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">DateTime</span><span class="p">)</span>
+ <span class="c1"># When the DAG received a refreshed signal last, used to know when</span>
+ <span class="c1"># we need to force refresh</span>
+ <span class="n">last_expired</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">DateTime</span><span class="p">)</span>
+ <span class="c1"># Whether (one of) the scheduler is scheduling this DAG at the moment</span>
+ <span class="n">scheduler_lock</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Boolean</span><span class="p">)</span>
+ <span class="c1"># Foreign key to the latest pickle_id</span>
+ <span class="n">pickle_id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Integer</span><span class="p">)</span>
+ <span class="c1"># The location of the file containing the DAG object</span>
+ <span class="n">fileloc</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="mi">2000</span><span class="p">))</span>
+ <span class="c1"># String representing the owners</span>
+ <span class="n">owners</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="mi">2000</span><span class="p">))</span>
+
+ <span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="s2">"<DAG: {self.dag_id}>"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">=</span><span class="bp">self</span><span class="p">)</span>
+
+ <span class="nd">@classmethod</span>
+ <span class="k">def</span> <span class="nf">get_current</span><span class="p">(</span><span class="n">cls</span><span class="p">,</span> <span class="n">dag_id</span><span class="p">):</span>
+ <span class="n">session</span> <span class="o">=</span> <span class="n">settings</span><span class="o">.</span><span class="n">Session</span><span class="p">()</span>
+ <span class="n">obj</span> <span class="o">=</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">cls</span><span class="p">)</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">cls</span><span class="o">.</span><span class="n">dag_id</span> <span class="o">==</span> <span class="n">dag_id</span><span class="p">)</span><span class="o">.</span><span class="n"> [...]
+ <span class="n">session</span><span class="o">.</span><span class="n">expunge_all</span><span class="p">()</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+ <span class="k">return</span> <span class="n">obj</span>
+
+
+<span class="nd">@functools.total_ordering</span>
+<div class="viewcode-block" id="DAG"><a class="viewcode-back" href="../../code.html#airflow.models.DAG">[docs]</a><span class="k">class</span> <span class="nc">DAG</span><span class="p">(</span><span class="n">LoggingMixin</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> A dag (directed acyclic graph) is a collection of tasks with directional</span>
+<span class="sd"> dependencies. A dag also has a schedule, a start date and an end date</span>
+<span class="sd"> (optional). For each schedule, (say daily or hourly), the DAG needs to run</span>
+<span class="sd"> each individual task as its dependencies are met. Certain tasks have</span>
+<span class="sd"> the property of depending on their own past, meaning that they can't run</span>
+<span class="sd"> until their previous schedule (and upstream tasks) are completed.</span>
+
+<span class="sd"> DAGs essentially act as namespaces for tasks. A task_id can only be</span>
+<span class="sd"> added once to a DAG.</span>
+
+<span class="sd"> :param dag_id: The id of the DAG</span>
+<span class="sd"> :type dag_id: string</span>
+<span class="sd"> :param schedule_interval: Defines how often that DAG runs, this</span>
+<span class="sd"> timedelta object gets added to your latest task instance's</span>
+<span class="sd"> execution_date to figure out the next schedule</span>
+<span class="sd"> :type schedule_interval: datetime.timedelta or</span>
+<span class="sd"> dateutil.relativedelta.relativedelta or str that acts as a cron</span>
+<span class="sd"> expression</span>
+<span class="sd"> :param start_date: The timestamp from which the scheduler will</span>
+<span class="sd"> attempt to backfill</span>
+<span class="sd"> :type start_date: datetime.datetime</span>
+<span class="sd"> :param end_date: A date beyond which your DAG won't run, leave to None</span>
+<span class="sd"> for open ended scheduling</span>
+<span class="sd"> :type end_date: datetime.datetime</span>
+<span class="sd"> :param template_searchpath: This list of folders (non relative)</span>
+<span class="sd"> defines where jinja will look for your templates. Order matters.</span>
+<span class="sd"> Note that jinja/airflow includes the path of your DAG file by</span>
+<span class="sd"> default</span>
+<span class="sd"> :type template_searchpath: string or list of strings</span>
+<span class="sd"> :param user_defined_macros: a dictionary of macros that will be exposed</span>
+<span class="sd"> in your jinja templates. For example, passing ``dict(foo='bar')``</span>
+<span class="sd"> to this argument allows you to ``{{ foo }}`` in all jinja</span>
+<span class="sd"> templates related to this DAG. Note that you can pass any</span>
+<span class="sd"> type of object here.</span>
+<span class="sd"> :type user_defined_macros: dict</span>
+<span class="sd"> :param default_args: A dictionary of default parameters to be used</span>
+<span class="sd"> as constructor keyword parameters when initialising operators.</span>
+<span class="sd"> Note that operators have the same hook, and precede those defined</span>
+<span class="sd"> here, meaning that if your dict contains `'depends_on_past': True`</span>
+<span class="sd"> here and `'depends_on_past': False` in the operator's call</span>
+<span class="sd"> `default_args`, the actual value will be `False`.</span>
+<span class="sd"> :type default_args: dict</span>
+<span class="sd"> :param params: a dictionary of DAG level parameters that are made</span>
+<span class="sd"> accessible in templates, namespaced under `params`. These</span>
+<span class="sd"> params can be overridden at the task level.</span>
+<span class="sd"> :type params: dict</span>
+<span class="sd"> :param concurrency: the number of task instances allowed to run</span>
+<span class="sd"> concurrently</span>
+<span class="sd"> :type concurrency: int</span>
+<span class="sd"> :param max_active_runs: maximum number of active DAG runs, beyond this</span>
+<span class="sd"> number of DAG runs in a running state, the scheduler won't create</span>
+<span class="sd"> new active DAG runs</span>
+<span class="sd"> :type max_active_runs: int</span>
+<span class="sd"> :param dagrun_timeout: specify how long a DagRun should be up before</span>
+<span class="sd"> timing out / failing, so that new DagRuns can be created</span>
+<span class="sd"> :type dagrun_timeout: datetime.timedelta</span>
+<span class="sd"> :param sla_miss_callback: specify a function to call when reporting SLA</span>
+<span class="sd"> timeouts.</span>
+<span class="sd"> :type sla_miss_callback: types.FunctionType</span>
+<span class="sd"> """</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span> <span class="n">dag_id</span><span class="p">,</span>
+ <span class="n">schedule_interval</span><span class="o">=</span><span class="n">timedelta</span><span class="p">(</span><span class="n">days</span><span class="o">=</span><span class="mi">1</span><span class="p">),</span>
+ <span class="n">start_date</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">end_date</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">full_filepath</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">template_searchpath</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">user_defined_macros</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">default_args</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">concurrency</span><span class="o">=</span><span class="n">configuration</span><span class="o">.</span><span class="n">getint</span><span class="p">(</span><span class="s1">'core'</span><span class="p">,</span> <span class="s1">'dag_concurrency'</span><span class="p">),</span>
+ <span class="n">max_active_runs</span><span class="o">=</span><span class="n">configuration</span><span class="o">.</span><span class="n">getint</span><span class="p">(</span>
+ <span class="s1">'core'</span><span class="p">,</span> <span class="s1">'max_active_runs_per_dag'</span><span class="p">),</span>
+ <span class="n">dagrun_timeout</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">sla_miss_callback</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">params</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">user_defined_macros</span> <span class="o">=</span> <span class="n">user_defined_macros</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">default_args</span> <span class="o">=</span> <span class="n">default_args</span> <span class="ow">or</span> <span class="p">{}</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">params</span> <span class="o">=</span> <span class="n">params</span> <span class="ow">or</span> <span class="p">{}</span>
+
+ <span class="c1"># merging potentially conflicting default_args['params'] into params</span>
+ <span class="k">if</span> <span class="s1">'params'</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">default_args</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">default_args</span><span class="p">[</span><span class="s1">'params'</span><span class="p">])</span>
+ <span class="k">del</span> <span class="bp">self</span><span class="o">.</span><span class="n">default_args</span><span class="p">[</span><span class="s1">'params'</span><span class="p">]</span>
+
+ <span class="n">validate_key</span><span class="p">(</span><span class="n">dag_id</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">task_dict</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span> <span class="o">=</span> <span class="n">dag_id</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">start_date</span> <span class="o">=</span> <span class="n">start_date</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">end_date</span> <span class="o">=</span> <span class="n">end_date</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">schedule_interval</span> <span class="o">=</span> <span class="n">schedule_interval</span>
+ <span class="k">if</span> <span class="n">schedule_interval</span> <span class="ow">in</span> <span class="n">cron_presets</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_schedule_interval</span> <span class="o">=</span> <span class="n">cron_presets</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">schedule_interval</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="n">schedule_interval</span> <span class="o">==</span> <span class="s1">'@once'</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_schedule_interval</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_schedule_interval</span> <span class="o">=</span> <span class="n">schedule_interval</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">full_filepath</span> <span class="o">=</span> <span class="n">full_filepath</span> <span class="k">if</span> <span class="n">full_filepath</span> <span class="k">else</span> <span class="s1">''</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">template_searchpath</span><span class="p">,</span> <span class="n">six</span><span class="o">.</span><span class="n">string_types</span><span class="p">):</span>
+ <span class="n">template_searchpath</span> <span class="o">=</span> <span class="p">[</span><span class="n">template_searchpath</span><span class="p">]</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">template_searchpath</span> <span class="o">=</span> <span class="n">template_searchpath</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">parent_dag</span> <span class="o">=</span> <span class="bp">None</span> <span class="c1"># Gets set when DAGs are loaded</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">last_loaded</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">safe_dag_id</span> <span class="o">=</span> <span class="n">dag_id</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'.'</span><span class="p">,</span> <span class="s1">'__dot__'</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">concurrency</span> <span class="o">=</span> <span class="n">concurrency</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">max_active_runs</span> <span class="o">=</span> <span class="n">max_active_runs</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">dagrun_timeout</span> <span class="o">=</span> <span class="n">dagrun_timeout</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">sla_miss_callback</span> <span class="o">=</span> <span class="n">sla_miss_callback</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">_comps</span> <span class="o">=</span> <span class="p">{</span>
+ <span class="s1">'dag_id'</span><span class="p">,</span>
+ <span class="s1">'task_ids'</span><span class="p">,</span>
+ <span class="s1">'parent_dag'</span><span class="p">,</span>
+ <span class="s1">'start_date'</span><span class="p">,</span>
+ <span class="s1">'schedule_interval'</span><span class="p">,</span>
+ <span class="s1">'full_filepath'</span><span class="p">,</span>
+ <span class="s1">'template_searchpath'</span><span class="p">,</span>
+ <span class="s1">'last_loaded'</span><span class="p">,</span>
+ <span class="p">}</span>
+
+ <span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="s2">"<DAG: {self.dag_id}>"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">=</span><span class="bp">self</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">__eq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">(</span>
+ <span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">==</span> <span class="nb">type</span><span class="p">(</span><span class="n">other</span><span class="p">)</span> <span class="ow">and</span>
+ <span class="nb">all</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">__dict__</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">c</span><span class="p">,</span> <span class="bp">None</span><span class="p">)</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">__dict__</span><span class="o">.</span><span class="n">get</span><span class [...]
+ <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_comps</span><span class="p">))</span>
+
+ <span class="k">def</span> <span class="nf">__neq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
+ <span class="k">return</span> <span class="ow">not</span> <span class="bp">self</span> <span class="o">==</span> <span class="n">other</span>
+
+ <span class="k">def</span> <span class="nf">__lt__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span> <span class="o"><</span> <span class="n">other</span><span class="o">.</span><span class="n">dag_id</span>
+
+ <span class="k">def</span> <span class="nf">__hash__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="n">hash_components</span> <span class="o">=</span> <span class="p">[</span><span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">)]</span>
+ <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_comps</span><span class="p">:</span>
+ <span class="c1"># task_ids returns a list and lists can't be hashed</span>
+ <span class="k">if</span> <span class="n">c</span> <span class="o">==</span> <span class="s1">'task_ids'</span><span class="p">:</span>
+ <span class="n">val</span> <span class="o">=</span> <span class="nb">tuple</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">task_dict</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">val</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">c</span><span class="p">,</span> <span class="bp">None</span><span class="p">)</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="nb">hash</span><span class="p">(</span><span class="n">val</span><span class="p">)</span>
+ <span class="n">hash_components</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">val</span><span class="p">)</span>
+ <span class="k">except</span> <span class="ne">TypeError</span><span class="p">:</span>
+ <span class="n">hash_components</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">repr</span><span class="p">(</span><span class="n">val</span><span class="p">))</span>
+ <span class="k">return</span> <span class="nb">hash</span><span class="p">(</span><span class="nb">tuple</span><span class="p">(</span><span class="n">hash_components</span><span class="p">))</span>
+
+ <span class="c1"># Context Manager -----------------------------------------------</span>
+
+ <span class="k">def</span> <span class="nf">__enter__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">global</span> <span class="n">_CONTEXT_MANAGER_DAG</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_old_context_manager_dag</span> <span class="o">=</span> <span class="n">_CONTEXT_MANAGER_DAG</span>
+ <span class="n">_CONTEXT_MANAGER_DAG</span> <span class="o">=</span> <span class="bp">self</span>
+ <span class="k">return</span> <span class="bp">self</span>
+
+ <span class="k">def</span> <span class="nf">__exit__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">_type</span><span class="p">,</span> <span class="n">_value</span><span class="p">,</span> <span class="n">_tb</span><span class="p">):</span>
+ <span class="k">global</span> <span class="n">_CONTEXT_MANAGER_DAG</span>
+ <span class="n">_CONTEXT_MANAGER_DAG</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_old_context_manager_dag</span>
+
+ <span class="c1"># /Context Manager ----------------------------------------------</span>
+
+ <span class="k">def</span> <span class="nf">date_range</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">start_date</span><span class="p">,</span> <span class="n">num</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">end_date</span><span class="o">=</span><span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()):</span>
+ <span class="k">if</span> <span class="n">num</span><span class="p">:</span>
+ <span class="n">end_date</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="k">return</span> <span class="n">utils_date_range</span><span class="p">(</span>
+ <span class="n">start_date</span><span class="o">=</span><span class="n">start_date</span><span class="p">,</span> <span class="n">end_date</span><span class="o">=</span><span class="n">end_date</span><span class="p">,</span>
+ <span class="n">num</span><span class="o">=</span><span class="n">num</span><span class="p">,</span> <span class="n">delta</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_schedule_interval</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">following_schedule</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dttm</span><span class="p">):</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_schedule_interval</span><span class="p">,</span> <span class="n">six</span><span class="o">.</span><span class="n">string_types</span><span class="p">):</span>
+ <span class="n">cron</span> <span class="o">=</span> <span class="n">croniter</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_schedule_interval</span><span class="p">,</span> <span class="n">dttm</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">cron</span><span class="o">.</span><span class="n">get_next</span><span class="p">(</span><span class="n">datetime</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_schedule_interval</span><span class="p">,</span> <span class="n">timedelta</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">dttm</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">_schedule_interval</span>
+
+ <span class="k">def</span> <span class="nf">previous_schedule</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dttm</span><span class="p">):</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_schedule_interval</span><span class="p">,</span> <span class="n">six</span><span class="o">.</span><span class="n">string_types</span><span class="p">):</span>
+ <span class="n">cron</span> <span class="o">=</span> <span class="n">croniter</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_schedule_interval</span><span class="p">,</span> <span class="n">dttm</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">cron</span><span class="o">.</span><span class="n">get_prev</span><span class="p">(</span><span class="n">datetime</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_schedule_interval</span><span class="p">,</span> <span class="n">timedelta</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">dttm</span> <span class="o">-</span> <span class="bp">self</span><span class="o">.</span><span class="n">_schedule_interval</span>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">tasks</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">list</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">task_dict</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
+
+ <span class="nd">@tasks.setter</span>
+ <span class="k">def</span> <span class="nf">tasks</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">val</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="ne">AttributeError</span><span class="p">(</span>
+ <span class="s1">'DAG.tasks can not be modified. Use dag.add_task() instead.'</span><span class="p">)</span>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">task_ids</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">list</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">task_dict</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">active_task_ids</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">list</span><span class="p">(</span><span class="n">k</span> <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">task_dict</span><span class="o">.</span><span class="n">items</span><span class="p">()</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">v</span><span cla [...]
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">active_tasks</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">[</span><span class="n">t</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">tasks</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">t</span><span class="o">.</span><span class="n">adhoc</span><span class="p">]</span>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">filepath</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> File location of where the dag object is instantiated</span>
+<span class="sd"> """</span>
+ <span class="n">fn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">full_filepath</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">DAGS_FOLDER</span> <span class="o">+</span> <span class="s1">'/'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span>
+ <span class="n">fn</span> <span class="o">=</span> <span class="n">fn</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">dirname</span><span class="p">(</span><span class="n">__file__</span><span class="p">)</span> <span class="o">+</span> <span class="s1">'/'</span><span class="p">,</span> <span class="s1">''</span>< [...]
+ <span class="k">return</span> <span class="n">fn</span>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">folder</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Folder location of where the dag object is instantiated</span>
+<span class="sd"> """</span>
+ <span class="k">return</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">dirname</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">full_filepath</span><span class="p">)</span>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">owner</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="s2">", "</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="nb">set</span><span class="p">([</span><span class="n">t</span><span class="o">.</span><span class="n">owner</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">tasks</span>< [...]
+
+ <span class="nd">@property</span>
+ <span class="nd">@provide_session</span>
+ <span class="k">def</span> <span class="nf">concurrency_reached</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">session</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns a boolean indicating whether the concurrency limit for this DAG</span>
+<span class="sd"> has been reached</span>
+<span class="sd"> """</span>
+ <span class="n">TI</span> <span class="o">=</span> <span class="n">TaskInstance</span>
+ <span class="n">qry</span> <span class="o">=</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">func</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="n">TI</span><span class="o">.</span><span class="n">task_id</span><span class="p">))</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">dag_id</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span><span class="p">,</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">task_id</span><span class="o">.</span><span class="n">in_</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">task_ids</span><span class="p">),</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">state</span> <span class="o">==</span> <span class="n">State</span><span class="o">.</span><span class="n">RUNNING</span><span class="p">,</span>
+ <span class="p">)</span>
+ <span class="k">return</span> <span class="n">qry</span><span class="o">.</span><span class="n">scalar</span><span class="p">()</span> <span class="o">>=</span> <span class="bp">self</span><span class="o">.</span><span class="n">concurrency</span>
+
+ <span class="nd">@property</span>
+ <span class="nd">@provide_session</span>
+ <span class="k">def</span> <span class="nf">is_paused</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">session</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns a boolean indicating whether this DAG is paused</span>
+<span class="sd"> """</span>
+ <span class="n">qry</span> <span class="o">=</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">DagModel</span><span class="p">)</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span>
+ <span class="n">DagModel</span><span class="o">.</span><span class="n">dag_id</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">qry</span><span class="o">.</span><span class="n">value</span><span class="p">(</span><span class="s1">'is_paused'</span><span class="p">)</span>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">latest_execution_date</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns the latest date for which at least one task instance exists</span>
+<span class="sd"> """</span>
+ <span class="n">TI</span> <span class="o">=</span> <span class="n">TaskInstance</span>
+ <span class="n">session</span> <span class="o">=</span> <span class="n">settings</span><span class="o">.</span><span class="n">Session</span><span class="p">()</span>
+ <span class="n">execution_date</span> <span class="o">=</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">func</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="n">TI</span><span class="o">.</span><span class="n">execution_date</span><span class="p">))</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">dag_id</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span><span class="p">,</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">task_id</span><span class="o">.</span><span class="n">in_</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">task_ids</span><span class="p">)</span>
+ <span class="p">)</span><span class="o">.</span><span class="n">scalar</span><span class="p">()</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+ <span class="k">return</span> <span class="n">execution_date</span>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">subdags</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns a list of the subdag objects associated to this DAG</span>
+<span class="sd"> """</span>
+ <span class="c1"># Check SubDag for class but don't check class directly, see</span>
+ <span class="c1"># https://github.com/airbnb/airflow/issues/1168</span>
+ <span class="n">l</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="k">for</span> <span class="n">task</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">tasks</span><span class="p">:</span>
+ <span class="k">if</span> <span class="p">(</span>
+ <span class="n">task</span><span class="o">.</span><span class="n">__class__</span><span class="o">.</span><span class="n">__name__</span> <span class="o">==</span> <span class="s1">'SubDagOperator'</span> <span class="ow">and</span>
+ <span class="nb">hasattr</span><span class="p">(</span><span class="n">task</span><span class="p">,</span> <span class="s1">'subdag'</span><span class="p">)):</span>
+ <span class="n">l</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">task</span><span class="o">.</span><span class="n">subdag</span><span class="p">)</span>
+ <span class="n">l</span> <span class="o">+=</span> <span class="n">task</span><span class="o">.</span><span class="n">subdag</span><span class="o">.</span><span class="n">subdags</span>
+ <span class="k">return</span> <span class="n">l</span>
+
+ <span class="k">def</span> <span class="nf">resolve_template_files</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">tasks</span><span class="p">:</span>
+ <span class="n">t</span><span class="o">.</span><span class="n">resolve_template_files</span><span class="p">()</span>
+
+<div class="viewcode-block" id="DAG.crawl_for_tasks"><a class="viewcode-back" href="../../code.html#airflow.models.DAG.crawl_for_tasks">[docs]</a> <span class="k">def</span> <span class="nf">crawl_for_tasks</span><span class="p">(</span><span class="n">objects</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Typically called at the end of a script by passing globals() as a</span>
+<span class="sd"> parameter. This avoids having to add every single task to the</span>
+<span class="sd"> dag explicitly.</span>
+<span class="sd"> """</span>
+ <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">""</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="DAG.get_template_env"><a class="viewcode-back" href="../../code.html#airflow.models.DAG.get_template_env">[docs]</a> <span class="k">def</span> <span class="nf">get_template_env</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">'''</span>
+<span class="sd"> Returns a jinja2 Environment while taking into account the DAG's</span>
+<span class="sd"> template_searchpath and user_defined_macros</span>
+<span class="sd"> '''</span>
+ <span class="n">searchpath</span> <span class="o">=</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">folder</span><span class="p">]</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">template_searchpath</span><span class="p">:</span>
+ <span class="n">searchpath</span> <span class="o">+=</span> <span class="bp">self</span><span class="o">.</span><span class="n">template_searchpath</span>
+
+ <span class="n">env</span> <span class="o">=</span> <span class="n">jinja2</span><span class="o">.</span><span class="n">Environment</span><span class="p">(</span>
+ <span class="n">loader</span><span class="o">=</span><span class="n">jinja2</span><span class="o">.</span><span class="n">FileSystemLoader</span><span class="p">(</span><span class="n">searchpath</span><span class="p">),</span>
+ <span class="n">extensions</span><span class="o">=</span><span class="p">[</span><span class="s2">"jinja2.ext.do"</span><span class="p">],</span>
+ <span class="n">cache_size</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">user_defined_macros</span><span class="p">:</span>
+ <span class="n">env</span><span class="o">.</span><span class="n">globals</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">user_defined_macros</span><span class="p">)</span>
+
+ <span class="k">return</span> <span class="n">env</span></div>
+
+<div class="viewcode-block" id="DAG.set_dependency"><a class="viewcode-back" href="../../code.html#airflow.models.DAG.set_dependency">[docs]</a> <span class="k">def</span> <span class="nf">set_dependency</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">upstream_task_id</span><span class="p">,</span> <span class="n">downstream_task_id</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Simple utility method to set dependency between two tasks that</span>
+<span class="sd"> already have been added to the DAG using add_task()</span>
+<span class="sd"> """</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">get_task</span><span class="p">(</span><span class="n">upstream_task_id</span><span class="p">)</span><span class="o">.</span><span class="n">set_downstream</span><span class="p">(</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">get_task</span><span class="p">(</span><span class="n">downstream_task_id</span><span class="p">))</span></div>
+
+ <span class="k">def</span> <span class="nf">get_task_instances</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span> <span class="n">session</span><span class="p">,</span> <span class="n">start_date</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">end_date</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">state</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="n">TI</span> <span class="o">=</span> <span class="n">TaskInstance</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">start_date</span><span class="p">:</span>
+ <span class="n">start_date</span> <span class="o">=</span> <span class="p">(</span><span class="n">datetime</span><span class="o">.</span><span class="n">today</span><span class="p">()</span><span class="o">-</span><span class="n">timedelta</span><span class="p">(</span><span class="mi">30</span><span class="p">))</span><span class="o">.</span><span class="n">date</span><span class="p">()</span>
+ <span class="n">start_date</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">combine</span><span class="p">(</span><span class="n">start_date</span><span class="p">,</span> <span class="n">datetime</span><span class="o">.</span><span class="n">min</span><span class="o">.</span><span class="n">time</span><span class="p">())</span>
+ <span class="n">end_date</span> <span class="o">=</span> <span class="n">end_date</span> <span class="ow">or</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
+ <span class="n">tis</span> <span class="o">=</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">TI</span><span class="p">)</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">dag_id</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span><span class="p">,</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">execution_date</span> <span class="o">>=</span> <span class="n">start_date</span><span class="p">,</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">execution_date</span> <span class="o"><=</span> <span class="n">end_date</span><span class="p">,</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">task_id</span><span class="o">.</span><span class="n">in_</span><span class="p">([</span><span class="n">t</span><span class="o">.</span><span class="n">task_id</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">tasks</span><span class="p">]),</span>
+ <span class="p">)</span>
+ <span class="k">if</span> <span class="n">state</span><span class="p">:</span>
+ <span class="n">tis</span> <span class="o">=</span> <span class="n">tis</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">TI</span><span class="o">.</span><span class="n">state</span> <span class="o">==</span> <span class="n">state</span><span class="p">)</span>
+ <span class="n">tis</span> <span class="o">=</span> <span class="n">tis</span><span class="o">.</span><span class="n">all</span><span class="p">()</span>
+ <span class="k">return</span> <span class="n">tis</span>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">roots</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">[</span><span class="n">t</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">tasks</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">t</span><span class="o">.</span><span class="n">downstream_list</span><span class="p">]</span>
+
+ <span class="nd">@provide_session</span>
+ <span class="k">def</span> <span class="nf">set_dag_runs_state</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span> <span class="n">start_date</span><span class="p">,</span> <span class="n">end_date</span><span class="p">,</span> <span class="n">state</span><span class="o">=</span><span class="n">State</span><span class="o">.</span><span class="n">RUNNING</span><span class="p">,</span> <span class="n">session</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="n">dates</span> <span class="o">=</span> <span class="n">utils_date_range</span><span class="p">(</span><span class="n">start_date</span><span class="p">,</span> <span class="n">end_date</span><span class="p">)</span>
+ <span class="n">drs</span> <span class="o">=</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">DagModel</span><span class="p">)</span><span class="o">.</span><span class="n">filter_by</span><span class="p">(</span><span class="n">dag_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span><span class="p">)</span><span class="o">.</span><span cla [...]
+ <span class="k">for</span> <span class="n">dr</span> <span class="ow">in</span> <span class="n">drs</span><span class="p">:</span>
+ <span class="n">dr</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="n">State</span><span class="o">.</span><span class="n">RUNNING</span>
+
+ <span class="k">def</span> <span class="nf">clear</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span> <span class="n">start_date</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">end_date</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">only_failed</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">only_running</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">confirm_prompt</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">include_subdags</span><span class="o">=</span><span class="bp">True</span><span class="p">,</span>
+ <span class="n">reset_dag_runs</span><span class="o">=</span><span class="bp">True</span><span class="p">,</span>
+ <span class="n">dry_run</span><span class="o">=</span><span class="bp">False</span><span class="p">):</span>
+ <span class="n">session</span> <span class="o">=</span> <span class="n">settings</span><span class="o">.</span><span class="n">Session</span><span class="p">()</span>
+ <span class="sd">"""</span>
+<span class="sd"> Clears a set of task instances associated with the current dag for</span>
+<span class="sd"> a specified date range.</span>
+<span class="sd"> """</span>
+ <span class="n">TI</span> <span class="o">=</span> <span class="n">TaskInstance</span>
+ <span class="n">tis</span> <span class="o">=</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">TI</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">include_subdags</span><span class="p">:</span>
+ <span class="c1"># Crafting the right filter for dag_id and task_ids combo</span>
+ <span class="n">conditions</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="k">for</span> <span class="n">dag</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">subdags</span> <span class="o">+</span> <span class="p">[</span><span class="bp">self</span><span class="p">]:</span>
+ <span class="n">conditions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">dag_id</span><span class="o">.</span><span class="n">like</span><span class="p">(</span><span class="n">dag</span><span class="o">.</span><span class="n">dag_id</span><span class="p">)</span> <span class="o">&</span> <span class="n">TI</span><span class="o">.</span><span class="n">task_id</span><span class="o">.</span><span class="n">in_</span><span class="p">(</span><span class="n">dag</span><span c [...]
+ <span class="p">)</span>
+ <span class="n">tis</span> <span class="o">=</span> <span class="n">tis</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">or_</span><span class="p">(</span><span class="o">*</span><span class="n">conditions</span><span class="p">))</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">tis</span> <span class="o">=</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">TI</span><span class="p">)</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">TI</span><span class="o">.</span><span class="n">dag_id</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span><span clas [...]
+ <span class="n">tis</span> <span class="o">=</span> <span class="n">tis</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">TI</span><span class="o">.</span><span class="n">task_id</span><span class="o">.</span><span class="n">in_</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">task_ids</span><span class="p">))</span>
+
+ <span class="k">if</span> <span class="n">start_date</span><span class="p">:</span>
+ <span class="n">tis</span> <span class="o">=</span> <span class="n">tis</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">TI</span><span class="o">.</span><span class="n">execution_date</span> <span class="o">>=</span> <span class="n">start_date</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">end_date</span><span class="p">:</span>
+ <span class="n">tis</span> <span class="o">=</span> <span class="n">tis</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">TI</span><span class="o">.</span><span class="n">execution_date</span> <span class="o"><=</span> <span class="n">end_date</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">only_failed</span><span class="p">:</span>
+ <span class="n">tis</span> <span class="o">=</span> <span class="n">tis</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">TI</span><span class="o">.</span><span class="n">state</span> <span class="o">==</span> <span class="n">State</span><span class="o">.</span><span class="n">FAILED</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">only_running</span><span class="p">:</span>
+ <span class="n">tis</span> <span class="o">=</span> <span class="n">tis</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">TI</span><span class="o">.</span><span class="n">state</span> <span class="o">==</span> <span class="n">State</span><span class="o">.</span><span class="n">RUNNING</span><span class="p">)</span>
+
+ <span class="k">if</span> <span class="n">dry_run</span><span class="p">:</span>
+ <span class="n">tis</span> <span class="o">=</span> <span class="n">tis</span><span class="o">.</span><span class="n">all</span><span class="p">()</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">expunge_all</span><span class="p">()</span>
+ <span class="k">return</span> <span class="n">tis</span>
+
+ <span class="n">count</span> <span class="o">=</span> <span class="n">tis</span><span class="o">.</span><span class="n">count</span><span class="p">()</span>
+ <span class="n">do_it</span> <span class="o">=</span> <span class="bp">True</span>
+ <span class="k">if</span> <span class="n">count</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="k">print</span><span class="p">(</span><span class="s2">"Nothing to clear."</span><span class="p">)</span>
+ <span class="k">return</span> <span class="mi">0</span>
+ <span class="k">if</span> <span class="n">confirm_prompt</span><span class="p">:</span>
+ <span class="n">ti_list</span> <span class="o">=</span> <span class="s2">"</span><span class="se">\n</span><span class="s2">"</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="nb">str</span><span class="p">(</span><span class="n">t</span><span class="p">)</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">tis</span><span class="p">])</span>
+ <span class="n">question</span> <span class="o">=</span> <span class="p">(</span>
+ <span class="s2">"You are about to delete these {count} tasks:</span><span class="se">\n</span><span class="s2">"</span>
+ <span class="s2">"{ti_list}</span><span class="se">\n\n</span><span class="s2">"</span>
+ <span class="s2">"Are you sure? (yes/no): "</span><span class="p">)</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">())</span>
+ <span class="n">do_it</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">helpers</span><span class="o">.</span><span class="n">ask_yesno</span><span class="p">(</span><span class="n">question</span><span class="p">)</span>
+
+ <span class="k">if</span> <span class="n">do_it</span><span class="p">:</span>
+ <span class="n">clear_task_instances</span><span class="p">(</span><span class="n">tis</span><span class="p">,</span> <span class="n">session</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">reset_dag_runs</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">set_dag_runs_state</span><span class="p">(</span><span class="n">start_date</span><span class="p">,</span> <span class="n">end_date</span><span class="p">,</span> <span class="n">session</span><span class="o">=</span><span class="n">session</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">count</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="k">print</span><span class="p">(</span><span class="s2">"Bail. Nothing was cleared."</span><span class="p">)</span>
+
+ <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+ <span class="k">return</span> <span class="n">count</span>
+
+ <span class="k">def</span> <span class="nf">__deepcopy__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">memo</span><span class="p">):</span>
+ <span class="c1"># Switcheroo to go around deepcopying objects coming through the</span>
+ <span class="c1"># backdoor</span>
+ <span class="n">cls</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">__class__</span>
+ <span class="n">result</span> <span class="o">=</span> <span class="n">cls</span><span class="o">.</span><span class="n">__new__</span><span class="p">(</span><span class="n">cls</span><span class="p">)</span>
+ <span class="n">memo</span><span class="p">[</span><span class="nb">id</span><span class="p">(</span><span class="bp">self</span><span class="p">)]</span> <span class="o">=</span> <span class="n">result</span>
+ <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="nb">list</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">__dict__</span><span class="o">.</span><span class="n">items</span><span class="p">()):</span>
+ <span class="k">if</span> <span class="n">k</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">(</span><span class="s1">'user_defined_macros'</span><span class="p">,</span> <span class="s1">'params'</span><span class="p">):</span>
+ <span class="nb">setattr</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="n">k</span><span class="p">,</span> <span class="n">copy</span><span class="o">.</span><span class="n">deepcopy</span><span class="p">(</span><span class="n">v</span><span class="p">,</span> <span class="n">memo</span><span class="p">))</span>
+
+ <span class="n">result</span><span class="o">.</span><span class="n">user_defined_macros</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">user_defined_macros</span>
+ <span class="n">result</span><span class="o">.</span><span class="n">params</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">params</span>
+ <span class="k">return</span> <span class="n">result</span>
+
+<div class="viewcode-block" id="DAG.sub_dag"><a class="viewcode-back" href="../../code.html#airflow.models.DAG.sub_dag">[docs]</a> <span class="k">def</span> <span class="nf">sub_dag</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">task_regex</span><span class="p">,</span> <span class="n">include_downstream</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">include_upstream</span><span class="o">=</span><span class="bp">True</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns a subset of the current dag as a deep copy of the current dag</span>
+<span class="sd"> based on a regex that should match one or many tasks, and includes</span>
+<span class="sd"> upstream and downstream neighbours based on the flag passed.</span>
+<span class="sd"> """</span>
+
+ <span class="n">dag</span> <span class="o">=</span> <span class="n">copy</span><span class="o">.</span><span class="n">deepcopy</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
+
+ <span class="n">regex_match</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="n">t</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">dag</span><span class="o">.</span><span class="n">tasks</span> <span class="k">if</span> <span class="n">re</span><span class="o">.</span><span class="n">findall</span><span class="p">(</span><span class="n">task_regex</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">task_id</span><span class="p">)]</span>
+ <span class="n">also_include</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">regex_match</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">include_downstream</span><span class="p">:</span>
+ <span class="n">also_include</span> <span class="o">+=</span> <span class="n">t</span><span class="o">.</span><span class="n">get_flat_relatives</span><span class="p">(</span><span class="n">upstream</span><span class="o">=</span><span class="bp">False</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">include_upstream</span><span class="p">:</span>
+ <span class="n">also_include</span> <span class="o">+=</span> <span class="n">t</span><span class="o">.</span><span class="n">get_flat_relatives</span><span class="p">(</span><span class="n">upstream</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+
+ <span class="c1"># Compiling the unique list of tasks that made the cut</span>
+ <span class="n">dag</span><span class="o">.</span><span class="n">task_dict</span> <span class="o">=</span> <span class="p">{</span><span class="n">t</span><span class="o">.</span><span class="n">task_id</span><span class="p">:</span> <span class="n">t</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">regex_match</span> <span class="o">+</span> <span class="n">also_include</span><span class="p">}</span>
+ <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">dag</span><span class="o">.</span><span class="n">tasks</span><span class="p">:</span>
+ <span class="c1"># Removing upstream/downstream references to tasks that did not</span>
+ <span class="c1"># make the cut</span>
+ <span class="n">t</span><span class="o">.</span><span class="n">_upstream_task_ids</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="n">tid</span> <span class="k">for</span> <span class="n">tid</span> <span class="ow">in</span> <span class="n">t</span><span class="o">.</span><span class="n">_upstream_task_ids</span> <span class="k">if</span> <span class="n">tid</span> <span class="ow">in</span> <span class="n">dag</span><span class="o">.</span><span class="n">task_ids</span><span class="p">]</span>
+ <span class="n">t</span><span class="o">.</span><span class="n">_downstream_task_ids</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="n">tid</span> <span class="k">for</span> <span class="n">tid</span> <span class="ow">in</span> <span class="n">t</span><span class="o">.</span><span class="n">_downstream_task_ids</span> <span class="k">if</span> <span class="n">tid</span> <span class="ow">in</span> <span class="n">dag</span><span class="o">.</span><span class="n">task_ids</span><span class="p">]</span>
+ <span class="k">return</span> <span class="n">dag</span></div>
+
+ <span class="k">def</span> <span class="nf">has_task</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">task_id</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">task_id</span> <span class="ow">in</span> <span class="p">(</span><span class="n">t</span><span class="o">.</span><span class="n">task_id</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">tasks</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">get_task</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">task_id</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">task_id</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">task_dict</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">task_dict</span><span class="p">[</span><span class="n">task_id</span><span class="p">]</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span><span class="s2">"Task {task_id} not found"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">()))</span>
+
+ <span class="nd">@provide_session</span>
+ <span class="k">def</span> <span class="nf">pickle_info</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">session</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="n">d</span> <span class="o">=</span> <span class="p">{}</span>
+ <span class="n">d</span><span class="p">[</span><span class="s1">'is_picklable'</span><span class="p">]</span> <span class="o">=</span> <span class="bp">True</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">dttm</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
+ <span class="n">pickled</span> <span class="o">=</span> <span class="n">pickle</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
+ <span class="n">d</span><span class="p">[</span><span class="s1">'pickle_len'</span><span class="p">]</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">pickled</span><span class="p">)</span>
+ <span class="n">d</span><span class="p">[</span><span class="s1">'pickling_duration'</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"{}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span> <span class="o">-</span> <span class="n">dttm</span><span class="p">)</span>
+ <span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">exception</span><span class="p">(</span><span class="n">e</span><span class="p">)</span>
+ <span class="n">d</span><span class="p">[</span><span class="s1">'is_picklable'</span><span class="p">]</span> <span class="o">=</span> <span class="bp">False</span>
+ <span class="n">d</span><span class="p">[</span><span class="s1">'stacktrace'</span><span class="p">]</span> <span class="o">=</span> <span class="n">traceback</span><span class="o">.</span><span class="n">format_exc</span><span class="p">()</span>
+ <span class="k">return</span> <span class="n">d</span>
+
+ <span class="nd">@provide_session</span>
+ <span class="k">def</span> <span class="nf">pickle</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">session</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="n">dag</span> <span class="o">=</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span>
+ <span class="n">DagModel</span><span class="p">)</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">DagModel</span><span class="o">.</span><span class="n">dag_id</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span><span class="p">)</span><span class="o">.</span><span class="n">first</span><span class="p">()</span>
+ <span class="n">dp</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="k">if</span> <span class="n">dag</span> <span class="ow">and</span> <span class="n">dag</span><span class="o">.</span><span class="n">pickle_id</span><span class="p">:</span>
+ <span class="n">dp</span> <span class="o">=</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">DagPickle</span><span class="p">)</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span>
+ <span class="n">DagPickle</span><span class="o">.</span><span class="n">id</span> <span class="o">==</span> <span class="n">dag</span><span class="o">.</span><span class="n">pickle_id</span><span class="p">)</span><span class="o">.</span><span class="n">first</span><span class="p">()</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">dp</span> <span class="ow">or</span> <span class="n">dp</span><span class="o">.</span><span class="n">pickle</span> <span class="o">!=</span> <span class="bp">self</span><span class="p">:</span>
+ <span class="n">dp</span> <span class="o">=</span> <span class="n">DagPickle</span><span class="p">(</span><span class="n">dag</span><span class="o">=</span><span class="bp">self</span><span class="p">)</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">dp</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">last_pickled</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">pickle_id</span> <span class="o">=</span> <span class="n">dp</span><span class="o">.</span><span class="n">id</span>
+
+ <span class="k">return</span> <span class="n">dp</span>
+
+<div class="viewcode-block" id="DAG.tree_view"><a class="viewcode-back" href="../../code.html#airflow.models.DAG.tree_view">[docs]</a> <span class="k">def</span> <span class="nf">tree_view</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Shows an ascii tree representation of the DAG</span>
+<span class="sd"> """</span>
+ <span class="k">def</span> <span class="nf">get_downstream</span><span class="p">(</span><span class="n">task</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
+ <span class="k">print</span><span class="p">((</span><span class="s2">" "</span> <span class="o">*</span> <span class="n">level</span> <span class="o">*</span> <span class="mi">4</span><span class="p">)</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">task</span><span class="p">))</span>
+ <span class="n">level</span> <span class="o">+=</span> <span class="mi">1</span>
+ <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">task</span><span class="o">.</span><span class="n">upstream_list</span><span class="p">:</span>
+ <span class="n">get_downstream</span><span class="p">(</span><span class="n">t</span><span class="p">,</span> <span class="n">level</span><span class="p">)</span>
+
+ <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">roots</span><span class="p">:</span>
+ <span class="n">get_downstream</span><span class="p">(</span><span class="n">t</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="DAG.add_task"><a class="viewcode-back" href="../../code.html#airflow.models.DAG.add_task">[docs]</a> <span class="k">def</span> <span class="nf">add_task</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">task</span><span class="p">):</span>
+ <span class="sd">'''</span>
+<span class="sd"> Add a task to the DAG</span>
+
+<span class="sd"> :param task: the task you want to add</span>
+<span class="sd"> :type task: task</span>
+<span class="sd"> '''</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">start_date</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">task</span><span class="o">.</span><span class="n">start_date</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span><span class="s2">"Task is missing the start_date parameter"</span><span class="p">)</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">task</span><span class="o">.</span><span class="n">start_date</span><span class="p">:</span>
+ <span class="n">task</span><span class="o">.</span><span class="n">start_date</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">start_date</span>
+
+ <span class="k">if</span> <span class="n">task</span><span class="o">.</span><span class="n">task_id</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">task_dict</span><span class="p">:</span>
+ <span class="c1">#TODO raise an error in Airflow 2.0</span>
+ <span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span>
+ <span class="s1">'The requested task could not be added to the DAG because a '</span>
+ <span class="s1">'task with task_id {} is already in the DAG. Starting in '</span>
+ <span class="s1">'Airflow 2.0, trying to overwrite a task will raise an '</span>
+ <span class="s1">'exception.'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">task</span><span class="o">.</span><span class="n">task_id</span><span class="p">),</span>
+ <span class="n">category</span><span class="o">=</span><span class="ne">PendingDeprecationWarning</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">tasks</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">task</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">task_dict</span><span class="p">[</span><span class="n">task</span><span class="o">.</span><span class="n">task_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">task</span>
+ <span class="n">task</span><span class="o">.</span><span class="n">dag</span> <span class="o">=</span> <span class="bp">self</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">task_count</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tasks</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="DAG.add_tasks"><a class="viewcode-back" href="../../code.html#airflow.models.DAG.add_tasks">[docs]</a> <span class="k">def</span> <span class="nf">add_tasks</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tasks</span><span class="p">):</span>
+ <span class="sd">'''</span>
+<span class="sd"> Add a list of tasks to the DAG</span>
+
+<span class="sd"> :param tasks: a list of tasks you want to add</span>
+<span class="sd"> :type tasks: list of tasks</span>
+<span class="sd"> '''</span>
+ <span class="k">for</span> <span class="n">task</span> <span class="ow">in</span> <span class="n">tasks</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">add_task</span><span class="p">(</span><span class="n">task</span><span class="p">)</span></div>
+
+ <span class="k">def</span> <span class="nf">db_merge</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="n">BO</span> <span class="o">=</span> <span class="n">BaseOperator</span>
+ <span class="n">session</span> <span class="o">=</span> <span class="n">settings</span><span class="o">.</span><span class="n">Session</span><span class="p">()</span>
+ <span class="n">tasks</span> <span class="o">=</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">BO</span><span class="p">)</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">BO</span><span class="o">.</span><span class="n">dag_id</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span><span class= [...]
+ <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">tasks</span><span class="p">:</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">delete</span><span class="p">(</span><span class="n">t</span><span class="p">)</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">merge</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+
+<div class="viewcode-block" id="DAG.run"><a class="viewcode-back" href="../../code.html#airflow.models.DAG.run">[docs]</a> <span class="k">def</span> <span class="nf">run</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span>
+ <span class="n">start_date</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">end_date</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">mark_success</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">include_adhoc</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">local</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">executor</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">donot_pickle</span><span class="o">=</span><span class="n">configuration</span><span class="o">.</span><span class="n">getboolean</span><span class="p">(</span><span class="s1">'core'</span><span class="p">,</span> <span class="s1">'donot_pickle'</span><span class="p">),</span>
+ <span class="n">ignore_dependencies</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">ignore_first_depends_on_past</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">pool</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Runs the DAG.</span>
+<span class="sd"> """</span>
+ <span class="kn">from</span> <span class="nn">airflow.jobs</span> <span class="kn">import</span> <span class="n">BackfillJob</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">executor</span> <span class="ow">and</span> <span class="n">local</span><span class="p">:</span>
+ <span class="n">executor</span> <span class="o">=</span> <span class="n">LocalExecutor</span><span class="p">()</span>
+ <span class="k">elif</span> <span class="ow">not</span> <span class="n">executor</span><span class="p">:</span>
+ <span class="n">executor</span> <span class="o">=</span> <span class="n">DEFAULT_EXECUTOR</span>
+ <span class="n">job</span> <span class="o">=</span> <span class="n">BackfillJob</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span>
+ <span class="n">start_date</span><span class="o">=</span><span class="n">start_date</span><span class="p">,</span>
+ <span class="n">end_date</span><span class="o">=</span><span class="n">end_date</span><span class="p">,</span>
+ <span class="n">mark_success</span><span class="o">=</span><span class="n">mark_success</span><span class="p">,</span>
+ <span class="n">include_adhoc</span><span class="o">=</span><span class="n">include_adhoc</span><span class="p">,</span>
+ <span class="n">executor</span><span class="o">=</span><span class="n">executor</span><span class="p">,</span>
+ <span class="n">donot_pickle</span><span class="o">=</span><span class="n">donot_pickle</span><span class="p">,</span>
+ <span class="n">ignore_dependencies</span><span class="o">=</span><span class="n">ignore_dependencies</span><span class="p">,</span>
+ <span class="n">ignore_first_depends_on_past</span><span class="o">=</span><span class="n">ignore_first_depends_on_past</span><span class="p">,</span>
+ <span class="n">pool</span><span class="o">=</span><span class="n">pool</span><span class="p">)</span>
+ <span class="n">job</span><span class="o">.</span><span class="n">run</span><span class="p">()</span></div>
+
+<div class="viewcode-block" id="DAG.cli"><a class="viewcode-back" href="../../code.html#airflow.models.DAG.cli">[docs]</a> <span class="k">def</span> <span class="nf">cli</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Exposes a CLI specific to this DAG</span>
+<span class="sd"> """</span>
+ <span class="kn">from</span> <span class="nn">airflow.bin</span> <span class="kn">import</span> <span class="n">cli</span>
+ <span class="n">parser</span> <span class="o">=</span> <span class="n">cli</span><span class="o">.</span><span class="n">CLIFactory</span><span class="o">.</span><span class="n">get_parser</span><span class="p">(</span><span class="n">dag_parser</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="n">args</span> <span class="o">=</span> <span class="n">parser</span><span class="o">.</span><span class="n">parse_args</span><span class="p">()</span>
+ <span class="n">args</span><span class="o">.</span><span class="n">func</span><span class="p">(</span><span class="n">args</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span></div>
+
+ <span class="nd">@provide_session</span>
+<div class="viewcode-block" id="DAG.create_dagrun"><a class="viewcode-back" href="../../code.html#airflow.models.DAG.create_dagrun">[docs]</a> <span class="k">def</span> <span class="nf">create_dagrun</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
+ <span class="n">run_id</span><span class="p">,</span>
+ <span class="n">execution_date</span><span class="p">,</span>
+ <span class="n">state</span><span class="p">,</span>
+ <span class="n">start_date</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">external_trigger</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">conf</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">session</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Creates a dag run from this dag including the tasks associated with this dag. Returns the dag</span>
+<span class="sd"> run.</span>
+<span class="sd"> :param run_id: defines the run id for this dag run</span>
+<span class="sd"> :type run_id: string</span>
+<span class="sd"> :param execution_date: the execution date of this dag run</span>
+<span class="sd"> :type execution_date: datetime</span>
+<span class="sd"> :param state: the state of the dag run</span>
+<span class="sd"> :type state: State</span>
+<span class="sd"> :param start_date: the date this dag run should be evaluated</span>
+<span class="sd"> :type start_date: datetime</span>
+<span class="sd"> :param external_trigger: whether this dag run is externally triggered</span>
+<span class="sd"> :type external_trigger: bool</span>
+<span class="sd"> :param session: database session</span>
+<span class="sd"> :type session: Session</span>
+<span class="sd"> """</span>
+ <span class="n">run</span> <span class="o">=</span> <span class="n">DagRun</span><span class="p">(</span>
+ <span class="n">dag_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span><span class="p">,</span>
+ <span class="n">run_id</span><span class="o">=</span><span class="n">run_id</span><span class="p">,</span>
+ <span class="n">execution_date</span><span class="o">=</span><span class="n">execution_date</span><span class="p">,</span>
+ <span class="n">start_date</span><span class="o">=</span><span class="n">start_date</span><span class="p">,</span>
+ <span class="n">external_trigger</span><span class="o">=</span><span class="n">external_trigger</span><span class="p">,</span>
+ <span class="n">conf</span><span class="o">=</span><span class="n">conf</span><span class="p">,</span>
+ <span class="n">state</span><span class="o">=</span><span class="n">state</span>
+ <span class="p">)</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">run</span><span class="p">)</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+
+ <span class="n">run</span><span class="o">.</span><span class="n">dag</span> <span class="o">=</span> <span class="bp">self</span>
+
+ <span class="c1"># create the associated taskinstances</span>
+ <span class="c1"># state is None at the moment of creation</span>
+ <span class="n">run</span><span class="o">.</span><span class="n">verify_integrity</span><span class="p">(</span><span class="n">session</span><span class="o">=</span><span class="n">session</span><span class="p">)</span>
+
+ <span class="n">run</span><span class="o">.</span><span class="n">refresh_from_db</span><span class="p">()</span>
+ <span class="k">return</span> <span class="n">run</span></div></div>
+
+
+<span class="k">class</span> <span class="nc">Chart</span><span class="p">(</span><span class="n">Base</span><span class="p">):</span>
+ <span class="n">__tablename__</span> <span class="o">=</span> <span class="s2">"chart"</span>
+
+ <span class="nb">id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Integer</span><span class="p">,</span> <span class="n">primary_key</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="n">label</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="mi">200</span><span class="p">))</span>
+ <span class="n">conn_id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="n">ID_LEN</span><span class="p">),</span> <span class="n">nullable</span><span class="o">=</span><span class="bp">False</span><span class="p">)</span>
+ <span class="n">user_id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Integer</span><span class="p">(),</span> <span class="n">ForeignKey</span><span class="p">(</span><span class="s1">'users.id'</span><span class="p">),</span> <span class="n">nullable</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="n">chart_type</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="mi">100</span><span class="p">),</span> <span class="n">default</span><span class="o">=</span><span class="s2">"line"</span><span class="p">)</span>
+ <span class="n">sql_layout</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="mi">50</span><span class="p">),</span> <span class="n">default</span><span class="o">=</span><span class="s2">"series"</span><span class="p">)</span>
+ <span class="n">sql</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Text</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="s2">"SELECT series, x, y FROM table"</span><span class="p">)</span>
+ <span class="n">y_log_scale</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Boolean</span><span class="p">)</span>
+ <span class="n">show_datatable</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Boolean</span><span class="p">)</span>
+ <span class="n">show_sql</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Boolean</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="n">height</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Integer</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="mi">600</span><span class="p">)</span>
+ <span class="n">default_params</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="mi">5000</span><span class="p">),</span> <span class="n">default</span><span class="o">=</span><span class="s2">"{}"</span><span class="p">)</span>
+ <span class="n">owner</span> <span class="o">=</span> <span class="n">relationship</span><span class="p">(</span>
+ <span class="s2">"User"</span><span class="p">,</span> <span class="n">cascade</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span> <span class="n">cascade_backrefs</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span> <span class="n">backref</span><span class="o">=</span><span class="s1">'charts'</span><span class="p">)</span>
+ <span class="n">x_is_date</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Boolean</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="n">iteration_no</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Integer</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+ <span class="n">last_modified</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">DateTime</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="n">func</span><span class="o">.</span><span class="n">now</span><span class="p">())</span>
+
+ <span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">label</span>
+
+
+<span class="k">class</span> <span class="nc">KnownEventType</span><span class="p">(</span><span class="n">Base</span><span class="p">):</span>
+ <span class="n">__tablename__</span> <span class="o">=</span> <span class="s2">"known_event_type"</span>
+
+ <span class="nb">id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Integer</span><span class="p">,</span> <span class="n">primary_key</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="n">know_event_type</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="mi">200</span><span class="p">))</span>
+
+ <span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">know_event_type</span>
+
+
+<span class="k">class</span> <span class="nc">KnownEvent</span><span class="p">(</span><span class="n">Base</span><span class="p">):</span>
+ <span class="n">__tablename__</span> <span class="o">=</span> <span class="s2">"known_event"</span>
+
+ <span class="nb">id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Integer</span><span class="p">,</span> <span class="n">primary_key</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="n">label</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="mi">200</span><span class="p">))</span>
+ <span class="n">start_date</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">DateTime</span><span class="p">)</span>
+ <span class="n">end_date</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">DateTime</span><span class="p">)</span>
+ <span class="n">user_id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Integer</span><span class="p">(),</span> <span class="n">ForeignKey</span><span class="p">(</span><span class="s1">'users.id'</span><span class="p">),)</span>
+ <span class="n">known_event_type_id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Integer</span><span class="p">(),</span> <span class="n">ForeignKey</span><span class="p">(</span><span class="s1">'known_event_type.id'</span><span class="p">),)</span>
+ <span class="n">reported_by</span> <span class="o">=</span> <span class="n">relationship</span><span class="p">(</span>
+ <span class="s2">"User"</span><span class="p">,</span> <span class="n">cascade</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span> <span class="n">cascade_backrefs</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span> <span class="n">backref</span><span class="o">=</span><span class="s1">'known_events'</span><span class="p">)</span>
+ <span class="n">event_type</span> <span class="o">=</span> <span class="n">relationship</span><span class="p">(</span>
+ <span class="s2">"KnownEventType"</span><span class="p">,</span>
+ <span class="n">cascade</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">cascade_backrefs</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span> <span class="n">backref</span><span class="o">=</span><span class="s1">'known_events'</span><span class="p">)</span>
+ <span class="n">description</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Text</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">label</span>
+
+
+<span class="k">class</span> <span class="nc">Variable</span><span class="p">(</span><span class="n">Base</span><span class="p">):</span>
+ <span class="n">__tablename__</span> <span class="o">=</span> <span class="s2">"variable"</span>
+
+ <span class="nb">id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Integer</span><span class="p">,</span> <span class="n">primary_key</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="n">key</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="n">ID_LEN</span><span class="p">),</span> <span class="n">unique</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="n">_val</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="s1">'val'</span><span class="p">,</span> <span class="n">Text</span><span class="p">)</span>
+ <span class="n">is_encrypted</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Boolean</span><span class="p">,</span> <span class="n">unique</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="bp">False</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="c1"># Hiding the value: repr shows the stored _val (encrypted when is_encrypted is set), not the decrypted val</span>
+ <span class="k">return</span> <span class="s1">'{} : {}'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">key</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_val</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">get_val</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_val</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_encrypted</span><span class="p">:</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">ENCRYPTION_ON</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span>
+ <span class="s2">"Can't decrypt _val, configuration is missing"</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">FERNET</span><span class="o">.</span><span class="n">decrypt</span><span class="p">(</span><span class="nb">bytes</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_val</span><span class="p">,</span> <span class="s1">'utf-8'</span><span class="p">))</span><span class="o">.</span><span class="n">decode</span><span class="p">()</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_val</span>
+
+ <span class="k">def</span> <span class="nf">set_val</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span>
+ <span class="k">if</span> <span class="n">value</span><span class="p">:</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_val</span> <span class="o">=</span> <span class="n">FERNET</span><span class="o">.</span><span class="n">encrypt</span><span class="p">(</span><span class="nb">bytes</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="s1">'utf-8'</span><span class="p">))</span><span class="o">.</span><span class="n">decode</span><span class="p">()</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">is_encrypted</span> <span class="o">=</span> <span class="bp">True</span>
+ <span class="k">except</span> <span class="ne">NameError</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_val</span> <span class="o">=</span> <span class="n">value</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">is_encrypted</span> <span class="o">=</span> <span class="bp">False</span>
+
+ <span class="nd">@declared_attr</span>
+ <span class="k">def</span> <span class="nf">val</span><span class="p">(</span><span class="n">cls</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">synonym</span><span class="p">(</span><span class="s1">'_val'</span><span class="p">,</span>
+ <span class="n">descriptor</span><span class="o">=</span><span class="nb">property</span><span class="p">(</span><span class="n">cls</span><span class="o">.</span><span class="n">get_val</span><span class="p">,</span> <span class="n">cls</span><span class="o">.</span><span class="n">set_val</span><span class="p">))</span>
+ <span class="nd">@classmethod</span>
+ <span class="nd">@provide_session</span>
+ <span class="k">def</span> <span class="nf">get</span><span class="p">(</span><span class="n">cls</span><span class="p">,</span> <span class="n">key</span><span class="p">,</span> <span class="n">default_var</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">deserialize_json</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span> <span class="n">session</span><span class="o">=</span><span class="bp">None</spa [...]
+ <span class="n">obj</span> <span class="o">=</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">cls</span><span class="p">)</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">cls</span><span class="o">.</span><span class="n">key</span> <span class="o">==</span> <span class="n">key</span><span class="p">)</span><span class="o">.</span><span class="n">first< [...]
+ <span class="k">if</span> <span class="n">obj</span> <span class="ow">is</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">default_var</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">default_var</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'Variable {} does not exist'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">key</span><span class="p">))</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">deserialize_json</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">obj</span><span class="o">.</span><span class="n">val</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">obj</span><span class="o">.</span><span class="n">val</span>
+
+ <span class="nd">@classmethod</span>
+ <span class="nd">@provide_session</span>
+ <span class="k">def</span> <span class="nf">set</span><span class="p">(</span><span class="n">cls</span><span class="p">,</span> <span class="n">key</span><span class="p">,</span> <span class="n">value</span><span class="p">,</span> <span class="n">serialize_json</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span> <span class="n">session</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+
+ <span class="k">if</span> <span class="n">serialize_json</span><span class="p">:</span>
+ <span class="n">stored_value</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">stored_value</span> <span class="o">=</span> <span class="n">value</span>
+
+ <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">cls</span><span class="p">)</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">cls</span><span class="o">.</span><span class="n">key</span> <span class="o">==</span> <span class="n">key</span><span class="p">)</span><span class="o">.</span><span class="n">delete</span><span class="p">()</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">Variable</span><span class="p">(</span><span class="n">key</span><span class="o">=</span><span class="n">key</span><span class="p">,</span> <span class="n">val</span><span class="o">=</span><span class="n">stored_value</span><span class="p">))</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">flush</span><span class="p">()</span>
+
+
+<span class="k">class</span> <span class="nc">XCom</span><span class="p">(</span><span class="n">Base</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Base class for XCom objects.</span>
+<span class="sd"> """</span>
+ <span class="n">__tablename__</span> <span class="o">=</span> <span class="s2">"xcom"</span>
+
+ <span class="nb">id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Integer</span><span class="p">,</span> <span class="n">primary_key</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="n">key</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="mi">512</span><span class="p">))</span>
+ <span class="n">value</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">PickleType</span><span class="p">(</span><span class="n">pickler</span><span class="o">=</span><span class="n">dill</span><span class="p">))</span>
+ <span class="n">timestamp</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span>
+ <span class="n">DateTime</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="n">func</span><span class="o">.</span><span class="n">now</span><span class="p">(),</span> <span class="n">nullable</span><span class="o">=</span><span class="bp">False</span><span class="p">)</span>
+ <span class="n">execution_date</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">DateTime</span><span class="p">,</span> <span class="n">nullable</span><span class="o">=</span><span class="bp">False</span><span class="p">)</span>
+
+ <span class="c1"># source information</span>
+ <span class="n">task_id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="n">ID_LEN</span><span class="p">),</span> <span class="n">nullable</span><span class="o">=</span><span class="bp">False</span><span class="p">)</span>
+ <span class="n">dag_id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="n">ID_LEN</span><span class="p">),</span> <span class="n">nullable</span><span class="o">=</span><span class="bp">False</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="s1">'&lt;XCom "{key}" ({task_id} @ {execution_date})&gt;'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
+ <span class="n">key</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">key</span><span class="p">,</span>
+ <span class="n">task_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">task_id</span><span class="p">,</span>
+ <span class="n">execution_date</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span><span class="p">)</span>
+
+ <span class="nd">@classmethod</span>
+ <span class="nd">@provide_session</span>
+ <span class="k">def</span> <span class="nf">set</span><span class="p">(</span>
+ <span class="n">cls</span><span class="p">,</span>
+ <span class="n">key</span><span class="p">,</span>
+ <span class="n">value</span><span class="p">,</span>
+ <span class="n">execution_date</span><span class="p">,</span>
+ <span class="n">task_id</span><span class="p">,</span>
+ <span class="n">dag_id</span><span class="p">,</span>
+ <span class="n">session</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Store an XCom value.</span>
+<span class="sd"> """</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">expunge_all</span><span class="p">()</span>
+
+ <span class="c1"># remove any duplicate XComs</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">cls</span><span class="p">)</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span>
+ <span class="n">cls</span><span class="o">.</span><span class="n">key</span> <span class="o">==</span> <span class="n">key</span><span class="p">,</span>
+ <span class="n">cls</span><span class="o">.</span><span class="n">execution_date</span> <span class="o">==</span> <span class="n">execution_date</span><span class="p">,</span>
+ <span class="n">cls</span><span class="o">.</span><span class="n">task_id</span> <span class="o">==</span> <span class="n">task_id</span><span class="p">,</span>
+ <span class="n">cls</span><span class="o">.</span><span class="n">dag_id</span> <span class="o">==</span> <span class="n">dag_id</span><span class="p">)</span><span class="o">.</span><span class="n">delete</span><span class="p">()</span>
+
+ <span class="c1"># insert new XCom</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">XCom</span><span class="p">(</span>
+ <span class="n">key</span><span class="o">=</span><span class="n">key</span><span class="p">,</span>
+ <span class="n">value</span><span class="o">=</span><span class="n">value</span><span class="p">,</span>
+ <span class="n">execution_date</span><span class="o">=</span><span class="n">execution_date</span><span class="p">,</span>
+ <span class="n">task_id</span><span class="o">=</span><span class="n">task_id</span><span class="p">,</span>
+ <span class="n">dag_id</span><span class="o">=</span><span class="n">dag_id</span><span class="p">))</span>
+
+ <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+
+ <span class="nd">@classmethod</span>
+ <span class="nd">@provide_session</span>
+ <span class="k">def</span> <span class="nf">get_one</span><span class="p">(</span>
+ <span class="n">cls</span><span class="p">,</span>
+ <span class="n">execution_date</span><span class="p">,</span>
+ <span class="n">key</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">task_id</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">dag_id</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">include_prior_dates</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">session</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Retrieve an XCom value, optionally meeting certain criteria</span>
+<span class="sd"> """</span>
+ <span class="n">filters</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="k">if</span> <span class="n">key</span><span class="p">:</span>
+ <span class="n">filters</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">cls</span><span class="o">.</span><span class="n">key</span> <span class="o">==</span> <span class="n">key</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">task_id</span><span class="p">:</span>
+ <span class="n">filters</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">cls</span><span class="o">.</span><span class="n">task_id</span> <span class="o">==</span> <span class="n">task_id</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">dag_id</span><span class="p">:</span>
+ <span class="n">filters</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">cls</span><span class="o">.</span><span class="n">dag_id</span> <span class="o">==</span> <span class="n">dag_id</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">include_prior_dates</span><span class="p">:</span>
+ <span class="n">filters</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">cls</span><span class="o">.</span><span class="n">execution_date</span> <span class="o">&lt;=</span> <span class="n">execution_date</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">filters</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">cls</span><span class="o">.</span><span class="n">execution_date</span> <span class="o">==</span> <span class="n">execution_date</span><span class="p">)</span>
+
+ <span class="n">query</span> <span class="o">=</span> <span class="p">(</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">cls</span><span class="o">.</span><span class="n">value</span><span class="p">)</span>
+ <span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">and_</span><span class="p">(</span><span class="o">*</span><span class="n">filters</span><span class="p">))</span>
+ <span class="o">.</span><span class="n">order_by</span><span class="p">(</span><span class="n">cls</span><span class="o">.</span><span class="n">execution_date</span><span class="o">.</span><span class="n">desc</span><span class="p">(),</span> <span class="n">cls</span><span class="o">.</span><span class="n">timestamp</span><span class="o">.</span><span class="n">desc</span><span class="p">())</span>
+ <span class="o">.</span><span class="n">limit</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span>
+
+ <span class="n">result</span> <span class="o">=</span> <span class="n">query</span><span class="o">.</span><span class="n">first</span><span class="p">()</span>
+ <span class="k">if</span> <span class="n">result</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">result</span><span class="o">.</span><span class="n">value</span>
+
+ <span class="nd">@classmethod</span>
+ <span class="nd">@provide_session</span>
+ <span class="k">def</span> <span class="nf">get_many</span><span class="p">(</span>
+ <span class="n">cls</span><span class="p">,</span>
+ <span class="n">execution_date</span><span class="p">,</span>
+ <span class="n">key</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">task_ids</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">dag_ids</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">include_prior_dates</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">limit</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span>
+ <span class="n">session</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Retrieve multiple XCom entries, optionally meeting certain criteria</span>
+<span class="sd"> """</span>
+ <span class="n">filters</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="k">if</span> <span class="n">key</span><span class="p">:</span>
+ <span class="n">filters</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">cls</span><span class="o">.</span><span class="n">key</span> <span class="o">==</span> <span class="n">key</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">task_ids</span><span class="p">:</span>
+ <span class="n">filters</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">cls</span><span class="o">.</span><span class="n">task_id</span><span class="o">.</span><span class="n">in_</span><span class="p">(</span><span class="n">as_tuple</span><span class="p">(</span><span class="n">task_ids</span><span class="p">)))</span>
+ <span class="k">if</span> <span class="n">dag_ids</span><span class="p">:</span>
+ <span class="n">filters</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">cls</span><span class="o">.</span><span class="n">dag_id</span><span class="o">.</span><span class="n">in_</span><span class="p">(</span><span class="n">as_tuple</span><span class="p">(</span><span class="n">dag_ids</span><span class="p">)))</span>
+ <span class="k">if</span> <span class="n">include_prior_dates</span><span class="p">:</span>
+ <span class="n">filters</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">cls</span><span class="o">.</span><span class="n">execution_date</span> <span class="o">&lt;=</span> <span class="n">execution_date</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">filters</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">cls</span><span class="o">.</span><span class="n">execution_date</span> <span class="o">==</span> <span class="n">execution_date</span><span class="p">)</span>
+
+ <span class="n">query</span> <span class="o">=</span> <span class="p">(</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">cls</span><span class="p">)</span>
+ <span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">and_</span><span class="p">(</span><span class="o">*</span><span class="n">filters</span><span class="p">))</span>
+ <span class="o">.</span><span class="n">order_by</span><span class="p">(</span><span class="n">cls</span><span class="o">.</span><span class="n">execution_date</span><span class="o">.</span><span class="n">desc</span><span class="p">(),</span> <span class="n">cls</span><span class="o">.</span><span class="n">timestamp</span><span class="o">.</span><span class="n">desc</span><span class="p">())</span>
+ <span class="o">.</span><span class="n">limit</span><span class="p">(</span><span class="n">limit</span><span class="p">))</span>
+
+ <span class="k">return</span> <span class="n">query</span><span class="o">.</span><span class="n">all</span><span class="p">()</span>
+
+ <span class="nd">@classmethod</span>
+ <span class="nd">@provide_session</span>
+ <span class="k">def</span> <span class="nf">delete</span><span class="p">(</span><span class="n">cls</span><span class="p">,</span> <span class="n">xcoms</span><span class="p">,</span> <span class="n">session</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">xcoms</span><span class="p">,</span> <span class="n">XCom</span><span class="p">):</span>
+ <span class="n">xcoms</span> <span class="o">=</span> <span class="p">[</span><span class="n">xcoms</span><span class="p">]</span>
+ <span class="k">for</span> <span class="n">xcom</span> <span class="ow">in</span> <span class="n">xcoms</span><span class="p">:</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">xcom</span><span class="p">,</span> <span class="n">XCom</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s1">'Expected XCom; received {}'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
+ <span class="n">xcom</span><span class="o">.</span><span class="n">__class__</span><span class="o">.</span><span class="n">__name__</span><span class="p">))</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">delete</span><span class="p">(</span><span class="n">xcom</span><span class="p">)</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+
+
+<span class="k">class</span> <span class="nc">DagRun</span><span class="p">(</span><span class="n">Base</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> DagRun describes an instance of a Dag. It can be created</span>
+<span class="sd"> by the scheduler (for regular runs) or by an external trigger</span>
+<span class="sd"> """</span>
+ <span class="n">__tablename__</span> <span class="o">=</span> <span class="s2">"dag_run"</span>
+
+ <span class="n">ID_PREFIX</span> <span class="o">=</span> <span class="s1">'scheduled__'</span>
+ <span class="n">ID_FORMAT_PREFIX</span> <span class="o">=</span> <span class="n">ID_PREFIX</span> <span class="o">+</span> <span class="s1">'{0}'</span>
+
+ <span class="nb">id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Integer</span><span class="p">,</span> <span class="n">primary_key</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="n">dag_id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="n">ID_LEN</span><span class="p">))</span>
+ <span class="n">execution_date</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">DateTime</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="n">func</span><span class="o">.</span><span class="n">now</span><span class="p">())</span>
+ <span class="n">start_date</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">DateTime</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="n">func</span><span class="o">.</span><span class="n">now</span><span class="p">())</span>
+ <span class="n">end_date</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">DateTime</span><span class="p">)</span>
+ <span class="n">state</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="mi">50</span><span class="p">),</span> <span class="n">default</span><span class="o">=</span><span class="n">State</span><span class="o">.</span><span class="n">RUNNING</span><span class="p">)</span>
+ <span class="n">run_id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="n">ID_LEN</span><span class="p">))</span>
+ <span class="n">external_trigger</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Boolean</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="n">conf</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">PickleType</span><span class="p">)</span>
+
+ <span class="n">dag</span> <span class="o">=</span> <span class="bp">None</span>
+
+ <span class="n">__table_args__</span> <span class="o">=</span> <span class="p">(</span>
+ <span class="n">Index</span><span class="p">(</span><span class="s1">'dr_run_id'</span><span class="p">,</span> <span class="n">dag_id</span><span class="p">,</span> <span class="n">run_id</span><span class="p">,</span> <span class="n">unique</span><span class="o">=</span><span class="bp">True</span><span class="p">),</span>
+ <span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="p">(</span>
+ <span class="s1">'&lt;DagRun {dag_id} @ {execution_date}: {run_id}, '</span>
+ <span class="s1">'externally triggered: {external_trigger}&gt;'</span>
+ <span class="p">)</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
+ <span class="n">dag_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span><span class="p">,</span>
+ <span class="n">execution_date</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span><span class="p">,</span>
+ <span class="n">run_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">run_id</span><span class="p">,</span>
+ <span class="n">external_trigger</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">external_trigger</span><span class="p">)</span>
+
+ <span class="nd">@classmethod</span>
+ <span class="k">def</span> <span class="nf">id_for_date</span><span class="p">(</span><span class="n">klass</span><span class="p">,</span> <span class="n">date</span><span class="p">,</span> <span class="n">prefix</span><span class="o">=</span><span class="n">ID_FORMAT_PREFIX</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">prefix</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">date</span><span class="o">.</span><span class="n">isoformat</span><span class="p">()[:</span><span class="mi">19</span><span class="p">])</span>
+
+ <span class="nd">@provide_session</span>
+ <span class="k">def</span> <span class="nf">refresh_from_db</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">session</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Reloads the current dagrun from the database</span>
+<span class="sd"> :param session: database session</span>
+<span class="sd"> """</span>
+ <span class="n">DR</span> <span class="o">=</span> <span class="n">DagRun</span>
+
+ <span class="n">dr</span> <span class="o">=</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">DR</span><span class="p">)</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span>
+ <span class="n">DR</span><span class="o">.</span><span class="n">dag_id</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span><span class="p">,</span>
+ <span class="n">DR</span><span class="o">.</span><span class="n">execution_date</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span><span class="p">,</span>
+ <span class="n">DR</span><span class="o">.</span><span class="n">run_id</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">run_id</span>
+ <span class="p">)</span><span class="o">.</span><span class="n">one</span><span class="p">()</span>
+ <span class="k">if</span> <span class="n">dr</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">id</span> <span class="o">=</span> <span class="n">dr</span><span class="o">.</span><span class="n">id</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="n">dr</span><span class="o">.</span><span class="n">state</span>
+
+ <span class="nd">@staticmethod</span>
+ <span class="nd">@provide_session</span>
+ <span class="k">def</span> <span class="nf">find</span><span class="p">(</span><span class="n">dag_id</span><span class="p">,</span> <span class="n">run_id</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">execution_date</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">state</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">external_trigger</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">session</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns a set of dag runs for the given search criteria.</span>
+<span class="sd"> :param run_id: defines the run id for this dag run</span>
+<span class="sd"> :type run_id: string</span>
+<span class="sd"> :param execution_date: the execution date</span>
+<span class="sd"> :type execution_date: datetime</span>
+<span class="sd"> :param state: the state of the dag run</span>
+<span class="sd"> :type state: State</span>
+<span class="sd"> :param external_trigger: whether this dag run is externally triggered</span>
+<span class="sd"> :type external_trigger: bool</span>
+<span class="sd"> :param session: database session</span>
+<span class="sd"> :type session: Session</span>
+<span class="sd"> """</span>
+ <span class="n">DR</span> <span class="o">=</span> <span class="n">DagRun</span>
+
+ <span class="n">qry</span> <span class="o">=</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">DR</span><span class="p">)</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">DR</span><span class="o">.</span><span class="n">dag_id</span> <span class="o">==</span> <span class="n">dag_id</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">run_id</span><span class="p">:</span>
+ <span class="n">qry</span> <span class="o">=</span> <span class="n">qry</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">DR</span><span class="o">.</span><span class="n">run_id</span> <span class="o">==</span> <span class="n">run_id</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">execution_date</span><span class="p">:</span>
+ <span class="n">qry</span> <span class="o">=</span> <span class="n">qry</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">DR</span><span class="o">.</span><span class="n">execution_date</span> <span class="o">==</span> <span class="n">execution_date</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">state</span><span class="p">:</span>
+ <span class="n">qry</span> <span class="o">=</span> <span class="n">qry</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">DR</span><span class="o">.</span><span class="n">state</span> <span class="o">==</span> <span class="n">state</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">external_trigger</span><span class="p">:</span>
+ <span class="n">qry</span> <span class="o">=</span> <span class="n">qry</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">DR</span><span class="o">.</span><span class="n">external_trigger</span> <span class="o">==</span> <span class="n">external_trigger</span><span class="p">)</span>
+
+ <span class="n">dr</span> <span class="o">=</span> <span class="n">qry</span><span class="o">.</span><span class="n">order_by</span><span class="p">(</span><span class="n">DR</span><span class="o">.</span><span class="n">execution_date</span><span class="p">)</span><span class="o">.</span><span class="n">all</span><span class="p">()</span>
+
+ <span class="k">return</span> <span class="n">dr</span>
+
+ <span class="nd">@provide_session</span>
+ <span class="k">def</span> <span class="nf">get_task_instances</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">state</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">session</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns the task instances for this dag run</span>
+<span class="sd"> """</span>
+ <span class="n">TI</span> <span class="o">=</span> <span class="n">TaskInstance</span>
+ <span class="n">tis</span> <span class="o">=</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">TI</span><span class="p">)</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">dag_id</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span><span class="p">,</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">execution_date</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span><span class="p">,</span>
+ <span class="p">)</span>
+ <span class="k">if</span> <span class="n">state</span><span class="p">:</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">state</span><span class="p">,</span> <span class="n">six</span><span class="o">.</span><span class="n">string_types</span><span class="p">):</span>
+ <span class="n">tis</span> <span class="o">=</span> <span class="n">tis</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">TI</span><span class="o">.</span><span class="n">state</span> <span class="o">==</span> <span class="n">state</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="c1"># this is required to deal with NULL values</span>
+ <span class="k">if</span> <span class="bp">None</span> <span class="ow">in</span> <span class="n">state</span><span class="p">:</span>
+ <span class="n">tis</span> <span class="o">=</span> <span class="n">tis</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">or_</span><span class="p">(</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">state</span><span class="o">.</span><span class="n">in_</span><span class="p">(</span><span class="n">state</span><span class="p">),</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">state</span><span class="o">.</span><span class="n">is_</span><span class="p">(</span><span class="bp">None</span><span class="p">))</span>
+ <span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">tis</span> <span class="o">=</span> <span class="n">tis</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">TI</span><span class="o">.</span><span class="n">state</span><span class="o">.</span><span class="n">in_</span><span class="p">(</span><span class="n">state</span><span class="p">))</span>
+
+ <span class="k">return</span> <span class="n">tis</span><span class="o">.</span><span class="n">all</span><span class="p">()</span>
+
+ <span class="nd">@provide_session</span>
+ <span class="k">def</span> <span class="nf">get_task_instance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">task_id</span><span class="p">,</span> <span class="n">session</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns the task instance specified by task_id for this dag run</span>
+<span class="sd"> :param task_id: the task id</span>
+<span class="sd"> """</span>
+
+ <span class="n">TI</span> <span class="o">=</span> <span class="n">TaskInstance</span>
+ <span class="n">ti</span> <span class="o">=</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">TI</span><span class="p">)</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">dag_id</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span><span class="p">,</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">execution_date</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span><span class="p">,</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">task_id</span> <span class="o">==</span> <span class="n">task_id</span>
+ <span class="p">)</span><span class="o">.</span><span class="n">one</span><span class="p">()</span>
+
+ <span class="k">return</span> <span class="n">ti</span>
+
+ <span class="k">def</span> <span class="nf">get_dag</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns the Dag associated with this DagRun</span>
+<span class="sd"> :param session: database session</span>
+<span class="sd"> :return: DAG</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span><span class="s2">"The DAG (.dag) for {} needs to be set"</span>
+ <span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span>
+
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">dag</span>
+
+ <span class="nd">@provide_session</span>
+ <span class="k">def</span> <span class="nf">update_state</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">session</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Determines the overall state of the DagRun based on the state</span>
+<span class="sd"> of its TaskInstances.</span>
+<span class="sd"> :returns State:</span>
+<span class="sd"> """</span>
+
+ <span class="n">dag</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_dag</span><span class="p">()</span>
+ <span class="n">tis</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_task_instances</span><span class="p">(</span><span class="n">session</span><span class="o">=</span><span class="n">session</span><span class="p">)</span>
+
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"Updating state for {} considering {} task(s)"</span>
+ <span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">tis</span><span class="p">)))</span>
+
+ <span class="k">for</span> <span class="n">ti</span> <span class="ow">in</span> <span class="n">tis</span><span class="p">:</span>
+ <span class="n">ti</span><span class="o">.</span><span class="n">task</span> <span class="o">=</span> <span class="n">dag</span><span class="o">.</span><span class="n">get_task</span><span class="p">(</span><span class="n">ti</span><span class="o">.</span><span class="n">task_id</span><span class="p">)</span>
+
+ <span class="c1"># pre-calculate</span>
+ <span class="c1"># db is faster</span>
+ <span class="n">start_dttm</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
+ <span class="n">unfinished_tasks</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_task_instances</span><span class="p">(</span>
+ <span class="n">state</span><span class="o">=</span><span class="n">State</span><span class="o">.</span><span class="n">unfinished</span><span class="p">(),</span>
+ <span class="n">session</span><span class="o">=</span><span class="n">session</span>
+ <span class="p">)</span>
+ <span class="n">none_depends_on_past</span> <span class="o">=</span> <span class="nb">all</span><span class="p">(</span><span class="n">t</span><span class="o">.</span><span class="n">task</span><span class="o">.</span><span class="n">depends_on_past</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">unfinished_tasks</span><span class="p">)</span>
+
+ <span class="c1"># small speed up</span>
+ <span class="k">if</span> <span class="n">unfinished_tasks</span> <span class="ow">and</span> <span class="n">none_depends_on_past</span><span class="p">:</span>
+ <span class="c1"># todo: this can actually get pretty slow: one task costs between 0.01-015s</span>
+ <span class="n">no_dependencies_met</span> <span class="o">=</span> <span class="nb">all</span><span class="p">(</span><span class="ow">not</span> <span class="n">t</span><span class="o">.</span><span class="n">are_dependencies_met</span><span class="p">(</span><span class="n">session</span><span class="o">=</span><span class="n">session</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">unfinished_tasks</span><span class="p">)</span>
+
+ <span class="n">duration</span> <span class="o">=</span> <span class="p">(</span><span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span> <span class="o">-</span> <span class="n">start_dttm</span><span class="p">)</span><span class="o">.</span><span class="n">total_seconds</span><span class="p">()</span> <span class="o">*</span> <span class="mi">1000</span>
+ <span class="n">Stats</span><span class="o">.</span><span class="n">timing</span><span class="p">(</span><span class="s2">"dagrun.dependency-check.{}.{}"</span><span class="o">.</span>
+ <span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span><span class="p">),</span> <span class="n">duration</span><span class="p">)</span>
+
+ <span class="c1"># future: remove the check on adhoc tasks (=active_tasks)</span>
+ <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">tis</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span><span class="n">dag</span><span class="o">.</span><span class="n">active_tasks</span><span class="p">):</span>
+ <span class="c1"># if any roots failed, the run failed</span>
+ <span class="n">root_ids</span> <span class="o">=</span> <span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">task_id</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">dag</span><span class="o">.</span><span class="n">roots</span><span class="p">]</span>
+ <span class="n">roots</span> <span class="o">=</span> <span class="p">[</span><span class="n">t</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="n">tis</span> <span class="k">if</span> <span class="n">t</span><span class="o">.</span><span class="n">task_id</span> <span class="ow">in</span> <span class="n">root_ids</span><span class="p">]</span>
+
+ <span class="k">if</span> <span class="nb">any</span><span class="p">(</span><span class="n">r</span><span class="o">.</span><span class="n">state</span> <span class="ow">in</span> <span class="p">(</span><span class="n">State</span><span class="o">.</span><span class="n">FAILED</span><span class="p">,</span> <span class="n">State</span><span class="o">.</span><span class="n">UPSTREAM_FAILED</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">r</span> <span class="ow">in</span> <span class="n">roots</span><span class="p">):</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Marking run {} failed'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="n">State</span><span class="o">.</span><span class="n">FAILED</span>
+
+ <span class="c1"># if all roots succeeded, the run succeeded</span>
+ <span class="k">elif</span> <span class="nb">all</span><span class="p">(</span><span class="n">r</span><span class="o">.</span><span class="n">state</span> <span class="ow">in</span> <span class="p">(</span><span class="n">State</span><span class="o">.</span><span class="n">SUCCESS</span><span class="p">,</span> <span class="n">State</span><span class="o">.</span><span class="n">SKIPPED</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">r</span> <span class="ow">in</span> <span class="n">roots</span><span class="p">):</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Marking run {} successful'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="n">State</span><span class="o">.</span><span class="n">SUCCESS</span>
+
+ <span class="c1"># if *all tasks* are deadlocked, the run failed</span>
+ <span class="k">elif</span> <span class="n">unfinished_tasks</span> <span class="ow">and</span> <span class="n">none_depends_on_past</span> <span class="ow">and</span> <span class="n">no_dependencies_met</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
+ <span class="s1">'Deadlock; marking run {} failed'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="n">State</span><span class="o">.</span><span class="n">FAILED</span>
+
+ <span class="c1"># finally, if the roots aren't done, the dag is still running</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="n">State</span><span class="o">.</span><span class="n">RUNNING</span>
+
+ <span class="c1"># todo: determine we want to use with_for_update to make sure to lock the run</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">merge</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">state</span>
+
+ <span class="nd">@provide_session</span>
+ <span class="k">def</span> <span class="nf">verify_integrity</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">session</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Verifies the DagRun by checking for removed tasks or tasks that are not in the</span>
+<span class="sd"> database yet. It will set state to removed or add the task if required.</span>
+<span class="sd"> """</span>
+ <span class="n">dag</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_dag</span><span class="p">()</span>
+ <span class="n">tis</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_task_instances</span><span class="p">(</span><span class="n">session</span><span class="o">=</span><span class="n">session</span><span class="p">)</span>
+
+ <span class="c1"># check for removed tasks</span>
+ <span class="n">task_ids</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="k">for</span> <span class="n">ti</span> <span class="ow">in</span> <span class="n">tis</span><span class="p">:</span>
+ <span class="n">task_ids</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">ti</span><span class="o">.</span><span class="n">task_id</span><span class="p">)</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">dag</span><span class="o">.</span><span class="n">get_task</span><span class="p">(</span><span class="n">ti</span><span class="o">.</span><span class="n">task_id</span><span class="p">)</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">State</span><span class="o">.</span>< [...]
+ <span class="n">ti</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="n">State</span><span class="o">.</span><span class="n">REMOVED</span>
+
+ <span class="c1"># check for missing tasks</span>
+ <span class="k">for</span> <span class="n">task</span> <span class="ow">in</span> <span class="n">dag</span><span class="o">.</span><span class="n">tasks</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">task</span><span class="o">.</span><span class="n">adhoc</span><span class="p">:</span>
+ <span class="k">continue</span>
+
+ <span class="k">if</span> <span class="n">task</span><span class="o">.</span><span class="n">task_id</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">task_ids</span><span class="p">:</span>
+ <span class="n">ti</span> <span class="o">=</span> <span class="n">TaskInstance</span><span class="p">(</span><span class="n">task</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span><span class="p">)</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">ti</span><span class="p">)</span>
+
+ <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+
+
+<span class="k">class</span> <span class="nc">Pool</span><span class="p">(</span><span class="n">Base</span><span class="p">):</span>
+ <span class="n">__tablename__</span> <span class="o">=</span> <span class="s2">"slot_pool"</span>
+
+ <span class="nb">id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Integer</span><span class="p">,</span> <span class="n">primary_key</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="n">pool</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="mi">50</span><span class="p">),</span> <span class="n">unique</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="n">slots</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Integer</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
+ <span class="n">description</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Text</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">pool</span>
+
+ <span class="nd">@provide_session</span>
+ <span class="k">def</span> <span class="nf">used_slots</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">session</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns the number of slots used at the moment</span>
+<span class="sd"> """</span>
+ <span class="n">running</span> <span class="o">=</span> <span class="p">(</span>
+ <span class="n">session</span>
+ <span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">TaskInstance</span><span class="p">)</span>
+ <span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">TaskInstance</span><span class="o">.</span><span class="n">pool</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">pool</span><span class="p">)</span>
+ <span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">TaskInstance</span><span class="o">.</span><span class="n">state</span> <span class="o">==</span> <span class="n">State</span><span class="o">.</span><span class="n">RUNNING</span><span class="p">)</span>
+ <span class="o">.</span><span class="n">count</span><span class="p">()</span>
+ <span class="p">)</span>
+ <span class="k">return</span> <span class="n">running</span>
+
+ <span class="nd">@provide_session</span>
+ <span class="k">def</span> <span class="nf">queued_slots</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">session</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns the number of slots used at the moment</span>
+<span class="sd"> """</span>
+ <span class="k">return</span> <span class="p">(</span>
+ <span class="n">session</span>
+ <span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">TaskInstance</span><span class="p">)</span>
+ <span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">TaskInstance</span><span class="o">.</span><span class="n">pool</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">pool</span><span class="p">)</span>
+ <span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">TaskInstance</span><span class="o">.</span><span class="n">state</span> <span class="o">==</span> <span class="n">State</span><span class="o">.</span><span class="n">QUEUED</span><span class="p">)</span>
+ <span class="o">.</span><span class="n">count</span><span class="p">()</span>
+ <span class="p">)</span>
+
+ <span class="nd">@provide_session</span>
+ <span class="k">def</span> <span class="nf">open_slots</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">session</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns the number of slots open at the moment</span>
+<span class="sd"> """</span>
+ <span class="n">used_slots</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">used_slots</span><span class="p">(</span><span class="n">session</span><span class="o">=</span><span class="n">session</span><span class="p">)</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">slots</span> <span class="o">-</span> <span class="n">used_slots</span>
+
+
+<span class="k">class</span> <span class="nc">SlaMiss</span><span class="p">(</span><span class="n">Base</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Model that stores a history of the SLA that have been missed.</span>
+<span class="sd"> It is used to keep track of SLA failures over time and to avoid double</span>
+<span class="sd"> triggering alert emails.</span>
+<span class="sd"> """</span>
+ <span class="n">__tablename__</span> <span class="o">=</span> <span class="s2">"sla_miss"</span>
+
+ <span class="n">task_id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="n">ID_LEN</span><span class="p">),</span> <span class="n">primary_key</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="n">dag_id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="n">ID_LEN</span><span class="p">),</span> <span class="n">primary_key</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="n">execution_date</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">DateTime</span><span class="p">,</span> <span class="n">primary_key</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="n">email_sent</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Boolean</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="bp">False</span><span class="p">)</span>
+ <span class="n">timestamp</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">DateTime</span><span class="p">)</span>
+ <span class="n">description</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Text</span><span class="p">)</span>
+ <span class="n">notification_sent</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Boolean</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="bp">False</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="nb">str</span><span class="p">((</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">dag_id</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">task_id</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">execution_date</span><span class="o">.</span><span class="n">isoformat</span><span class="p">()))</span>
+
+
+<span class="k">class</span> <span class="nc">ImportError</span><span class="p">(</span><span class="n">Base</span><span class="p">):</span>
+ <span class="n">__tablename__</span> <span class="o">=</span> <span class="s2">"import_error"</span>
+ <span class="nb">id</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Integer</span><span class="p">,</span> <span class="n">primary_key</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="n">timestamp</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">DateTime</span><span class="p">)</span>
+ <span class="n">filename</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">String</span><span class="p">(</span><span class="mi">1024</span><span class="p">))</span>
+ <span class="n">stacktrace</span> <span class="o">=</span> <span class="n">Column</span><span class="p">(</span><span class="n">Text</span><span class="p">)</span>
+</pre></div>
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright 2014, Maxime Beauchemin, Airbnb.
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/_modules/airflow/operators/docker_operator.html b/_modules/airflow/operators/docker_operator.html
new file mode 100644
index 0000000..a1d8fca
--- /dev/null
+++ b/_modules/airflow/operators/docker_operator.html
@@ -0,0 +1,383 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>airflow.operators.docker_operator — Airflow Documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="top" title="Airflow Documentation" href="../../../index.html"/>
+ <link rel="up" title="Module code" href="../../index.html"/>
+
+
+ <script src="../../../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../../../index.html" class="icon icon-home"> Airflow
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../project.html">Project</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../license.html">License</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../start.html">Quick Start</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../installation.html">Installation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../tutorial.html">Tutorial</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../configuration.html">Configuration</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../ui.html">UI / Screenshots</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../concepts.html">Concepts</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../profiling.html">Data Profiling</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../cli.html">Command Line Interface</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../scheduler.html">Scheduling & Triggers</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../plugins.html">Plugins</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../security.html">Security</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../faq.html">FAQ</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../code.html">API Reference</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../../../index.html">Airflow</a>
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+ <ul class="wy-breadcrumbs">
+ <li><a href="../../../index.html">Docs</a> »</li>
+
+ <li><a href="../../index.html">Module code</a> »</li>
+
+ <li>airflow.operators.docker_operator</li>
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+ </ul>
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for airflow.operators.docker_operator</h1><div class="highlight"><pre>
+<span></span><span class="kn">import</span> <span class="nn">json</span>
+<span class="kn">import</span> <span class="nn">logging</span>
+<span class="kn">from</span> <span class="nn">airflow.exceptions</span> <span class="kn">import</span> <span class="n">AirflowException</span>
+<span class="kn">from</span> <span class="nn">airflow.models</span> <span class="kn">import</span> <span class="n">BaseOperator</span>
+<span class="kn">from</span> <span class="nn">airflow.utils.decorators</span> <span class="kn">import</span> <span class="n">apply_defaults</span>
+<span class="kn">from</span> <span class="nn">airflow.utils.file</span> <span class="kn">import</span> <span class="n">TemporaryDirectory</span>
+<span class="kn">from</span> <span class="nn">docker</span> <span class="kn">import</span> <span class="n">Client</span><span class="p">,</span> <span class="n">tls</span>
+<span class="kn">import</span> <span class="nn">ast</span>
+
+
+<div class="viewcode-block" id="DockerOperator"><a class="viewcode-back" href="../../../code.html#airflow.operators.docker_operator.DockerOperator">[docs]</a><span class="k">class</span> <span class="nc">DockerOperator</span><span class="p">(</span><span class="n">BaseOperator</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Execute a command inside a docker container.</span>
+
+<span class="sd"> A temporary directory is created on the host and mounted into a container to allow storing files</span>
+<span class="sd"> that together exceed the default disk size of 10GB in a container. The path to the mounted</span>
+<span class="sd"> directory can be accessed via the environment variable ``AIRFLOW_TMP_DIR``.</span>
+
+<span class="sd"> :param image: Docker image from which to create the container.</span>
+<span class="sd"> :type image: str</span>
+<span class="sd"> :param api_version: Remote API version.</span>
+<span class="sd"> :type api_version: str</span>
+<span class="sd"> :param command: Command to be run in the container.</span>
+<span class="sd"> :type command: str or list</span>
+<span class="sd"> :param cpus: Number of CPUs to assign to the container.</span>
+<span class="sd"> This value gets multiplied by 1024. See</span>
+<span class="sd"> https://docs.docker.com/engine/reference/run/#cpu-share-constraint</span>
+<span class="sd"> :type cpus: float</span>
+<span class="sd"> :param docker_url: URL of the host running the docker daemon.</span>
+<span class="sd"> :type docker_url: str</span>
+<span class="sd"> :param environment: Environment variables to set in the container.</span>
+<span class="sd"> :type environment: dict</span>
+<span class="sd"> :param force_pull: Pull the docker image on every run.</span>
+<span class="sd"> :type force_pull: bool</span>
+<span class="sd"> :param mem_limit: Maximum amount of memory the container can use. Either a float value, which</span>
+<span class="sd"> represents the limit in bytes, or a string like ``128m`` or ``1g``.</span>
+<span class="sd"> :type mem_limit: float or str</span>
+<span class="sd"> :param network_mode: Network mode for the container.</span>
+<span class="sd"> :type network_mode: str</span>
+<span class="sd"> :param tls_ca_cert: Path to a PEM-encoded certificate authority to secure the docker connection.</span>
+<span class="sd"> :type tls_ca_cert: str</span>
+<span class="sd"> :param tls_client_cert: Path to the PEM-encoded certificate used to authenticate docker client.</span>
+<span class="sd"> :type tls_client_cert: str</span>
+<span class="sd"> :param tls_client_key: Path to the PEM-encoded key used to authenticate docker client.</span>
+<span class="sd"> :type tls_client_key: str</span>
+<span class="sd"> :param tls_hostname: Hostname to match against the docker server certificate or False to</span>
+<span class="sd"> disable the check.</span>
+<span class="sd"> :type tls_hostname: str or bool</span>
+<span class="sd"> :param tls_ssl_version: Version of SSL to use when communicating with docker daemon.</span>
+<span class="sd"> :type tls_ssl_version: str</span>
+<span class="sd"> :param tmp_dir: Mount point inside the container to a temporary directory created on the host by</span>
+<span class="sd"> the operator. The path is also made available via the environment variable</span>
+<span class="sd"> ``AIRFLOW_TMP_DIR`` inside the container.</span>
+<span class="sd"> :type tmp_dir: str</span>
+<span class="sd"> :param user: Default user inside the docker container.</span>
+<span class="sd"> :type user: int or str</span>
+<span class="sd"> :param volumes: List of volumes to mount into the container, e.g.</span>
+<span class="sd"> ``['/host/path:/container/path', '/host/path2:/container/path2:ro']``.</span>
+<span class="sd"> :param xcom_push: Whether the stdout will be pushed to the next step using XCom.</span>
+<span class="sd"> The default is False.</span>
+<span class="sd"> :type xcom_push: bool</span>
+<span class="sd"> :param xcom_all: Push all the stdout or just the last line. The default is False (last line).</span>
+<span class="sd"> :type xcom_all: bool</span>
+<span class="sd"> """</span>
+ <span class="n">template_fields</span> <span class="o">=</span> <span class="p">(</span><span class="s1">'command'</span><span class="p">,)</span>
+ <span class="n">template_ext</span> <span class="o">=</span> <span class="p">(</span><span class="s1">'.sh'</span><span class="p">,</span> <span class="s1">'.bash'</span><span class="p">,)</span>
+
+ <span class="nd">@apply_defaults</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span>
+ <span class="n">image</span><span class="p">,</span>
+ <span class="n">api_version</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">command</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">cpus</span><span class="o">=</span><span class="mf">1.0</span><span class="p">,</span>
+ <span class="n">docker_url</span><span class="o">=</span><span class="s1">'unix://var/run/docker.sock'</span><span class="p">,</span>
+ <span class="n">environment</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">force_pull</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">mem_limit</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">network_mode</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">tls_ca_cert</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">tls_client_cert</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">tls_client_key</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">tls_hostname</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">tls_ssl_version</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">tmp_dir</span><span class="o">=</span><span class="s1">'/tmp/airflow'</span><span class="p">,</span>
+ <span class="n">user</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">volumes</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">xcom_push</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">xcom_all</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="o">*</span><span class="n">args</span><span class="p">,</span>
+ <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+
+ <span class="nb">super</span><span class="p">(</span><span class="n">DockerOperator</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">api_version</span> <span class="o">=</span> <span class="n">api_version</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">command</span> <span class="o">=</span> <span class="n">command</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">cpus</span> <span class="o">=</span> <span class="n">cpus</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">docker_url</span> <span class="o">=</span> <span class="n">docker_url</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">environment</span> <span class="o">=</span> <span class="n">environment</span> <span class="ow">or</span> <span class="p">{}</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">force_pull</span> <span class="o">=</span> <span class="n">force_pull</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">image</span> <span class="o">=</span> <span class="n">image</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">mem_limit</span> <span class="o">=</span> <span class="n">mem_limit</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">network_mode</span> <span class="o">=</span> <span class="n">network_mode</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">tls_ca_cert</span> <span class="o">=</span> <span class="n">tls_ca_cert</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">tls_client_cert</span> <span class="o">=</span> <span class="n">tls_client_cert</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">tls_client_key</span> <span class="o">=</span> <span class="n">tls_client_key</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">tls_hostname</span> <span class="o">=</span> <span class="n">tls_hostname</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">tls_ssl_version</span> <span class="o">=</span> <span class="n">tls_ssl_version</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">tmp_dir</span> <span class="o">=</span> <span class="n">tmp_dir</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">user</span> <span class="o">=</span> <span class="n">user</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">volumes</span> <span class="o">=</span> <span class="n">volumes</span> <span class="ow">or</span> <span class="p">[]</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">xcom_push</span> <span class="o">=</span> <span class="n">xcom_push</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">xcom_all</span> <span class="o">=</span> <span class="n">xcom_all</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">cli</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">container</span> <span class="o">=</span> <span class="bp">None</span>
+
+ <span class="k">def</span> <span class="nf">execute</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Starting docker container from image '</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">image</span><span class="p">)</span>
+
+ <span class="n">tls_config</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">tls_ca_cert</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">tls_client_cert</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">tls_client_key</span><span class="p">:</span>
+ <span class="n">tls_config</span> <span class="o">=</span> <span class="n">tls</span><span class="o">.</span><span class="n">TLSConfig</span><span class="p">(</span>
+ <span class="n">ca_cert</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">tls_ca_cert</span><span class="p">,</span>
+ <span class="n">client_cert</span><span class="o">=</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tls_client_cert</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">tls_client_key</span><span class="p">),</span>
+ <span class="n">verify</span><span class="o">=</span><span class="bp">True</span><span class="p">,</span>
+ <span class="n">ssl_version</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">tls_ssl_version</span><span class="p">,</span>
+ <span class="n">assert_hostname</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">tls_hostname</span>
+ <span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">docker_url</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">docker_url</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'tcp://'</span><span class="p">,</span> <span class="s1">'https://'</span><span class="p">)</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">cli</span> <span class="o">=</span> <span class="n">Client</span><span class="p">(</span><span class="n">base_url</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">docker_url</span><span class="p">,</span> <span class="n">version</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">api_version</span><span class="p">,</spa [...]
+
+ <span class="k">if</span> <span class="s1">':'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">image</span><span class="p">:</span>
+ <span class="n">image</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">image</span> <span class="o">+</span> <span class="s1">':latest'</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">image</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">image</span>
+
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">force_pull</span> <span class="ow">or</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">cli</span><span class="o">.</span><span class="n">images</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="n">image</span><span class="p">))</span> <span class="o">==</span> <span cl [...]
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Pulling docker image '</span> <span class="o">+</span> <span class="n">image</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">l</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">cli</span><span class="o">.</span><span class="n">pull</span><span class="p">(</span><span class="n">image</span><span class="p">,</span> <span class="n">stream</span><span class="o">=</span><span class="bp">True</span><span class="p">):</span>
+ <span class="n">output</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">l</span><span class="p">)</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"{}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">output</span><span class="p">[</span><span class="s1">'status'</span><span class="p">]))</span>
+
+ <span class="n">cpu_shares</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="nb">round</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">cpus</span> <span class="o">*</span> <span class="mi">1024</span><span class="p">))</span>
+
+ <span class="k">with</span> <span class="n">TemporaryDirectory</span><span class="p">(</span><span class="n">prefix</span><span class="o">=</span><span class="s1">'airflowtmp'</span><span class="p">)</span> <span class="k">as</span> <span class="n">host_tmp_dir</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">environment</span><span class="p">[</span><span class="s1">'AIRFLOW_TMP_DIR'</span><span class="p">]</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">tmp_dir</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">volumes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s1">'{0}:{1}'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">host_tmp_dir</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">tmp_dir</span><span class="p">))</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">container</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">cli</span><span class="o">.</span><span class="n">create_container</span><span class="p">(</span>
+ <span class="n">command</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">get_command</span><span class="p">(),</span>
+ <span class="n">cpu_shares</span><span class="o">=</span><span class="n">cpu_shares</span><span class="p">,</span>
+ <span class="n">environment</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">environment</span><span class="p">,</span>
+ <span class="n">host_config</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">cli</span><span class="o">.</span><span class="n">create_host_config</span><span class="p">(</span><span class="n">binds</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">volumes</span><span class="p">,</span>
+ <span class="n">network_mode</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">network_mode</span><span class="p">),</span>
+ <span class="n">image</span><span class="o">=</span><span class="n">image</span><span class="p">,</span>
+ <span class="n">mem_limit</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">mem_limit</span><span class="p">,</span>
+ <span class="n">user</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">user</span>
+ <span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">cli</span><span class="o">.</span><span class="n">start</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">container</span><span class="p">[</span><span class="s1">'Id'</span><span class="p">])</span>
+
+ <span class="n">line</span> <span class="o">=</span> <span class="s1">''</span>
+ <span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">cli</span><span class="o">.</span><span class="n">logs</span><span class="p">(</span><span class="n">container</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">container</span><span class="p">[</span><span class="s1">'Id'</span><span class="p">],</span> <span class="n">str [...]
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"{}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">line</span><span class="o">.</span><span class="n">strip</span><span class="p">()))</span>
+
+ <span class="n">exit_code</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">cli</span><span class="o">.</span><span class="n">wait</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">container</span><span class="p">[</span><span class="s1">'Id'</span><span class="p">])</span>
+ <span class="k">if</span> <span class="n">exit_code</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span><span class="s1">'docker container failed'</span><span class="p">)</span>
+
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">xcom_push</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">cli</span><span class="o">.</span><span class="n">logs</span><span class="p">(</span><span class="n">container</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">container</span><span class="p">[</span><span class="s1">'Id'</span><span class="p">])</span> <span class="k">if</span> <span class="bp">self</span><span class=" [...]
+
+ <span class="k">def</span> <span class="nf">get_command</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">command</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">None</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">command</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'['< [...]
+ <span class="n">commands</span> <span class="o">=</span> <span class="n">ast</span><span class="o">.</span><span class="n">literal_eval</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">command</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">commands</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">command</span>
+ <span class="k">return</span> <span class="n">commands</span>
+
+ <span class="k">def</span> <span class="nf">on_kill</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">cli</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Stopping docker container'</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">cli</span><span class="o">.</span><span class="n">stop</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">container</span><span class="p">[</span><span class="s1">'Id'</span><span class="p">])</span></div>
+</pre></div>
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright 2014, Maxime Beauchemin, Airbnb.
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../../../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../../../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/_modules/airflow/operators/sensors.html b/_modules/airflow/operators/sensors.html
new file mode 100644
index 0000000..b48da21
--- /dev/null
+++ b/_modules/airflow/operators/sensors.html
@@ -0,0 +1,721 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>airflow.operators.sensors — Airflow Documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="top" title="Airflow Documentation" href="../../../index.html"/>
+ <link rel="up" title="Module code" href="../../index.html"/>
+
+
+ <script src="../../../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../../../index.html" class="icon icon-home"> Airflow
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../project.html">Project</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../license.html">License</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../start.html">Quick Start</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../installation.html">Installation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../tutorial.html">Tutorial</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../configuration.html">Configuration</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../ui.html">UI / Screenshots</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../concepts.html">Concepts</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../profiling.html">Data Profiling</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../cli.html">Command Line Interface</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../scheduler.html">Scheduling &amp; Triggers</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../plugins.html">Plugins</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../security.html">Security</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../faq.html">FAQ</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../code.html">API Reference</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../../../index.html">Airflow</a>
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+ <ul class="wy-breadcrumbs">
+ <li><a href="../../../index.html">Docs</a> »</li>
+
+ <li><a href="../../index.html">Module code</a> »</li>
+
+ <li>airflow.operators.sensors</li>
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+ </ul>
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for airflow.operators.sensors</h1><div class="highlight"><pre>
+<span></span><span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">print_function</span>
+<span class="kn">from</span> <span class="nn">future</span> <span class="kn">import</span> <span class="n">standard_library</span>
+<span class="n">standard_library</span><span class="o">.</span><span class="n">install_aliases</span><span class="p">()</span>
+<span class="kn">from</span> <span class="nn">builtins</span> <span class="kn">import</span> <span class="nb">str</span>
+<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span>
+<span class="kn">import</span> <span class="nn">logging</span>
+<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlparse</span>
+<span class="kn">from</span> <span class="nn">time</span> <span class="kn">import</span> <span class="n">sleep</span>
+
+<span class="kn">from</span> <span class="nn">airflow</span> <span class="kn">import</span> <span class="n">hooks</span><span class="p">,</span> <span class="n">settings</span>
+<span class="kn">from</span> <span class="nn">airflow.exceptions</span> <span class="kn">import</span> <span class="n">AirflowException</span><span class="p">,</span> <span class="n">AirflowSensorTimeout</span><span class="p">,</span> <span class="n">AirflowSkipException</span>
+<span class="kn">from</span> <span class="nn">airflow.models</span> <span class="kn">import</span> <span class="n">BaseOperator</span><span class="p">,</span> <span class="n">TaskInstance</span><span class="p">,</span> <span class="n">Connection</span> <span class="k">as</span> <span class="n">DB</span>
+<span class="kn">from</span> <span class="nn">airflow.hooks</span> <span class="kn">import</span> <span class="n">BaseHook</span>
+<span class="kn">from</span> <span class="nn">airflow.utils.state</span> <span class="kn">import</span> <span class="n">State</span>
+<span class="kn">from</span> <span class="nn">airflow.utils.decorators</span> <span class="kn">import</span> <span class="n">apply_defaults</span>
+
+
+<div class="viewcode-block" id="BaseSensorOperator"><a class="viewcode-back" href="../../../code.html#airflow.operators.sensors.BaseSensorOperator">[docs]</a><span class="k">class</span> <span class="nc">BaseSensorOperator</span><span class="p">(</span><span class="n">BaseOperator</span><span class="p">):</span>
+ <span class="sd">'''</span>
+<span class="sd"> Sensor operators are derived from this class and inherit these attributes.</span>
+
+<span class="sd"> Sensor operators keep executing at a time interval and succeed when</span>
+<span class="sd"> a criterion is met and fail if and when they time out.</span>
+
+<span class="sd"> :param soft_fail: Set to true to mark the task as SKIPPED on failure</span>
+<span class="sd"> :type soft_fail: bool</span>
+<span class="sd"> :param poke_interval: Time in seconds that the job should wait in</span>
+<span class="sd"> between tries</span>
+<span class="sd"> :type poke_interval: int</span>
+<span class="sd"> :param timeout: Time, in seconds before the task times out and fails.</span>
+<span class="sd"> :type timeout: int</span>
+<span class="sd"> '''</span>
+ <span class="n">ui_color</span> <span class="o">=</span> <span class="s1">'#e6f1f2'</span>
+
+ <span class="nd">@apply_defaults</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span>
+ <span class="n">poke_interval</span><span class="o">=</span><span class="mi">60</span><span class="p">,</span>
+ <span class="n">timeout</span><span class="o">=</span><span class="mi">60</span><span class="o">*</span><span class="mi">60</span><span class="o">*</span><span class="mi">24</span><span class="o">*</span><span class="mi">7</span><span class="p">,</span>
+ <span class="n">soft_fail</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="nb">super</span><span class="p">(</span><span class="n">BaseSensorOperator</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">poke_interval</span> <span class="o">=</span> <span class="n">poke_interval</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">soft_fail</span> <span class="o">=</span> <span class="n">soft_fail</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">timeout</span> <span class="o">=</span> <span class="n">timeout</span>
+
+ <span class="k">def</span> <span class="nf">poke</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+ <span class="sd">'''</span>
+<span class="sd"> Function that the sensors defined while deriving this class should</span>
+<span class="sd"> override.</span>
+<span class="sd"> '''</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span><span class="s1">'Override me.'</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">execute</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+ <span class="n">started_at</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
+ <span class="k">while</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">poke</span><span class="p">(</span><span class="n">context</span><span class="p">):</span>
+ <span class="n">sleep</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">poke_interval</span><span class="p">)</span>
+ <span class="k">if</span> <span class="p">(</span><span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span> <span class="o">-</span> <span class="n">started_at</span><span class="p">)</span><span class="o">.</span><span class="n">seconds</span> <span class="o">></span> <span class="bp">self</span><span class="o">.</span><span class="n">timeout</span><span class="p">:</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">soft_fail</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">AirflowSkipException</span><span class="p">(</span><span class="s1">'Snap. Time is OUT.'</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">AirflowSensorTimeout</span><span class="p">(</span><span class="s1">'Snap. Time is OUT.'</span><span class="p">)</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"Success criteria met. Exiting."</span><span class="p">)</span></div>
+
+
+<span class="k">class</span> <span class="nc">SqlSensor</span><span class="p">(</span><span class="n">BaseSensorOperator</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Runs a sql statement until a criterion is met. It will keep trying while</span>
+<span class="sd"> sql returns no row, or while the first cell is in (0, '0', '').</span>
+
+<span class="sd"> :param conn_id: The connection to run the sensor against</span>
+<span class="sd"> :type conn_id: string</span>
+<span class="sd"> :param sql: The sql to run. To pass, it needs to return at least one cell</span>
+<span class="sd"> that contains a non-zero / non-empty string value.</span>
+<span class="sd"> """</span>
+ <span class="n">template_fields</span> <span class="o">=</span> <span class="p">(</span><span class="s1">'sql'</span><span class="p">,)</span>
+ <span class="n">template_ext</span> <span class="o">=</span> <span class="p">(</span><span class="s1">'.hql'</span><span class="p">,</span> <span class="s1">'.sql'</span><span class="p">,)</span>
+
+ <span class="nd">@apply_defaults</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">conn_id</span><span class="p">,</span> <span class="n">sql</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">sql</span> <span class="o">=</span> <span class="n">sql</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">conn_id</span> <span class="o">=</span> <span class="n">conn_id</span>
+ <span class="nb">super</span><span class="p">(</span><span class="n">SqlSensor</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">poke</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+ <span class="n">hook</span> <span class="o">=</span> <span class="n">BaseHook</span><span class="o">.</span><span class="n">get_connection</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">conn_id</span><span class="p">)</span><span class="o">.</span><span class="n">get_hook</span><span class="p">()</span>
+
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Poking: '</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">sql</span><span class="p">)</span>
+ <span class="n">records</span> <span class="o">=</span> <span class="n">hook</span><span class="o">.</span><span class="n">get_records</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">sql</span><span class="p">)</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">records</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">False</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">if</span> <span class="nb">str</span><span class="p">(</span><span class="n">records</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">])</span> <span class="ow">in</span> <span class="p">(</span><span class="s1">'0'</span><span class="p">,</span> <span class="s1">''</span><span class="p">,):</span>
+ <span class="k">return</span> <span class="bp">False</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">True</span>
+ <span class="k">print</span><span class="p">(</span><span class="n">records</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">])</span>
+
+
+<span class="k">class</span> <span class="nc">MetastorePartitionSensor</span><span class="p">(</span><span class="n">SqlSensor</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> An alternative to the HivePartitionSensor that talks directly to the</span>
+<span class="sd"> MySQL db. This was created as a result of observing sub optimal</span>
+<span class="sd"> queries generated by the Metastore thrift service when hitting</span>
+<span class="sd"> subpartitioned tables. The Thrift service's queries were written in a</span>
+<span class="sd"> way that wouldn't leverage the indexes.</span>
+
+<span class="sd"> :param schema: the schema</span>
+<span class="sd"> :type schema: str</span>
+<span class="sd"> :param table: the table</span>
+<span class="sd"> :type table: str</span>
+<span class="sd"> :param partition_name: the partition name, as defined in the PARTITIONS</span>
+<span class="sd"> table of the Metastore. Order of the fields does matter.</span>
+<span class="sd"> Examples: ``ds=2016-01-01`` or</span>
+<span class="sd"> ``ds=2016-01-01/sub=foo`` for a sub partitioned table</span>
+<span class="sd"> :type partition_name: str</span>
+<span class="sd"> :param mysql_conn_id: a reference to the MySQL conn_id for the metastore</span>
+<span class="sd"> :type mysql_conn_id: str</span>
+<span class="sd"> """</span>
+ <span class="n">template_fields</span> <span class="o">=</span> <span class="p">(</span><span class="s1">'partition_name'</span><span class="p">,</span> <span class="s1">'table'</span><span class="p">,</span> <span class="s1">'schema'</span><span class="p">)</span>
+
+ <span class="nd">@apply_defaults</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span> <span class="n">table</span><span class="p">,</span> <span class="n">partition_name</span><span class="p">,</span> <span class="n">schema</span><span class="o">=</span><span class="s2">"default"</span><span class="p">,</span>
+ <span class="n">mysql_conn_id</span><span class="o">=</span><span class="s2">"metastore_mysql"</span><span class="p">,</span>
+ <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">partition_name</span> <span class="o">=</span> <span class="n">partition_name</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">table</span> <span class="o">=</span> <span class="n">table</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">schema</span> <span class="o">=</span> <span class="n">schema</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">first_poke</span> <span class="o">=</span> <span class="bp">True</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">conn_id</span> <span class="o">=</span> <span class="n">mysql_conn_id</span>
+ <span class="nb">super</span><span class="p">(</span><span class="n">SqlSensor</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">poke</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">first_poke</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">first_poke</span> <span class="o">=</span> <span class="bp">False</span>
+ <span class="k">if</span> <span class="s1">'.'</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">table</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">schema</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">table</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">table</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'.'</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">sql</span> <span class="o">=</span> <span class="s2">"""</span>
+<span class="s2"> SELECT 'X'</span>
+<span class="s2"> FROM PARTITIONS A0</span>
+<span class="s2"> LEFT OUTER JOIN TBLS B0 ON A0.TBL_ID = B0.TBL_ID</span>
+<span class="s2"> LEFT OUTER JOIN DBS C0 ON B0.DB_ID = C0.DB_ID</span>
+<span class="s2"> WHERE</span>
+<span class="s2"> B0.TBL_NAME = '{self.table}' AND</span>
+<span class="s2"> C0.NAME = '{self.schema}' AND</span>
+<span class="s2"> A0.PART_NAME = '{self.partition_name}';</span>
+<span class="s2"> """</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">=</span><span class="bp">self</span><span class="p">)</span>
+ <span class="k">return</span> <span class="nb">super</span><span class="p">(</span><span class="n">MetastorePartitionSensor</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">poke</span><span class="p">(</span><span class="n">context</span><span class="p">)</span>
+
+
+<span class="k">class</span> <span class="nc">ExternalTaskSensor</span><span class="p">(</span><span class="n">BaseSensorOperator</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Waits for a task to complete in a different DAG</span>
+
+<span class="sd"> :param external_dag_id: The dag_id that contains the task you want to</span>
+<span class="sd"> wait for</span>
+<span class="sd"> :type external_dag_id: string</span>
+<span class="sd"> :param external_task_id: The task_id that contains the task you want to</span>
+<span class="sd"> wait for</span>
+<span class="sd"> :type external_task_id: string</span>
+<span class="sd"> :param allowed_states: list of allowed states, default is ``['success']``</span>
+<span class="sd"> :type allowed_states: list</span>
+<span class="sd"> :param execution_delta: time difference with the previous execution to</span>
+<span class="sd"> look at, the default is the same execution_date as the current task.</span>
+<span class="sd"> For yesterday, use [positive!] datetime.timedelta(days=1)</span>
+<span class="sd"> :type execution_delta: datetime.timedelta</span>
+<span class="sd"> """</span>
+
+ <span class="nd">@apply_defaults</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span>
+ <span class="n">external_dag_id</span><span class="p">,</span>
+ <span class="n">external_task_id</span><span class="p">,</span>
+ <span class="n">allowed_states</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">execution_delta</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="nb">super</span><span class="p">(</span><span class="n">ExternalTaskSensor</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">allowed_states</span> <span class="o">=</span> <span class="n">allowed_states</span> <span class="ow">or</span> <span class="p">[</span><span class="n">State</span><span class="o">.</span><span class="n">SUCCESS</span><span class="p">]</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">execution_delta</span> <span class="o">=</span> <span class="n">execution_delta</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">external_dag_id</span> <span class="o">=</span> <span class="n">external_dag_id</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">external_task_id</span> <span class="o">=</span> <span class="n">external_task_id</span>
+
+ <span class="k">def</span> <span class="nf">poke</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">execution_delta</span><span class="p">:</span>
+ <span class="n">dttm</span> <span class="o">=</span> <span class="n">context</span><span class="p">[</span><span class="s1">'execution_date'</span><span class="p">]</span> <span class="o">-</span> <span class="bp">self</span><span class="o">.</span><span class="n">execution_delta</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">dttm</span> <span class="o">=</span> <span class="n">context</span><span class="p">[</span><span class="s1">'execution_date'</span><span class="p">]</span>
+
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
+ <span class="s1">'Poking for '</span>
+ <span class="s1">'{self.external_dag_id}.'</span>
+ <span class="s1">'{self.external_task_id} on '</span>
+ <span class="s1">'{dttm} ... '</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">()))</span>
+ <span class="n">TI</span> <span class="o">=</span> <span class="n">TaskInstance</span>
+
+ <span class="n">session</span> <span class="o">=</span> <span class="n">settings</span><span class="o">.</span><span class="n">Session</span><span class="p">()</span>
+ <span class="n">count</span> <span class="o">=</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">TI</span><span class="p">)</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">dag_id</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">external_dag_id</span><span class="p">,</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">task_id</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">external_task_id</span><span class="p">,</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">state</span><span class="o">.</span><span class="n">in_</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">allowed_states</span><span class="p">),</span>
+ <span class="n">TI</span><span class="o">.</span><span class="n">execution_date</span> <span class="o">==</span> <span class="n">dttm</span><span class="p">,</span>
+ <span class="p">)</span><span class="o">.</span><span class="n">count</span><span class="p">()</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+ <span class="k">return</span> <span class="n">count</span>
+
+
+<span class="k">class</span> <span class="nc">HivePartitionSensor</span><span class="p">(</span><span class="n">BaseSensorOperator</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Waits for a partition to show up in Hive</span>
+
+<span class="sd"> :param table: The name of the table to wait for, supports the dot</span>
+<span class="sd"> notation (my_database.my_table)</span>
+<span class="sd"> :type table: string</span>
+<span class="sd"> :param partition: The partition clause to wait for. This is passed as</span>
+<span class="sd"> is to the Metastore Thrift client "get_partitions_by_filter" method,</span>
+<span class="sd"> and apparently supports SQL like notation as in `ds='2015-01-01'</span>
+<span class="sd"> AND type='value'` and > < signs as in "ds>=2015-01-01"</span>
+<span class="sd"> :type partition: string</span>
+<span class="sd"> :param metastore_conn_id: reference to the metastore thrift service</span>
+<span class="sd"> connection id</span>
+<span class="sd"> :type metastore_conn_id: str</span>
+<span class="sd"> """</span>
+ <span class="n">template_fields</span> <span class="o">=</span> <span class="p">(</span><span class="s1">'schema'</span><span class="p">,</span> <span class="s1">'table'</span><span class="p">,</span> <span class="s1">'partition'</span><span class="p">,)</span>
+
+ <span class="nd">@apply_defaults</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span>
+ <span class="n">table</span><span class="p">,</span> <span class="n">partition</span><span class="o">=</span><span class="s2">"ds='{{ ds }}'"</span><span class="p">,</span>
+ <span class="n">metastore_conn_id</span><span class="o">=</span><span class="s1">'metastore_default'</span><span class="p">,</span>
+ <span class="n">schema</span><span class="o">=</span><span class="s1">'default'</span><span class="p">,</span>
+ <span class="n">poke_interval</span><span class="o">=</span><span class="mi">60</span><span class="o">*</span><span class="mi">3</span><span class="p">,</span>
+ <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="nb">super</span><span class="p">(</span><span class="n">HivePartitionSensor</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">__init__</span><span class="p">(</span>
+ <span class="n">poke_interval</span><span class="o">=</span><span class="n">poke_interval</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">partition</span><span class="p">:</span>
+ <span class="n">partition</span> <span class="o">=</span> <span class="s2">"ds='{{ ds }}'"</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">metastore_conn_id</span> <span class="o">=</span> <span class="n">metastore_conn_id</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">table</span> <span class="o">=</span> <span class="n">table</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">partition</span> <span class="o">=</span> <span class="n">partition</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">schema</span> <span class="o">=</span> <span class="n">schema</span>
+
+ <span class="k">def</span> <span class="nf">poke</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+ <span class="k">if</span> <span class="s1">'.'</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">table</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">schema</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">table</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">table</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'.'</span><span class="p">)</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
+ <span class="s1">'Poking for table {self.schema}.{self.table}, '</span>
+ <span class="s1">'partition {self.partition}'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">()))</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">'hook'</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">hook</span> <span class="o">=</span> <span class="n">hooks</span><span class="o">.</span><span class="n">HiveMetastoreHook</span><span class="p">(</span>
+ <span class="n">metastore_conn_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">metastore_conn_id</span><span class="p">)</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">hook</span><span class="o">.</span><span class="n">check_for_partition</span><span class="p">(</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">schema</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">table</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">partition</span><span class="p">)</span>
+
+
+<span class="k">class</span> <span class="nc">HdfsSensor</span><span class="p">(</span><span class="n">BaseSensorOperator</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Waits for a file or folder to land in HDFS</span>
+<span class="sd"> """</span>
+ <span class="n">template_fields</span> <span class="o">=</span> <span class="p">(</span><span class="s1">'filepath'</span><span class="p">,)</span>
+
+ <span class="nd">@apply_defaults</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span>
+ <span class="n">filepath</span><span class="p">,</span>
+ <span class="n">hdfs_conn_id</span><span class="o">=</span><span class="s1">'hdfs_default'</span><span class="p">,</span>
+ <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="nb">super</span><span class="p">(</span><span class="n">HdfsSensor</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">filepath</span> <span class="o">=</span> <span class="n">filepath</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">hdfs_conn_id</span> <span class="o">=</span> <span class="n">hdfs_conn_id</span>
+
+ <span class="k">def</span> <span class="nf">poke</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+ <span class="n">sb</span> <span class="o">=</span> <span class="n">hooks</span><span class="o">.</span><span class="n">HDFSHook</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">hdfs_conn_id</span><span class="p">)</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">(</span><span class="s2">"snakebite"</span><span class="p">)</span><span class="o">.</span><span class="n">setLevel</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">WARNING</span><span class="p">)</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
+ <span class="s1">'Poking for file {self.filepath} '</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">()))</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">files</span> <span class="o">=</span> <span class="p">[</span><span class="n">f</span> <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">sb</span><span class="o">.</span><span class="n">ls</span><span class="p">([</span><span class="bp">self</span><span class="o">.</span><span class="n">filepath</span><span class="p">])]</span>
+ <span class="k">except</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">False</span>
+ <span class="k">return</span> <span class="bp">True</span>
+
+
+<span class="k">class</span> <span class="nc">WebHdfsSensor</span><span class="p">(</span><span class="n">BaseSensorOperator</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Waits for a file or folder to land in HDFS</span>
+<span class="sd"> """</span>
+ <span class="n">template_fields</span> <span class="o">=</span> <span class="p">(</span><span class="s1">'filepath'</span><span class="p">,)</span>
+
+ <span class="nd">@apply_defaults</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span>
+ <span class="n">filepath</span><span class="p">,</span>
+ <span class="n">webhdfs_conn_id</span><span class="o">=</span><span class="s1">'webhdfs_default'</span><span class="p">,</span>
+ <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="nb">super</span><span class="p">(</span><span class="n">WebHdfsSensor</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">filepath</span> <span class="o">=</span> <span class="n">filepath</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">webhdfs_conn_id</span> <span class="o">=</span> <span class="n">webhdfs_conn_id</span>
+
+ <span class="k">def</span> <span class="nf">poke</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+ <span class="n">c</span> <span class="o">=</span> <span class="n">hooks</span><span class="o">.</span><span class="n">WebHDFSHook</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">webhdfs_conn_id</span><span class="p">)</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
+ <span class="s1">'Poking for file {self.filepath} '</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">()))</span>
+ <span class="k">return</span> <span class="n">c</span><span class="o">.</span><span class="n">check_for_path</span><span class="p">(</span><span class="n">hdfs_path</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">filepath</span><span class="p">)</span>
+
+
+<span class="k">class</span> <span class="nc">S3KeySensor</span><span class="p">(</span><span class="n">BaseSensorOperator</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Waits for a key (a file-like instance on S3) to be present in a S3 bucket.</span>
+<span class="sd"> S3 being a key/value it does not support folders. The path is just a key</span>
+<span class="sd"> a resource.</span>
+
+<span class="sd"> :param bucket_key: The key being waited on. Supports full s3:// style url</span>
+<span class="sd"> or relative path from root level.</span>
+<span class="sd"> :type bucket_key: str</span>
+<span class="sd"> :param bucket_name: Name of the S3 bucket</span>
+<span class="sd"> :type bucket_name: str</span>
+<span class="sd"> :param wildcard_match: whether the bucket_key should be interpreted as a</span>
+<span class="sd"> Unix wildcard pattern</span>
+<span class="sd"> :type wildcard_match: bool</span>
+<span class="sd"> :param s3_conn_id: a reference to the s3 connection</span>
+<span class="sd"> :type s3_conn_id: str</span>
+<span class="sd"> """</span>
+ <span class="n">template_fields</span> <span class="o">=</span> <span class="p">(</span><span class="s1">'bucket_key'</span><span class="p">,</span> <span class="s1">'bucket_name'</span><span class="p">)</span>
+
+ <span class="nd">@apply_defaults</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span> <span class="n">bucket_key</span><span class="p">,</span>
+ <span class="n">bucket_name</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">wildcard_match</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">s3_conn_id</span><span class="o">=</span><span class="s1">'s3_default'</span><span class="p">,</span>
+ <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="nb">super</span><span class="p">(</span><span class="n">S3KeySensor</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+ <span class="n">session</span> <span class="o">=</span> <span class="n">settings</span><span class="o">.</span><span class="n">Session</span><span class="p">()</span>
+ <span class="n">db</span> <span class="o">=</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">DB</span><span class="p">)</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">DB</span><span class="o">.</span><span class="n">conn_id</span> <span class="o">==</span> <span class="n">s3_conn_id</span><span class="p">)</span><span class="o">.</span><span class="n [...]
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">db</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span><span class="s2">"conn_id doesn't exist in the repository"</span><span class="p">)</span>
+ <span class="c1"># Parse</span>
+ <span class="k">if</span> <span class="n">bucket_name</span> <span class="ow">is</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">parsed_url</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">bucket_key</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">netloc</span> <span class="o">==</span> <span class="s1">''</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span><span class="s1">'Please provide a bucket_name'</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">bucket_name</span> <span class="o">=</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">netloc</span>
+ <span class="k">if</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">path</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">==</span> <span class="s1">'/'</span><span class="p">:</span>
+ <span class="n">bucket_key</span> <span class="o">=</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">path</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">bucket_key</span> <span class="o">=</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">path</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">bucket_name</span> <span class="o">=</span> <span class="n">bucket_name</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">bucket_key</span> <span class="o">=</span> <span class="n">bucket_key</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">wildcard_match</span> <span class="o">=</span> <span class="n">wildcard_match</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">s3_conn_id</span> <span class="o">=</span> <span class="n">s3_conn_id</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+
+ <span class="k">def</span> <span class="nf">poke</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+ <span class="n">hook</span> <span class="o">=</span> <span class="n">hooks</span><span class="o">.</span><span class="n">S3Hook</span><span class="p">(</span><span class="n">s3_conn_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">s3_conn_id</span><span class="p">)</span>
+ <span class="n">full_url</span> <span class="o">=</span> <span class="s2">"s3://"</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">bucket_name</span> <span class="o">+</span> <span class="s2">"/"</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">bucket_key</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Poking for key : {full_url}'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">()))</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">wildcard_match</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">hook</span><span class="o">.</span><span class="n">check_for_wildcard_key</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">bucket_key</span><span class="p">,</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">bucket_name</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">hook</span><span class="o">.</span><span class="n">check_for_key</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">bucket_key</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">bucket_name</span><span class="p">)</span>
+
+
+<span class="k">class</span> <span class="nc">S3PrefixSensor</span><span class="p">(</span><span class="n">BaseSensorOperator</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Waits for a prefix to exist. A prefix is the first part of a key,</span>
+<span class="sd"> thus enabling checking of constructs similar to glob airfl* or</span>
+<span class="sd"> SQL LIKE 'airfl%'. There is the possibility to precise a delimiter to</span>
+<span class="sd"> indicate the hierarchy or keys, meaning that the match will stop at that</span>
+<span class="sd"> delimiter. Current code accepts sane delimiters, i.e. characters that</span>
+<span class="sd"> are NOT special characters in the Python regex engine.</span>
+
+<span class="sd"> :param bucket_name: Name of the S3 bucket</span>
+<span class="sd"> :type bucket_name: str</span>
+<span class="sd"> :param prefix: The prefix being waited on. Relative path from bucket root level.</span>
+<span class="sd"> :type prefix: str</span>
+<span class="sd"> :param delimiter: The delimiter intended to show hierarchy.</span>
+<span class="sd"> Defaults to '/'.</span>
+<span class="sd"> :type delimiter: str</span>
+<span class="sd"> """</span>
+ <span class="n">template_fields</span> <span class="o">=</span> <span class="p">(</span><span class="s1">'prefix'</span><span class="p">,</span> <span class="s1">'bucket_name'</span><span class="p">)</span>
+
+ <span class="nd">@apply_defaults</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span> <span class="n">bucket_name</span><span class="p">,</span>
+ <span class="n">prefix</span><span class="p">,</span> <span class="n">delimiter</span><span class="o">=</span><span class="s1">'/'</span><span class="p">,</span>
+ <span class="n">s3_conn_id</span><span class="o">=</span><span class="s1">'s3_default'</span><span class="p">,</span>
+ <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="nb">super</span><span class="p">(</span><span class="n">S3PrefixSensor</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+ <span class="n">session</span> <span class="o">=</span> <span class="n">settings</span><span class="o">.</span><span class="n">Session</span><span class="p">()</span>
+ <span class="n">db</span> <span class="o">=</span> <span class="n">session</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">DB</span><span class="p">)</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">DB</span><span class="o">.</span><span class="n">conn_id</span> <span class="o">==</span> <span class="n">s3_conn_id</span><span class="p">)</span><span class="o">.</span><span class="n [...]
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">db</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span><span class="s2">"conn_id doesn't exist in the repository"</span><span class="p">)</span>
+ <span class="c1"># Parse</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">bucket_name</span> <span class="o">=</span> <span class="n">bucket_name</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">prefix</span> <span class="o">=</span> <span class="n">prefix</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">delimiter</span> <span class="o">=</span> <span class="n">delimiter</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">full_url</span> <span class="o">=</span> <span class="s2">"s3://"</span> <span class="o">+</span> <span class="n">bucket_name</span> <span class="o">+</span> <span class="s1">'/'</span> <span class="o">+</span> <span class="n">prefix</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">s3_conn_id</span> <span class="o">=</span> <span class="n">s3_conn_id</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+
+ <span class="k">def</span> <span class="nf">poke</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Poking for prefix : {self.prefix}</span><span class="se">\n</span><span class="s1">'</span>
+ <span class="s1">'in bucket s3://{self.bucket_name}'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">()))</span>
+ <span class="n">hook</span> <span class="o">=</span> <span class="n">hooks</span><span class="o">.</span><span class="n">S3Hook</span><span class="p">(</span><span class="n">s3_conn_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">s3_conn_id</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">hook</span><span class="o">.</span><span class="n">check_for_prefix</span><span class="p">(</span>
+ <span class="n">prefix</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">prefix</span><span class="p">,</span>
+ <span class="n">delimiter</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">delimiter</span><span class="p">,</span>
+ <span class="n">bucket_name</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">bucket_name</span><span class="p">)</span>
+
+
+<span class="k">class</span> <span class="nc">TimeSensor</span><span class="p">(</span><span class="n">BaseSensorOperator</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Waits until the specified time of the day.</span>
+
+<span class="sd"> :param target_time: time after which the job succeeds</span>
+<span class="sd"> :type target_time: datetime.time</span>
+<span class="sd"> """</span>
+ <span class="n">template_fields</span> <span class="o">=</span> <span class="nb">tuple</span><span class="p">()</span>
+
+ <span class="nd">@apply_defaults</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">target_time</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="nb">super</span><span class="p">(</span><span class="n">TimeSensor</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">target_time</span> <span class="o">=</span> <span class="n">target_time</span>
+
+ <span class="k">def</span> <span class="nf">poke</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
+ <span class="s1">'Checking if the time ({0}) has come'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">target_time</span><span class="p">))</span>
+ <span class="k">return</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span><span class="o">.</span><span class="n">time</span><span class="p">()</span> <span class="o">></span> <span class="bp">self</span><span class="o">.</span><span class="n">target_time</span>
+
+
+<span class="k">class</span> <span class="nc">TimeDeltaSensor</span><span class="p">(</span><span class="n">BaseSensorOperator</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Waits for a timedelta after the task's execution_date + schedule_interval.</span>
+<span class="sd"> In Airflow, the daily task stamped with ``execution_date``</span>
+<span class="sd"> 2016-01-01 can only start running on 2016-01-02. The timedelta here</span>
+<span class="sd"> represents the time after the execution period has closed.</span>
+
+<span class="sd"> :param delta: time length to wait after execution_date before succeeding</span>
+<span class="sd"> :type delta: datetime.timedelta</span>
+<span class="sd"> """</span>
+ <span class="n">template_fields</span> <span class="o">=</span> <span class="nb">tuple</span><span class="p">()</span>
+
+ <span class="nd">@apply_defaults</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">delta</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="nb">super</span><span class="p">(</span><span class="n">TimeDeltaSensor</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">delta</span> <span class="o">=</span> <span class="n">delta</span>
+
+ <span class="k">def</span> <span class="nf">poke</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+ <span class="n">dag</span> <span class="o">=</span> <span class="n">context</span><span class="p">[</span><span class="s1">'dag'</span><span class="p">]</span>
+ <span class="n">target_dttm</span> <span class="o">=</span> <span class="n">dag</span><span class="o">.</span><span class="n">following_schedule</span><span class="p">(</span><span class="n">context</span><span class="p">[</span><span class="s1">'execution_date'</span><span class="p">])</span>
+ <span class="n">target_dttm</span> <span class="o">+=</span> <span class="bp">self</span><span class="o">.</span><span class="n">delta</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Checking if the time ({0}) has come'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">target_dttm</span><span class="p">))</span>
+ <span class="k">return</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span> <span class="o">></span> <span class="n">target_dttm</span>
+
+
+<span class="k">class</span> <span class="nc">HttpSensor</span><span class="p">(</span><span class="n">BaseSensorOperator</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Executes a HTTP get statement and returns False on failure:</span>
+<span class="sd"> 404 not found or response_check function returned False</span>
+
+<span class="sd"> :param http_conn_id: The connection to run the sensor against</span>
+<span class="sd"> :type http_conn_id: string</span>
+<span class="sd"> :param endpoint: The relative part of the full url</span>
+<span class="sd"> :type endpoint: string</span>
+<span class="sd"> :param params: The parameters to be added to the GET url</span>
+<span class="sd"> :type params: a dictionary of string key/value pairs</span>
+<span class="sd"> :param headers: The HTTP headers to be added to the GET request</span>
+<span class="sd"> :type headers: a dictionary of string key/value pairs</span>
+<span class="sd"> :param response_check: A check against the 'requests' response object.</span>
+<span class="sd"> Returns True for 'pass' and False otherwise.</span>
+<span class="sd"> :type response_check: A lambda or defined function.</span>
+<span class="sd"> :param extra_options: Extra options for the 'requests' library, see the</span>
+<span class="sd"> 'requests' documentation (options to modify timeout, ssl, etc.)</span>
+<span class="sd"> :type extra_options: A dictionary of options, where key is string and value</span>
+<span class="sd"> depends on the option that's being modified.</span>
+<span class="sd"> """</span>
+
+ <span class="n">template_fields</span> <span class="o">=</span> <span class="p">(</span><span class="s1">'endpoint'</span><span class="p">,)</span>
+
+ <span class="nd">@apply_defaults</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
+ <span class="n">endpoint</span><span class="p">,</span>
+ <span class="n">http_conn_id</span><span class="o">=</span><span class="s1">'http_default'</span><span class="p">,</span>
+ <span class="n">params</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">headers</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">response_check</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">extra_options</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="nb">super</span><span class="p">(</span><span class="n">HttpSensor</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">endpoint</span> <span class="o">=</span> <span class="n">endpoint</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">http_conn_id</span> <span class="o">=</span> <span class="n">http_conn_id</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">params</span> <span class="o">=</span> <span class="n">params</span> <span class="ow">or</span> <span class="p">{}</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">headers</span> <span class="o">=</span> <span class="n">headers</span> <span class="ow">or</span> <span class="p">{}</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">extra_options</span> <span class="o">=</span> <span class="n">extra_options</span> <span class="ow">or</span> <span class="p">{}</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">response_check</span> <span class="o">=</span> <span class="n">response_check</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">hook</span> <span class="o">=</span> <span class="n">hooks</span><span class="o">.</span><span class="n">HttpHook</span><span class="p">(</span><span class="n">method</span><span class="o">=</span><span class="s1">'GET'</span><span class="p">,</span> <span class="n">http_conn_id</span><span class="o">=</span><span class="n">http_conn_id</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">poke</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Poking: '</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">endpoint</span><span class="p">)</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">response</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">hook</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">endpoint</span><span class="p">,</span>
+ <span class="n">data</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="p">,</span>
+ <span class="n">headers</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">headers</span><span class="p">,</span>
+ <span class="n">extra_options</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">extra_options</span><span class="p">)</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">response_check</span><span class="p">:</span>
+ <span class="c1"># run content check on response</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">response_check</span><span class="p">(</span><span class="n">response</span><span class="p">)</span>
+ <span class="k">except</span> <span class="n">AirflowException</span> <span class="k">as</span> <span class="n">ae</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">ae</span><span class="o">.</span><span class="n">message</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s2">"404"</span><span class="p">):</span>
+ <span class="k">return</span> <span class="bp">False</span>
+
+ <span class="k">raise</span> <span class="n">ae</span>
+
+ <span class="k">return</span> <span class="bp">True</span>
+</pre></div>
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright 2014, Maxime Beauchemin, Airbnb.
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../../../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../../../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/_modules/bash_operator.html b/_modules/bash_operator.html
new file mode 100644
index 0000000..98de858
--- /dev/null
+++ b/_modules/bash_operator.html
@@ -0,0 +1,290 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>bash_operator — Airflow Documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="top" title="Airflow Documentation" href="../index.html"/>
+ <link rel="up" title="Module code" href="index.html"/>
+
+
+ <script src="../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../index.html" class="icon icon-home"> Airflow
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../project.html">Project</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../license.html">License</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../start.html">Quick Start</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../installation.html">Installation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../tutorial.html">Tutorial</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../configuration.html">Configuration</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../ui.html">UI / Screenshots</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../concepts.html">Concepts</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../profiling.html">Data Profiling</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../cli.html">Command Line Interface</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../scheduler.html">Scheduling & Triggers</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../plugins.html">Plugins</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../security.html">Security</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../faq.html">FAQ</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../code.html">API Reference</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../index.html">Airflow</a>
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+ <ul class="wy-breadcrumbs">
+ <li><a href="../index.html">Docs</a> »</li>
+
+ <li><a href="index.html">Module code</a> »</li>
+
+ <li>bash_operator</li>
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+ </ul>
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for bash_operator</h1><div class="highlight"><pre>
+<span></span>
+<span class="kn">from</span> <span class="nn">builtins</span> <span class="kn">import</span> <span class="nb">bytes</span>
+<span class="kn">import</span> <span class="nn">logging</span>
+<span class="kn">from</span> <span class="nn">subprocess</span> <span class="kn">import</span> <span class="n">Popen</span><span class="p">,</span> <span class="n">STDOUT</span><span class="p">,</span> <span class="n">PIPE</span>
+<span class="kn">from</span> <span class="nn">tempfile</span> <span class="kn">import</span> <span class="n">gettempdir</span><span class="p">,</span> <span class="n">NamedTemporaryFile</span>
+
+<span class="kn">from</span> <span class="nn">airflow.exceptions</span> <span class="kn">import</span> <span class="n">AirflowException</span>
+<span class="kn">from</span> <span class="nn">airflow.models</span> <span class="kn">import</span> <span class="n">BaseOperator</span>
+<span class="kn">from</span> <span class="nn">airflow.utils.decorators</span> <span class="kn">import</span> <span class="n">apply_defaults</span>
+<span class="kn">from</span> <span class="nn">airflow.utils.file</span> <span class="kn">import</span> <span class="n">TemporaryDirectory</span>
+
+
+<div class="viewcode-block" id="BashOperator"><a class="viewcode-back" href="../code.html#airflow.operators.BashOperator">[docs]</a><span class="k">class</span> <span class="nc">BashOperator</span><span class="p">(</span><span class="n">BaseOperator</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Execute a Bash script, command or set of commands.</span>
+
+<span class="sd"> :param bash_command: The command, set of commands or reference to a</span>
+<span class="sd"> bash script (must be '.sh') to be executed.</span>
+<span class="sd"> :type bash_command: string</span>
+<span class="sd"> :param env: If env is not None, it must be a mapping that defines the</span>
+<span class="sd"> environment variables for the new process; these are used instead</span>
+<span class="sd"> of inheriting the current process environment, which is the default</span>
+<span class="sd"> behavior. (templated)</span>
+<span class="sd"> :type env: dict</span>
+<span class="sd"> :param output_encoding: output encoding of bash command</span>
+<span class="sd"> """</span>
+ <span class="n">template_fields</span> <span class="o">=</span> <span class="p">(</span><span class="s1">'bash_command'</span><span class="p">,</span> <span class="s1">'env'</span><span class="p">)</span>
+ <span class="n">template_ext</span> <span class="o">=</span> <span class="p">(</span><span class="s1">'.sh'</span><span class="p">,</span> <span class="s1">'.bash'</span><span class="p">,)</span>
+ <span class="n">ui_color</span> <span class="o">=</span> <span class="s1">'#f0ede4'</span>
+
+ <span class="nd">@apply_defaults</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span>
+ <span class="n">bash_command</span><span class="p">,</span>
+ <span class="n">xcom_push</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">env</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">output_encoding</span><span class="o">=</span><span class="s1">'utf-8'</span><span class="p">,</span>
+ <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> If xcom_push is True, the last line written to stdout will also</span>
+<span class="sd"> be pushed to an XCom when the bash command completes.</span>
+<span class="sd"> """</span>
+ <span class="nb">super</span><span class="p">(</span><span class="n">BashOperator</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">bash_command</span> <span class="o">=</span> <span class="n">bash_command</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">env</span> <span class="o">=</span> <span class="n">env</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">xcom_push_flag</span> <span class="o">=</span> <span class="n">xcom_push</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">output_encoding</span> <span class="o">=</span> <span class="n">output_encoding</span>
+
+<div class="viewcode-block" id="BashOperator.execute"><a class="viewcode-back" href="../code.html#airflow.operators.BashOperator.execute">[docs]</a> <span class="k">def</span> <span class="nf">execute</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Execute the bash command in a temporary directory</span>
+<span class="sd"> which will be cleaned afterwards</span>
+<span class="sd"> """</span>
+ <span class="n">bash_command</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">bash_command</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"tmp dir root location: </span><span class="se">\n</span><span class="s2">"</span> <span class="o">+</span> <span class="n">gettempdir</span><span class="p">())</span>
+ <span class="k">with</span> <span class="n">TemporaryDirectory</span><span class="p">(</span><span class="n">prefix</span><span class="o">=</span><span class="s1">'airflowtmp'</span><span class="p">)</span> <span class="k">as</span> <span class="n">tmp_dir</span><span class="p">:</span>
+ <span class="k">with</span> <span class="n">NamedTemporaryFile</span><span class="p">(</span><span class="nb">dir</span><span class="o">=</span><span class="n">tmp_dir</span><span class="p">,</span> <span class="n">prefix</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">task_id</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
+
+ <span class="n">f</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="nb">bytes</span><span class="p">(</span><span class="n">bash_command</span><span class="p">,</span> <span class="s1">'utf_8'</span><span class="p">))</span>
+ <span class="n">f</span><span class="o">.</span><span class="n">flush</span><span class="p">()</span>
+ <span class="n">fname</span> <span class="o">=</span> <span class="n">f</span><span class="o">.</span><span class="n">name</span>
+ <span class="n">script_location</span> <span class="o">=</span> <span class="n">tmp_dir</span> <span class="o">+</span> <span class="s2">"/"</span> <span class="o">+</span> <span class="n">fname</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"Temporary script "</span>
+ <span class="s2">"location :{0}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">script_location</span><span class="p">))</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"Running command: "</span> <span class="o">+</span> <span class="n">bash_command</span><span class="p">)</span>
+ <span class="n">sp</span> <span class="o">=</span> <span class="n">Popen</span><span class="p">(</span>
+ <span class="p">[</span><span class="s1">'bash'</span><span class="p">,</span> <span class="n">fname</span><span class="p">],</span>
+ <span class="n">stdout</span><span class="o">=</span><span class="n">PIPE</span><span class="p">,</span> <span class="n">stderr</span><span class="o">=</span><span class="n">STDOUT</span><span class="p">,</span>
+ <span class="n">cwd</span><span class="o">=</span><span class="n">tmp_dir</span><span class="p">,</span> <span class="n">env</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="p">)</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">sp</span> <span class="o">=</span> <span class="n">sp</span>
+
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"Output:"</span><span class="p">)</span>
+ <span class="n">line</span> <span class="o">=</span> <span class="s1">''</span>
+ <span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="nb">iter</span><span class="p">(</span><span class="n">sp</span><span class="o">.</span><span class="n">stdout</span><span class="o">.</span><span class="n">readline</span><span class="p">,</span> <span class="n">b</span><span class="s1">''</span><span class="p">):</span>
+ <span class="n">line</span> <span class="o">=</span> <span class="n">line</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">output_encoding</span><span class="p">)</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="n">line</span><span class="p">)</span>
+ <span class="n">sp</span><span class="o">.</span><span class="n">wait</span><span class="p">()</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"Command exited with "</span>
+ <span class="s2">"return code {0}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">sp</span><span class="o">.</span><span class="n">returncode</span><span class="p">))</span>
+
+ <span class="k">if</span> <span class="n">sp</span><span class="o">.</span><span class="n">returncode</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span><span class="s2">"Bash command failed"</span><span class="p">)</span>
+
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">xcom_push_flag</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">line</span></div>
+
+ <span class="k">def</span> <span class="nf">on_kill</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Sending SIGTERM signal to bash subprocess'</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">sp</span><span class="o">.</span><span class="n">terminate</span><span class="p">()</span></div>
+</pre></div>
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright 2014, Maxime Beauchemin, Airbnb.
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/_modules/cloudant_hook.html b/_modules/cloudant_hook.html
new file mode 100644
index 0000000..4c5bb1f
--- /dev/null
+++ b/_modules/cloudant_hook.html
@@ -0,0 +1,274 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>cloudant_hook — Airflow Documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="top" title="Airflow Documentation" href="../index.html"/>
+ <link rel="up" title="Module code" href="index.html"/>
+
+
+ <script src="../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../index.html" class="icon icon-home"> Airflow
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../project.html">Project</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../license.html">License</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../start.html">Quick Start</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../installation.html">Installation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../tutorial.html">Tutorial</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../configuration.html">Configuration</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../ui.html">UI / Screenshots</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../concepts.html">Concepts</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../profiling.html">Data Profiling</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../cli.html">Command Line Interface</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../scheduler.html">Scheduling &amp; Triggers</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../plugins.html">Plugins</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../security.html">Security</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../faq.html">FAQ</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../code.html">API Reference</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../index.html">Airflow</a>
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+ <ul class="wy-breadcrumbs">
+ <li><a href="../index.html">Docs</a> »</li>
+
+ <li><a href="index.html">Module code</a> »</li>
+
+ <li>cloudant_hook</li>
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+ </ul>
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for cloudant_hook</h1><div class="highlight"><pre>
+<span></span><span class="c1"># -*- coding: utf-8 -*-</span>
+<span class="c1">#</span>
+<span class="c1"># Licensed under the Apache License, Version 2.0 (the "License");</span>
+<span class="c1"># you may not use this file except in compliance with the License.</span>
+<span class="c1"># You may obtain a copy of the License at</span>
+<span class="c1">#</span>
+<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
+<span class="c1">#</span>
+<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
+<span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span>
+<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
+<span class="c1"># See the License for the specific language governing permissions and</span>
+<span class="c1"># limitations under the License.</span>
+<span class="kn">import</span> <span class="nn">logging</span>
+
+<span class="kn">import</span> <span class="nn">cloudant</span>
+
+<span class="kn">from</span> <span class="nn">airflow.exceptions</span> <span class="kn">import</span> <span class="n">AirflowException</span>
+<span class="kn">from</span> <span class="nn">airflow.hooks.base_hook</span> <span class="kn">import</span> <span class="n">BaseHook</span>
+
+
+<div class="viewcode-block" id="CloudantHook"><a class="viewcode-back" href="../code.html#airflow.contrib.hooks.CloudantHook">[docs]</a><span class="k">class</span> <span class="nc">CloudantHook</span><span class="p">(</span><span class="n">BaseHook</span><span class="p">):</span>
+ <span class="sd">"""Interact with Cloudant.</span>
+
+<span class="sd"> This class is a thin wrapper around the cloudant python library. See the</span>
+<span class="sd"> documentation `here &lt;https://github.com/cloudant-labs/cloudant-python&gt;`_.</span>
+<span class="sd"> """</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">cloudant_conn_id</span><span class="o">=</span><span class="s1">'cloudant_default'</span><span class="p">):</span>
+ <span class="nb">super</span><span class="p">(</span><span class="n">CloudantHook</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">__init__</span><span class="p">(</span><span class="s1">'cloudant'</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">cloudant_conn_id</span> <span class="o">=</span> <span class="n">cloudant_conn_id</span>
+
+ <span class="k">def</span> <span class="nf">get_conn</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">def</span> <span class="nf">_str</span><span class="p">(</span><span class="n">s</span><span class="p">):</span>
+ <span class="c1"># cloudant-python doesn't support unicode.</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">s</span><span class="p">,</span> <span class="nb">unicode</span><span class="p">):</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">debug</span><span class="p">((</span><span class="s1">'cloudant-python does not support unicode. '</span>
+ <span class="s1">'Encoding </span><span class="si">%s</span><span class="s1"> as ascii using "ignore".'</span><span class="p">),</span>
+ <span class="n">s</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">s</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s1">'ascii'</span><span class="p">,</span> <span class="s1">'ignore'</span><span class="p">)</span>
+
+ <span class="k">return</span> <span class="n">s</span>
+
+ <span class="n">conn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_connection</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">cloudant_conn_id</span><span class="p">)</span>
+
+ <span class="k">for</span> <span class="n">conn_param</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'host'</span><span class="p">,</span> <span class="s1">'password'</span><span class="p">,</span> <span class="s1">'schema'</span><span class="p">]:</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">conn</span><span class="p">,</span> <span class="n">conn_param</span><span class="p">)</span> <span class="ow">or</span> <span class="ow">not</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">conn</span><span class="p">,</span> <span class="n">conn_param</span><span class="p">):</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span>
+ <span class="s1">'missing connection parameter {0}'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">conn_param</span><span class="p">)</span>
+ <span class="p">)</span>
+
+ <span class="c1"># In the connection form:</span>
+ <span class="c1"># - 'host' is renamed to 'Account'</span>
+ <span class="c1"># - 'login' is renamed to 'Username (or API Key)'</span>
+ <span class="c1"># - 'schema' is renamed to 'Database'</span>
+ <span class="c1">#</span>
+ <span class="c1"># So, use the 'host' attribute as the account name, and, if login is</span>
+ <span class="c1"># defined, use that as the username.</span>
+ <span class="n">account</span> <span class="o">=</span> <span class="n">cloudant</span><span class="o">.</span><span class="n">Account</span><span class="p">(</span><span class="n">_str</span><span class="p">(</span><span class="n">conn</span><span class="o">.</span><span class="n">host</span><span class="p">))</span>
+
+ <span class="n">username</span> <span class="o">=</span> <span class="n">_str</span><span class="p">(</span><span class="n">conn</span><span class="o">.</span><span class="n">login</span> <span class="ow">or</span> <span class="n">conn</span><span class="o">.</span><span class="n">host</span><span class="p">)</span>
+
+ <span class="n">account</span><span class="o">.</span><span class="n">login</span><span class="p">(</span>
+ <span class="n">username</span><span class="p">,</span>
+ <span class="n">_str</span><span class="p">(</span><span class="n">conn</span><span class="o">.</span><span class="n">password</span><span class="p">))</span><span class="o">.</span><span class="n">raise_for_status</span><span class="p">()</span>
+
+ <span class="k">return</span> <span class="n">account</span><span class="o">.</span><span class="n">database</span><span class="p">(</span><span class="n">_str</span><span class="p">(</span><span class="n">conn</span><span class="o">.</span><span class="n">schema</span><span class="p">))</span>
+
+<div class="viewcode-block" id="CloudantHook.db"><a class="viewcode-back" href="../code.html#airflow.contrib.hooks.CloudantHook.db">[docs]</a> <span class="k">def</span> <span class="nf">db</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""Returns the Database object for this hook.</span>
+
+<span class="sd"> See the documentation for cloudant-python here</span>
+<span class="sd"> https://github.com/cloudant-labs/cloudant-python.</span>
+<span class="sd"> """</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span></div></div>
+</pre></div>
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright 2014, Maxime Beauchemin, Airbnb.
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/_modules/dagrun_operator.html b/_modules/dagrun_operator.html
new file mode 100644
index 0000000..66f5cef
--- /dev/null
+++ b/_modules/dagrun_operator.html
@@ -0,0 +1,260 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>dagrun_operator — Airflow Documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="top" title="Airflow Documentation" href="../index.html"/>
+ <link rel="up" title="Module code" href="index.html"/>
+
+
+ <script src="../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../index.html" class="icon icon-home"> Airflow
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../project.html">Project</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../license.html">License</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../start.html">Quick Start</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../installation.html">Installation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../tutorial.html">Tutorial</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../configuration.html">Configuration</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../ui.html">UI / Screenshots</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../concepts.html">Concepts</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../profiling.html">Data Profiling</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../cli.html">Command Line Interface</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../scheduler.html">Scheduling &amp; Triggers</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../plugins.html">Plugins</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../security.html">Security</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../faq.html">FAQ</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../code.html">API Reference</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../index.html">Airflow</a>
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+ <ul class="wy-breadcrumbs">
+ <li><a href="../index.html">Docs</a> »</li>
+
+ <li><a href="index.html">Module code</a> »</li>
+
+ <li>dagrun_operator</li>
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+ </ul>
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for dagrun_operator</h1><div class="highlight"><pre>
+<span></span><span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span>
+<span class="kn">import</span> <span class="nn">logging</span>
+
+<span class="kn">from</span> <span class="nn">airflow.models</span> <span class="kn">import</span> <span class="n">BaseOperator</span><span class="p">,</span> <span class="n">DagRun</span>
+<span class="kn">from</span> <span class="nn">airflow.utils.decorators</span> <span class="kn">import</span> <span class="n">apply_defaults</span>
+<span class="kn">from</span> <span class="nn">airflow</span> <span class="kn">import</span> <span class="n">settings</span>
+
+
+<span class="k">class</span> <span class="nc">DagRunOrder</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">run_id</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">payload</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">run_id</span> <span class="o">=</span> <span class="n">run_id</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">payload</span> <span class="o">=</span> <span class="n">payload</span>
+
+
+<div class="viewcode-block" id="TriggerDagRunOperator"><a class="viewcode-back" href="../code.html#airflow.operators.TriggerDagRunOperator">[docs]</a><span class="k">class</span> <span class="nc">TriggerDagRunOperator</span><span class="p">(</span><span class="n">BaseOperator</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Triggers a DAG run for a specified ``dag_id`` if a criterion is met</span>
+
+<span class="sd"> :param trigger_dag_id: the dag_id to trigger</span>
+<span class="sd"> :type trigger_dag_id: str</span>
+<span class="sd"> :param python_callable: a reference to a python function that will be</span>
+<span class="sd"> called while passing it the ``context`` object and a placeholder</span>
+<span class="sd"> object ``obj`` for your callable to fill and return if you want</span>
+<span class="sd"> a DagRun created. This ``obj`` object contains a ``run_id`` and</span>
+<span class="sd"> ``payload`` attribute that you can modify in your function.</span>
+<span class="sd"> The ``run_id`` should be a unique identifier for that DAG run, and</span>
+<span class="sd"> the payload has to be a picklable object that will be made available</span>
+<span class="sd"> to your tasks while executing that DAG run. Your function header</span>
+<span class="sd"> should look like ``def foo(context, dag_run_obj):``</span>
+<span class="sd"> :type python_callable: python callable</span>
+<span class="sd"> """</span>
+ <span class="n">template_fields</span> <span class="o">=</span> <span class="nb">tuple</span><span class="p">()</span>
+ <span class="n">template_ext</span> <span class="o">=</span> <span class="nb">tuple</span><span class="p">()</span>
+ <span class="n">ui_color</span> <span class="o">=</span> <span class="s1">'#ffefeb'</span>
+ <span class="nd">@apply_defaults</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span>
+ <span class="n">trigger_dag_id</span><span class="p">,</span>
+ <span class="n">python_callable</span><span class="p">,</span>
+ <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="nb">super</span><span class="p">(</span><span class="n">TriggerDagRunOperator</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">python_callable</span> <span class="o">=</span> <span class="n">python_callable</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">trigger_dag_id</span> <span class="o">=</span> <span class="n">trigger_dag_id</span>
+
+ <span class="k">def</span> <span class="nf">execute</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+ <span class="n">dro</span> <span class="o">=</span> <span class="n">DagRunOrder</span><span class="p">(</span><span class="n">run_id</span><span class="o">=</span><span class="s1">'trig__'</span> <span class="o">+</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span><span class="o">.</span><span class="n">isoformat</span><span class="p">())</span>
+ <span class="n">dro</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">python_callable</span><span class="p">(</span><span class="n">context</span><span class="p">,</span> <span class="n">dro</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">dro</span><span class="p">:</span>
+ <span class="n">session</span> <span class="o">=</span> <span class="n">settings</span><span class="o">.</span><span class="n">Session</span><span class="p">()</span>
+ <span class="n">dr</span> <span class="o">=</span> <span class="n">DagRun</span><span class="p">(</span>
+ <span class="n">dag_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">trigger_dag_id</span><span class="p">,</span>
+ <span class="n">run_id</span><span class="o">=</span><span class="n">dro</span><span class="o">.</span><span class="n">run_id</span><span class="p">,</span>
+ <span class="n">conf</span><span class="o">=</span><span class="n">dro</span><span class="o">.</span><span class="n">payload</span><span class="p">,</span>
+ <span class="n">external_trigger</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"Creating DagRun {}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">dr</span><span class="p">))</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">dr</span><span class="p">)</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+ <span class="n">session</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"Criteria not met, moving on"</span><span class="p">)</span></div>
+</pre></div>
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright 2014, Maxime Beauchemin, Airbnb.
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/_modules/dbapi_hook.html b/_modules/dbapi_hook.html
new file mode 100644
index 0000000..06bbb28
--- /dev/null
+++ b/_modules/dbapi_hook.html
@@ -0,0 +1,426 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>dbapi_hook — Airflow Documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="top" title="Airflow Documentation" href="../index.html"/>
+ <link rel="up" title="Module code" href="index.html"/>
+
+
+ <script src="../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../index.html" class="icon icon-home"> Airflow
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../project.html">Project</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../license.html">License</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../start.html">Quick Start</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../installation.html">Installation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../tutorial.html">Tutorial</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../configuration.html">Configuration</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../ui.html">UI / Screenshots</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../concepts.html">Concepts</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../profiling.html">Data Profiling</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../cli.html">Command Line Interface</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../scheduler.html">Scheduling &amp; Triggers</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../plugins.html">Plugins</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../security.html">Security</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../faq.html">FAQ</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../code.html">API Reference</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../index.html">Airflow</a>
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+ <ul class="wy-breadcrumbs">
+ <li><a href="../index.html">Docs</a> »</li>
+
+ <li><a href="index.html">Module code</a> »</li>
+
+ <li>dbapi_hook</li>
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+ </ul>
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for dbapi_hook</h1><div class="highlight"><pre>
+<span></span>
+<span class="kn">from</span> <span class="nn">builtins</span> <span class="kn">import</span> <span class="nb">str</span>
+<span class="kn">from</span> <span class="nn">past.builtins</span> <span class="kn">import</span> <span class="nb">basestring</span>
+<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span>
+<span class="kn">import</span> <span class="nn">numpy</span>
+<span class="kn">import</span> <span class="nn">logging</span>
+
+<span class="kn">from</span> <span class="nn">airflow.hooks.base_hook</span> <span class="kn">import</span> <span class="n">BaseHook</span>
+<span class="kn">from</span> <span class="nn">airflow.exceptions</span> <span class="kn">import</span> <span class="n">AirflowException</span>
+
+
+<div class="viewcode-block" id="DbApiHook"><a class="viewcode-back" href="../code.html#airflow.hooks.DbApiHook">[docs]</a><span class="k">class</span> <span class="nc">DbApiHook</span><span class="p">(</span><span class="n">BaseHook</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Abstract base class for sql hooks.</span>
+<span class="sd"> """</span>
+ <span class="c1"># Override to provide the connection name.</span>
+ <span class="n">conn_name_attr</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="c1"># Override to have a default connection id for a particular dbHook</span>
+ <span class="n">default_conn_name</span> <span class="o">=</span> <span class="s1">'default_conn_id'</span>
+ <span class="c1"># Override if this db supports autocommit.</span>
+ <span class="n">supports_autocommit</span> <span class="o">=</span> <span class="bp">False</span>
+ <span class="c1"># Override with the object that exposes the connect method</span>
+ <span class="n">connector</span> <span class="o">=</span> <span class="bp">None</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">conn_name_attr</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span><span class="s2">"conn_name_attr is not defined"</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="nb">len</span><span class="p">(</span><span class="n">args</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
+ <span class="nb">setattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">conn_name_attr</span><span class="p">,</span> <span class="n">args</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
+ <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">conn_name_attr</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">kwargs</span><span class="p">:</span>
+ <span class="nb">setattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">conn_name_attr</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">default_conn_name</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="nb">setattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">conn_name_attr</span><span class="p">,</span> <span class="n">kwargs</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">conn_name_attr</span><span class="p">])</span>
+
+<div class="viewcode-block" id="DbApiHook.get_conn"><a class="viewcode-back" href="../code.html#airflow.hooks.DbApiHook.get_conn">[docs]</a> <span class="k">def</span> <span class="nf">get_conn</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""Returns a connection object</span>
+<span class="sd"> """</span>
+ <span class="n">db</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_connection</span><span class="p">(</span><span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">conn_name_attr</span><span class="p">))</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">connector</span><span class="o">.</span><span class="n">connect</span><span class="p">(</span>
+ <span class="n">host</span><span class="o">=</span><span class="n">db</span><span class="o">.</span><span class="n">host</span><span class="p">,</span>
+ <span class="n">port</span><span class="o">=</span><span class="n">db</span><span class="o">.</span><span class="n">port</span><span class="p">,</span>
+ <span class="n">username</span><span class="o">=</span><span class="n">db</span><span class="o">.</span><span class="n">login</span><span class="p">,</span>
+ <span class="n">schema</span><span class="o">=</span><span class="n">db</span><span class="o">.</span><span class="n">schema</span><span class="p">)</span></div>
+
+
+<div class="viewcode-block" id="DbApiHook.get_pandas_df"><a class="viewcode-back" href="../code.html#airflow.hooks.DbApiHook.get_pandas_df">[docs]</a> <span class="k">def</span> <span class="nf">get_pandas_df</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sql</span><span class="p">,</span> <span class="n">parameters</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">'''</span>
+<span class="sd"> Executes the sql and returns a pandas dataframe</span>
+
+<span class="sd"> :param sql: the sql statement to be executed (str) or a list of</span>
+<span class="sd"> sql statements to execute</span>
+<span class="sd"> :type sql: str or list</span>
+<span class="sd"> :param parameters: The parameters to render the SQL query with.</span>
+<span class="sd"> :type parameters: mapping or iterable</span>
+<span class="sd"> '''</span>
+ <span class="kn">import</span> <span class="nn">pandas.io.sql</span> <span class="kn">as</span> <span class="nn">psql</span>
+ <span class="n">conn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
+ <span class="n">df</span> <span class="o">=</span> <span class="n">psql</span><span class="o">.</span><span class="n">read_sql</span><span class="p">(</span><span class="n">sql</span><span class="p">,</span> <span class="n">con</span><span class="o">=</span><span class="n">conn</span><span class="p">,</span> <span class="n">params</span><span class="o">=</span><span class="n">parameters</span><span class="p">)</span>
+ <span class="n">conn</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+ <span class="k">return</span> <span class="n">df</span></div>
+
+<div class="viewcode-block" id="DbApiHook.get_records"><a class="viewcode-back" href="../code.html#airflow.hooks.DbApiHook.get_records">[docs]</a> <span class="k">def</span> <span class="nf">get_records</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sql</span><span class="p">,</span> <span class="n">parameters</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">'''</span>
+<span class="sd"> Executes the sql and returns a set of records.</span>
+
+<span class="sd"> :param sql: the sql statement to be executed (str) or a list of</span>
+<span class="sd"> sql statements to execute</span>
+<span class="sd"> :type sql: str or list</span>
+<span class="sd"> :param parameters: The parameters to render the SQL query with.</span>
+<span class="sd"> :type parameters: mapping or iterable</span>
+<span class="sd"> '''</span>
+ <span class="n">conn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
+ <span class="n">cur</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_cursor</span><span class="p">()</span>
+ <span class="k">if</span> <span class="n">parameters</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">cur</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">sql</span><span class="p">,</span> <span class="n">parameters</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">cur</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">sql</span><span class="p">)</span>
+ <span class="n">rows</span> <span class="o">=</span> <span class="n">cur</span><span class="o">.</span><span class="n">fetchall</span><span class="p">()</span>
+ <span class="n">cur</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+ <span class="n">conn</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+ <span class="k">return</span> <span class="n">rows</span></div>
+
+<div class="viewcode-block" id="DbApiHook.get_first"><a class="viewcode-back" href="../code.html#airflow.hooks.DbApiHook.get_first">[docs]</a> <span class="k">def</span> <span class="nf">get_first</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sql</span><span class="p">,</span> <span class="n">parameters</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">'''</span>
+<span class="sd"> Executes the sql and returns the first resulting row.</span>
+
+<span class="sd"> :param sql: the sql statement to be executed (str) or a list of</span>
+<span class="sd"> sql statements to execute</span>
+<span class="sd"> :type sql: str or list</span>
+<span class="sd"> :param parameters: The parameters to render the SQL query with.</span>
+<span class="sd"> :type parameters: mapping or iterable</span>
+<span class="sd"> '''</span>
+ <span class="n">conn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
+ <span class="n">cur</span> <span class="o">=</span> <span class="n">conn</span><span class="o">.</span><span class="n">cursor</span><span class="p">()</span>
+ <span class="k">if</span> <span class="n">parameters</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">cur</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">sql</span><span class="p">,</span> <span class="n">parameters</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">cur</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">sql</span><span class="p">)</span>
+ <span class="n">rows</span> <span class="o">=</span> <span class="n">cur</span><span class="o">.</span><span class="n">fetchone</span><span class="p">()</span>
+ <span class="n">cur</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+ <span class="n">conn</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+ <span class="k">return</span> <span class="n">rows</span></div>
+
+<div class="viewcode-block" id="DbApiHook.run"><a class="viewcode-back" href="../code.html#airflow.hooks.DbApiHook.run">[docs]</a> <span class="k">def</span> <span class="nf">run</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sql</span><span class="p">,</span> <span class="n">autocommit</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span> <span class="n">parameters</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Runs a command or a list of commands. Pass a list of sql</span>
+<span class="sd"> statements to the sql parameter to get them to execute</span>
+<span class="sd"> sequentially</span>
+
+<span class="sd"> :param sql: the sql statement to be executed (str) or a list of</span>
+<span class="sd"> sql statements to execute</span>
+<span class="sd"> :type sql: str or list</span>
+<span class="sd"> :param autocommit: What to set the connection's autocommit setting to</span>
+<span class="sd"> before executing the query.</span>
+<span class="sd"> :type autocommit: bool</span>
+<span class="sd"> :param parameters: The parameters to render the SQL query with.</span>
+<span class="sd"> :type parameters: mapping or iterable</span>
+<span class="sd"> """</span>
+ <span class="n">conn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">sql</span><span class="p">,</span> <span class="nb">basestring</span><span class="p">):</span>
+ <span class="n">sql</span> <span class="o">=</span> <span class="p">[</span><span class="n">sql</span><span class="p">]</span>
+
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">supports_autocommit</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">set_autocommit</span><span class="p">(</span><span class="n">conn</span><span class="p">,</span> <span class="n">autocommit</span><span class="p">)</span>
+
+ <span class="n">cur</span> <span class="o">=</span> <span class="n">conn</span><span class="o">.</span><span class="n">cursor</span><span class="p">()</span>
+ <span class="k">for</span> <span class="n">s</span> <span class="ow">in</span> <span class="n">sql</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="n">s</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">parameters</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">cur</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">s</span><span class="p">,</span> <span class="n">parameters</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">cur</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">s</span><span class="p">)</span>
+ <span class="n">cur</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+ <span class="n">conn</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+ <span class="n">conn</span><span class="o">.</span><span class="n">close</span><span class="p">()</span></div>
+
+ <span class="k">def</span> <span class="nf">set_autocommit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">conn</span><span class="p">,</span> <span class="n">autocommit</span><span class="p">):</span>
+ <span class="n">conn</span><span class="o">.</span><span class="n">autocommit</span> <span class="o">=</span> <span class="n">autocommit</span>
+
+<div class="viewcode-block" id="DbApiHook.get_cursor"><a class="viewcode-back" href="../code.html#airflow.hooks.DbApiHook.get_cursor">[docs]</a> <span class="k">def</span> <span class="nf">get_cursor</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns a cursor</span>
+<span class="sd"> """</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span><span class="o">.</span><span class="n">cursor</span><span class="p">()</span></div>
+
+<div class="viewcode-block" id="DbApiHook.insert_rows"><a class="viewcode-back" href="../code.html#airflow.hooks.DbApiHook.insert_rows">[docs]</a> <span class="k">def</span> <span class="nf">insert_rows</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">table</span><span class="p">,</span> <span class="n">rows</span><span class="p">,</span> <span class="n">target_fields</span><span class="o">=</span><span class="bp">None</span><span clas [...]
+ <span class="sd">"""</span>
+<span class="sd"> A generic way to insert a set of tuples into a table,</span>
+<span class="sd"> the whole set of inserts is treated as one transaction</span>
+
+<span class="sd"> :param table: Name of the target table</span>
+<span class="sd"> :type table: str</span>
+<span class="sd"> :param rows: The rows to insert into the table</span>
+<span class="sd"> :type rows: iterable of tuples</span>
+<span class="sd"> :param target_fields: The names of the columns to fill in the table</span>
+<span class="sd"> :type target_fields: iterable of strings</span>
+<span class="sd"> :param commit_every: The maximum number of rows to insert in one</span>
+<span class="sd"> transaction. Set to 0 to insert all rows in one transaction.</span>
+<span class="sd"> :type commit_every: int</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="n">target_fields</span><span class="p">:</span>
+ <span class="n">target_fields</span> <span class="o">=</span> <span class="s2">", "</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">target_fields</span><span class="p">)</span>
+ <span class="n">target_fields</span> <span class="o">=</span> <span class="s2">"({})"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">target_fields</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">target_fields</span> <span class="o">=</span> <span class="s1">''</span>
+ <span class="n">conn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
+ <span class="n">cur</span> <span class="o">=</span> <span class="n">conn</span><span class="o">.</span><span class="n">cursor</span><span class="p">()</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">supports_autocommit</span><span class="p">:</span>
+ <span class="n">cur</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="s1">'SET autocommit = 0'</span><span class="p">)</span>
+ <span class="n">conn</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+ <span class="n">i</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="k">for</span> <span class="n">row</span> <span class="ow">in</span> <span class="n">rows</span><span class="p">:</span>
+ <span class="n">i</span> <span class="o">+=</span> <span class="mi">1</span>
+ <span class="n">l</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="k">for</span> <span class="n">cell</span> <span class="ow">in</span> <span class="n">row</span><span class="p">:</span>
+ <span class="n">l</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_serialize_cell</span><span class="p">(</span><span class="n">cell</span><span class="p">))</span>
+ <span class="n">values</span> <span class="o">=</span> <span class="nb">tuple</span><span class="p">(</span><span class="n">l</span><span class="p">)</span>
+ <span class="n">sql</span> <span class="o">=</span> <span class="s2">"INSERT INTO {0} {1} VALUES ({2});"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
+ <span class="n">table</span><span class="p">,</span>
+ <span class="n">target_fields</span><span class="p">,</span>
+ <span class="s2">","</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">values</span><span class="p">))</span>
+ <span class="n">cur</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">sql</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">commit_every</span> <span class="ow">and</span> <span class="n">i</span> <span class="o">%</span> <span class="n">commit_every</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
+ <span class="n">conn</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
+ <span class="s2">"Loaded {i} into {table} rows so far"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">()))</span>
+ <span class="n">conn</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+ <span class="n">cur</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+ <span class="n">conn</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
+ <span class="s2">"Done loading. Loaded a total of {i} rows"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">()))</span></div>
+
+ <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">_serialize_cell</span><span class="p">(</span><span class="n">cell</span><span class="p">):</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">cell</span><span class="p">,</span> <span class="nb">basestring</span><span class="p">):</span>
+ <span class="k">return</span> <span class="s2">"'"</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">cell</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">"'"</span><span class="p">,</span> <span class="s2">"''"</span><span class="p">)</span> <span class="o">+</span> <span class="s2">"'"</span>
+ <span class="k">elif</span> <span class="n">cell</span> <span class="ow">is</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="k">return</span> <span class="s1">'NULL'</span>
+ <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">cell</span><span class="p">,</span> <span class="n">numpy</span><span class="o">.</span><span class="n">datetime64</span><span class="p">):</span>
+ <span class="k">return</span> <span class="s2">"'"</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">cell</span><span class="p">)</span> <span class="o">+</span> <span class="s2">"'"</span>
+ <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">cell</span><span class="p">,</span> <span class="n">datetime</span><span class="p">):</span>
+ <span class="k">return</span> <span class="s2">"'"</span> <span class="o">+</span> <span class="n">cell</span><span class="o">.</span><span class="n">isoformat</span><span class="p">()</span> <span class="o">+</span> <span class="s2">"'"</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">return</span> <span class="nb">str</span><span class="p">(</span><span class="n">cell</span><span class="p">)</span>
+
+<div class="viewcode-block" id="DbApiHook.bulk_dump"><a class="viewcode-back" href="../code.html#airflow.hooks.DbApiHook.bulk_dump">[docs]</a> <span class="k">def</span> <span class="nf">bulk_dump</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">table</span><span class="p">,</span> <span class="n">tmp_file</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Dumps a database table into a tab-delimited file</span>
+
+<span class="sd"> :param table: The name of the source table</span>
+<span class="sd"> :type table: str</span>
+<span class="sd"> :param tmp_file: The path of the target file</span>
+<span class="sd"> :type tmp_file: str</span>
+<span class="sd"> """</span>
+ <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">()</span></div>
+
+<div class="viewcode-block" id="DbApiHook.bulk_load"><a class="viewcode-back" href="../code.html#airflow.hooks.DbApiHook.bulk_load">[docs]</a> <span class="k">def</span> <span class="nf">bulk_load</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">table</span><span class="p">,</span> <span class="n">tmp_file</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Loads a tab-delimited file into a database table</span>
+
+<span class="sd"> :param table: The name of the target table</span>
+<span class="sd"> :type table: str</span>
+<span class="sd"> :param tmp_file: The path of the file to load into the table</span>
+<span class="sd"> :type tmp_file: str</span>
+<span class="sd"> """</span>
+ <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">()</span></div></div>
+</pre></div>
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright 2014, Maxime Beauchemin, Airbnb.
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/_modules/druid_hook.html b/_modules/druid_hook.html
new file mode 100644
index 0000000..97bb50f
--- /dev/null
+++ b/_modules/druid_hook.html
@@ -0,0 +1,369 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>druid_hook — Airflow Documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="top" title="Airflow Documentation" href="../index.html"/>
+ <link rel="up" title="Module code" href="index.html"/>
+
+
+ <script src="../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../index.html" class="icon icon-home"> Airflow
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../project.html">Project</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../license.html">License</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../start.html">Quick Start</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../installation.html">Installation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../tutorial.html">Tutorial</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../configuration.html">Configuration</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../ui.html">UI / Screenshots</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../concepts.html">Concepts</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../profiling.html">Data Profiling</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../cli.html">Command Line Interface</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../scheduler.html">Scheduling & Triggers</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../plugins.html">Plugins</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../security.html">Security</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../faq.html">FAQ</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../code.html">API Reference</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../index.html">Airflow</a>
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+ <ul class="wy-breadcrumbs">
+ <li><a href="../index.html">Docs</a> »</li>
+
+ <li><a href="index.html">Module code</a> »</li>
+
+ <li>druid_hook</li>
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+ </ul>
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for druid_hook</h1><div class="highlight"><pre>
+<span></span><span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">print_function</span>
+<span class="kn">import</span> <span class="nn">logging</span>
+<span class="kn">import</span> <span class="nn">json</span>
+<span class="kn">import</span> <span class="nn">time</span>
+
+<span class="kn">from</span> <span class="nn">pydruid.client</span> <span class="kn">import</span> <span class="n">PyDruid</span>
+<span class="kn">import</span> <span class="nn">requests</span>
+
+<span class="kn">from</span> <span class="nn">airflow.hooks.base_hook</span> <span class="kn">import</span> <span class="n">BaseHook</span>
+<span class="kn">from</span> <span class="nn">airflow.exceptions</span> <span class="kn">import</span> <span class="n">AirflowException</span>
+
+<span class="n">LOAD_CHECK_INTERVAL</span> <span class="o">=</span> <span class="mi">5</span>
+<span class="n">DEFAULT_TARGET_PARTITION_SIZE</span> <span class="o">=</span> <span class="mi">5000000</span>
+
+<span class="k">class</span> <span class="nc">AirflowDruidLoadException</span><span class="p">(</span><span class="n">AirflowException</span><span class="p">):</span>
+ <span class="k">pass</span>
+
+
+<div class="viewcode-block" id="DruidHook"><a class="viewcode-back" href="../code.html#airflow.hooks.DruidHook">[docs]</a><span class="k">class</span> <span class="nc">DruidHook</span><span class="p">(</span><span class="n">BaseHook</span><span class="p">):</span>
+ <span class="sd">'''</span>
+<span class="sd"> Interact with druid.</span>
+<span class="sd"> '''</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span>
+ <span class="n">druid_query_conn_id</span><span class="o">=</span><span class="s1">'druid_query_default'</span><span class="p">,</span>
+ <span class="n">druid_ingest_conn_id</span><span class="o">=</span><span class="s1">'druid_ingest_default'</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">druid_query_conn_id</span> <span class="o">=</span> <span class="n">druid_query_conn_id</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">druid_ingest_conn_id</span> <span class="o">=</span> <span class="n">druid_ingest_conn_id</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">header</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'content-type'</span><span class="p">:</span> <span class="s1">'application/json'</span><span class="p">}</span>
+
+<div class="viewcode-block" id="DruidHook.get_conn"><a class="viewcode-back" href="../code.html#airflow.hooks.DruidHook.get_conn">[docs]</a> <span class="k">def</span> <span class="nf">get_conn</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns a druid connection object for query</span>
+<span class="sd"> """</span>
+ <span class="n">conn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_connection</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">druid_query_conn_id</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">PyDruid</span><span class="p">(</span>
+ <span class="s2">"http://{conn.host}:{conn.port}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">()),</span>
+ <span class="n">conn</span><span class="o">.</span><span class="n">extra_dejson</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'endpoint'</span><span class="p">,</span> <span class="s1">''</span><span class="p">))</span></div>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">ingest_post_url</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="n">conn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_connection</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">druid_ingest_conn_id</span><span class="p">)</span>
+ <span class="n">host</span> <span class="o">=</span> <span class="n">conn</span><span class="o">.</span><span class="n">host</span>
+ <span class="n">port</span> <span class="o">=</span> <span class="n">conn</span><span class="o">.</span><span class="n">port</span>
+ <span class="n">endpoint</span> <span class="o">=</span> <span class="n">conn</span><span class="o">.</span><span class="n">extra_dejson</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'endpoint'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span>
+ <span class="k">return</span> <span class="s2">"http://{host}:{port}/{endpoint}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">())</span>
+
+ <span class="k">def</span> <span class="nf">get_ingest_status_url</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">task_id</span><span class="p">):</span>
+ <span class="n">post_url</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">ingest_post_url</span>
+ <span class="k">return</span> <span class="s2">"{post_url}/{task_id}/status"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">())</span>
+
+<div class="viewcode-block" id="DruidHook.construct_ingest_query"><a class="viewcode-back" href="../code.html#airflow.hooks.DruidHook.construct_ingest_query">[docs]</a> <span class="k">def</span> <span class="nf">construct_ingest_query</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span> <span class="n">datasource</span><span class="p">,</span> <span class="n">static_path</span><span class="p">,</span> <span class="n">ts_dim</span><span class="p">,</span> <span class="n">columns</span><span class="p">,</span> <span class="n">metric_spec</span><span class="p">,</span>
+ <span class="n">intervals</span><span class="p">,</span> <span class="n">num_shards</span><span class="p">,</span> <span class="n">target_partition_size</span><span class="p">,</span> <span class="n">hadoop_dependency_coordinates</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Builds an ingest query for an HDFS TSV load.</span>
+
+<span class="sd"> :param datasource: target datasource in druid</span>
+<span class="sd"> :param columns: list of all columns in the TSV, in the right order</span>
+<span class="sd"> """</span>
+
+ <span class="c1"># backward compatibility for num_shards, but target_partition_size is the default setting</span>
+ <span class="c1"># and overrides num_shards when both are given</span>
+ <span class="k">if</span> <span class="n">target_partition_size</span> <span class="o">==</span> <span class="o">-</span><span class="mi">1</span><span class="p">:</span>
+ <span class="k">if</span> <span class="n">num_shards</span> <span class="o">==</span> <span class="o">-</span><span class="mi">1</span><span class="p">:</span>
+ <span class="n">target_partition_size</span> <span class="o">=</span> <span class="n">DEFAULT_TARGET_PARTITION_SIZE</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">num_shards</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span>
+
+ <span class="n">metric_names</span> <span class="o">=</span> <span class="p">[</span>
+ <span class="n">m</span><span class="p">[</span><span class="s1">'fieldName'</span><span class="p">]</span> <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="n">metric_spec</span> <span class="k">if</span> <span class="n">m</span><span class="p">[</span><span class="s1">'type'</span><span class="p">]</span> <span class="o">!=</span> <span class="s1">'count'</span><span class="p">]</span>
+ <span class="n">dimensions</span> <span class="o">=</span> <span class="p">[</span><span class="n">c</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">columns</span> <span class="k">if</span> <span class="n">c</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">metric_names</span> <span class="ow">and</span> <span class="n">c</span> <span class="o">!=</span> <span class="n">ts_dim</span><span class="p">]</span>
+ <span class="n">ingest_query_dict</span> <span class="o">=</span> <span class="p">{</span>
+ <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"index_hadoop"</span><span class="p">,</span>
+ <span class="s2">"spec"</span><span class="p">:</span> <span class="p">{</span>
+ <span class="s2">"dataSchema"</span><span class="p">:</span> <span class="p">{</span>
+ <span class="s2">"metricsSpec"</span><span class="p">:</span> <span class="n">metric_spec</span><span class="p">,</span>
+ <span class="s2">"granularitySpec"</span><span class="p">:</span> <span class="p">{</span>
+ <span class="s2">"queryGranularity"</span><span class="p">:</span> <span class="s2">"NONE"</span><span class="p">,</span>
+ <span class="s2">"intervals"</span><span class="p">:</span> <span class="n">intervals</span><span class="p">,</span>
+ <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"uniform"</span><span class="p">,</span>
+ <span class="s2">"segmentGranularity"</span><span class="p">:</span> <span class="s2">"DAY"</span><span class="p">,</span>
+ <span class="p">},</span>
+ <span class="s2">"parser"</span><span class="p">:</span> <span class="p">{</span>
+ <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"string"</span><span class="p">,</span>
+ <span class="s2">"parseSpec"</span><span class="p">:</span> <span class="p">{</span>
+ <span class="s2">"columns"</span><span class="p">:</span> <span class="n">columns</span><span class="p">,</span>
+ <span class="s2">"dimensionsSpec"</span><span class="p">:</span> <span class="p">{</span>
+ <span class="s2">"dimensionExclusions"</span><span class="p">:</span> <span class="p">[],</span>
+ <span class="s2">"dimensions"</span><span class="p">:</span> <span class="n">dimensions</span><span class="p">,</span> <span class="c1"># list of names</span>
+ <span class="s2">"spatialDimensions"</span><span class="p">:</span> <span class="p">[]</span>
+ <span class="p">},</span>
+ <span class="s2">"timestampSpec"</span><span class="p">:</span> <span class="p">{</span>
+ <span class="s2">"column"</span><span class="p">:</span> <span class="n">ts_dim</span><span class="p">,</span>
+ <span class="s2">"format"</span><span class="p">:</span> <span class="s2">"auto"</span>
+ <span class="p">},</span>
+ <span class="s2">"format"</span><span class="p">:</span> <span class="s2">"tsv"</span>
+ <span class="p">}</span>
+ <span class="p">},</span>
+ <span class="s2">"dataSource"</span><span class="p">:</span> <span class="n">datasource</span>
+ <span class="p">},</span>
+ <span class="s2">"tuningConfig"</span><span class="p">:</span> <span class="p">{</span>
+ <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"hadoop"</span><span class="p">,</span>
+ <span class="s2">"jobProperties"</span><span class="p">:</span> <span class="p">{</span>
+ <span class="s2">"mapreduce.job.user.classpath.first"</span><span class="p">:</span> <span class="s2">"false"</span><span class="p">,</span>
+ <span class="s2">"mapreduce.map.output.compress"</span><span class="p">:</span> <span class="s2">"false"</span><span class="p">,</span>
+ <span class="s2">"mapreduce.output.fileoutputformat.compress"</span><span class="p">:</span> <span class="s2">"false"</span><span class="p">,</span>
+ <span class="p">},</span>
+ <span class="s2">"partitionsSpec"</span> <span class="p">:</span> <span class="p">{</span>
+ <span class="s2">"type"</span> <span class="p">:</span> <span class="s2">"hashed"</span><span class="p">,</span>
+ <span class="s2">"targetPartitionSize"</span> <span class="p">:</span> <span class="n">target_partition_size</span><span class="p">,</span>
+ <span class="s2">"numShards"</span> <span class="p">:</span> <span class="n">num_shards</span><span class="p">,</span>
+ <span class="p">},</span>
+ <span class="p">},</span>
+ <span class="s2">"ioConfig"</span><span class="p">:</span> <span class="p">{</span>
+ <span class="s2">"inputSpec"</span><span class="p">:</span> <span class="p">{</span>
+ <span class="s2">"paths"</span><span class="p">:</span> <span class="n">static_path</span><span class="p">,</span>
+ <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"static"</span>
+ <span class="p">},</span>
+ <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"hadoop"</span>
+ <span class="p">}</span>
+ <span class="p">}</span>
+ <span class="p">}</span>
+ <span class="k">if</span> <span class="n">hadoop_dependency_coordinates</span><span class="p">:</span>
+ <span class="n">ingest_query_dict</span><span class="p">[</span>
+ <span class="s1">'hadoopDependencyCoordinates'</span><span class="p">]</span> <span class="o">=</span> <span class="n">hadoop_dependency_coordinates</span>
+
+ <span class="k">return</span> <span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">ingest_query_dict</span><span class="p">,</span> <span class="n">indent</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span></div>
+
+ <span class="k">def</span> <span class="nf">send_ingest_query</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span> <span class="n">datasource</span><span class="p">,</span> <span class="n">static_path</span><span class="p">,</span> <span class="n">ts_dim</span><span class="p">,</span> <span class="n">columns</span><span class="p">,</span> <span class="n">metric_spec</span><span class="p">,</span>
+ <span class="n">intervals</span><span class="p">,</span> <span class="n">num_shards</span><span class="p">,</span> <span class="n">target_partition_size</span><span class="p">,</span> <span class="n">hadoop_dependency_coordinates</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="n">query</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">construct_ingest_query</span><span class="p">(</span>
+ <span class="n">datasource</span><span class="p">,</span> <span class="n">static_path</span><span class="p">,</span> <span class="n">ts_dim</span><span class="p">,</span> <span class="n">columns</span><span class="p">,</span>
+ <span class="n">metric_spec</span><span class="p">,</span> <span class="n">intervals</span><span class="p">,</span> <span class="n">num_shards</span><span class="p">,</span> <span class="n">target_partition_size</span><span class="p">,</span> <span class="n">hadoop_dependency_coordinates</span><span class="p">)</span>
+ <span class="n">r</span> <span class="o">=</span> <span class="n">requests</span><span class="o">.</span><span class="n">post</span><span class="p">(</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">ingest_post_url</span><span class="p">,</span> <span class="n">headers</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">header</span><span class="p">,</span> <span class="n">data</span><span class="o">=</span><span class="n">query</span><span class="p">)</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">ingest_post_url</span><span class="p">)</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="n">query</span><span class="p">)</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="n">r</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
+ <span class="n">d</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">r</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
+ <span class="k">if</span> <span class="s2">"task"</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">d</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">AirflowDruidLoadException</span><span class="p">(</span>
+ <span class="s2">"[Error]: Ingesting data to druid failed."</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">d</span><span class="p">[</span><span class="s2">"task"</span><span class="p">]</span>
+
+<div class="viewcode-block" id="DruidHook.load_from_hdfs"><a class="viewcode-back" href="../code.html#airflow.hooks.DruidHook.load_from_hdfs">[docs]</a> <span class="k">def</span> <span class="nf">load_from_hdfs</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span> <span class="n">datasource</span><span class="p">,</span> <span class="n">static_path</span><span class="p">,</span> <span class="n">ts_dim</span><span class="p">,</span> <span class="n">columns</span><span class="p">,</span>
+ <span class="n">intervals</span><span class="p">,</span> <span class="n">num_shards</span><span class="p">,</span> <span class="n">target_partition_size</span><span class="p">,</span> <span class="n">metric_spec</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">hadoop_dependency_coordinates</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> load data to druid from hdfs</span>
+<span class="sd"> :param ts_dim: The column name to use as a timestamp</span>
+<span class="sd"> :param metric_spec: A list of dictionaries</span>
+<span class="sd"> """</span>
+ <span class="n">task_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">send_ingest_query</span><span class="p">(</span>
+ <span class="n">datasource</span><span class="p">,</span> <span class="n">static_path</span><span class="p">,</span> <span class="n">ts_dim</span><span class="p">,</span> <span class="n">columns</span><span class="p">,</span> <span class="n">metric_spec</span><span class="p">,</span>
+ <span class="n">intervals</span><span class="p">,</span> <span class="n">num_shards</span><span class="p">,</span> <span class="n">target_partition_size</span><span class="p">,</span> <span class="n">hadoop_dependency_coordinates</span><span class="p">)</span>
+ <span class="n">status_url</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_ingest_status_url</span><span class="p">(</span><span class="n">task_id</span><span class="p">)</span>
+ <span class="k">while</span> <span class="bp">True</span><span class="p">:</span>
+ <span class="n">r</span> <span class="o">=</span> <span class="n">requests</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">status_url</span><span class="p">)</span>
+ <span class="n">d</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">r</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">d</span><span class="p">[</span><span class="s1">'status'</span><span class="p">][</span><span class="s1">'status'</span><span class="p">]</span> <span class="o">==</span> <span class="s1">'FAILED'</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="n">d</span><span class="p">)</span>
+ <span class="k">raise</span> <span class="n">AirflowDruidLoadException</span><span class="p">(</span>
+ <span class="s2">"[Error]: Ingesting data to druid failed."</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="n">d</span><span class="p">[</span><span class="s1">'status'</span><span class="p">][</span><span class="s1">'status'</span><span class="p">]</span> <span class="o">==</span> <span class="s1">'SUCCESS'</span><span class="p">:</span>
+ <span class="k">break</span>
+ <span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="n">LOAD_CHECK_INTERVAL</span><span class="p">)</span></div></div>
+</pre></div>
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright 2014, Maxime Beauchemin, Airbnb.
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/_modules/dummy_operator.html b/_modules/dummy_operator.html
new file mode 100644
index 0000000..25f7c29
--- /dev/null
+++ b/_modules/dummy_operator.html
@@ -0,0 +1,219 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>dummy_operator — Airflow Documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="top" title="Airflow Documentation" href="../index.html"/>
+ <link rel="up" title="Module code" href="index.html"/>
+
+
+ <script src="../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../index.html" class="icon icon-home"> Airflow
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../project.html">Project</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../license.html">License</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../start.html">Quick Start</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../installation.html">Installation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../tutorial.html">Tutorial</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../configuration.html">Configuration</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../ui.html">UI / Screenshots</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../concepts.html">Concepts</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../profiling.html">Data Profiling</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../cli.html">Command Line Interface</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../scheduler.html">Scheduling &amp; Triggers</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../plugins.html">Plugins</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../security.html">Security</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../faq.html">FAQ</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../code.html">API Reference</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../index.html">Airflow</a>
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+ <ul class="wy-breadcrumbs">
+ <li><a href="../index.html">Docs</a> »</li>
+
+ <li><a href="index.html">Module code</a> »</li>
+
+ <li>dummy_operator</li>
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+ </ul>
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for dummy_operator</h1><div class="highlight"><pre>
+<span></span><span class="kn">from</span> <span class="nn">airflow.models</span> <span class="kn">import</span> <span class="n">BaseOperator</span>
+<span class="kn">from</span> <span class="nn">airflow.utils.decorators</span> <span class="kn">import</span> <span class="n">apply_defaults</span>
+
+
+<div class="viewcode-block" id="DummyOperator"><a class="viewcode-back" href="../code.html#airflow.operators.DummyOperator">[docs]</a><span class="k">class</span> <span class="nc">DummyOperator</span><span class="p">(</span><span class="n">BaseOperator</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Operator that does literally nothing. It can be used to group tasks in a</span>
+<span class="sd"> DAG.</span>
+<span class="sd"> """</span>
+
+ <span class="n">template_fields</span> <span class="o">=</span> <span class="nb">tuple</span><span class="p">()</span>
+ <span class="n">ui_color</span> <span class="o">=</span> <span class="s1">'#e8f7e4'</span>
+
+ <span class="nd">@apply_defaults</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="nb">super</span><span class="p">(</span><span class="n">DummyOperator</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">execute</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+ <span class="k">pass</span></div>
+</pre></div>
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright 2014, Maxime Beauchemin, Airbnb.
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/_modules/email_operator.html b/_modules/email_operator.html
new file mode 100644
index 0000000..7c9eeac
--- /dev/null
+++ b/_modules/email_operator.html
@@ -0,0 +1,240 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>email_operator — Airflow Documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="top" title="Airflow Documentation" href="../index.html"/>
+ <link rel="up" title="Module code" href="index.html"/>
+
+
+ <script src="../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../index.html" class="icon icon-home"> Airflow
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../project.html">Project</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../license.html">License</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../start.html">Quick Start</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../installation.html">Installation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../tutorial.html">Tutorial</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../configuration.html">Configuration</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../ui.html">UI / Screenshots</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../concepts.html">Concepts</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../profiling.html">Data Profiling</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../cli.html">Command Line Interface</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../scheduler.html">Scheduling &amp; Triggers</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../plugins.html">Plugins</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../security.html">Security</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../faq.html">FAQ</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../code.html">API Reference</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../index.html">Airflow</a>
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+ <ul class="wy-breadcrumbs">
+ <li><a href="../index.html">Docs</a> »</li>
+
+ <li><a href="index.html">Module code</a> »</li>
+
+ <li>email_operator</li>
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+ </ul>
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for email_operator</h1><div class="highlight"><pre>
+<span></span><span class="kn">from</span> <span class="nn">airflow.models</span> <span class="kn">import</span> <span class="n">BaseOperator</span>
+<span class="kn">from</span> <span class="nn">airflow.utils.email</span> <span class="kn">import</span> <span class="n">send_email</span>
+<span class="kn">from</span> <span class="nn">airflow.utils.decorators</span> <span class="kn">import</span> <span class="n">apply_defaults</span>
+
+
+<div class="viewcode-block" id="EmailOperator"><a class="viewcode-back" href="../code.html#airflow.operators.EmailOperator">[docs]</a><span class="k">class</span> <span class="nc">EmailOperator</span><span class="p">(</span><span class="n">BaseOperator</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Sends an email.</span>
+
+<span class="sd"> :param to: list of email addresses to send the email to</span>
+<span class="sd"> :type to: list or string (comma or semicolon delimited)</span>
+<span class="sd"> :param subject: subject line for the email (templated)</span>
+<span class="sd"> :type subject: string</span>
+<span class="sd"> :param html_content: content of the email (templated), html markup</span>
+<span class="sd"> is allowed</span>
+<span class="sd"> :type html_content: string</span>
+<span class="sd"> :param files: file names to attach in email</span>
+<span class="sd"> :type files: list</span>
+<span class="sd"> """</span>
+
+ <span class="n">template_fields</span> <span class="o">=</span> <span class="p">(</span><span class="s1">'subject'</span><span class="p">,</span> <span class="s1">'html_content'</span><span class="p">)</span>
+ <span class="n">template_ext</span> <span class="o">=</span> <span class="p">(</span><span class="s1">'.html'</span><span class="p">,)</span>
+ <span class="n">ui_color</span> <span class="o">=</span> <span class="s1">'#e6faf9'</span>
+
+ <span class="nd">@apply_defaults</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span>
+ <span class="n">to</span><span class="p">,</span>
+ <span class="n">subject</span><span class="p">,</span>
+ <span class="n">html_content</span><span class="p">,</span>
+ <span class="n">files</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+ <span class="nb">super</span><span class="p">(</span><span class="n">EmailOperator</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">to</span> <span class="o">=</span> <span class="n">to</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">subject</span> <span class="o">=</span> <span class="n">subject</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">html_content</span> <span class="o">=</span> <span class="n">html_content</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">files</span> <span class="o">=</span> <span class="n">files</span> <span class="ow">or</span> <span class="p">[]</span>
+
+ <span class="k">def</span> <span class="nf">execute</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+ <span class="n">send_email</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">to</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">subject</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">html_content</span><span class="p">,</span> <span class="n">files</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><s [...]
+</pre></div>
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright 2014, Maxime Beauchemin, Airbnb.
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file
diff --git a/_modules/ftp_hook.html b/_modules/ftp_hook.html
new file mode 100644
index 0000000..f433866
--- /dev/null
+++ b/_modules/ftp_hook.html
@@ -0,0 +1,427 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>ftp_hook — Airflow Documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="top" title="Airflow Documentation" href="../index.html"/>
+ <link rel="up" title="Module code" href="index.html"/>
+
+
+ <script src="../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../index.html" class="icon icon-home"> Airflow
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../project.html">Project</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../license.html">License</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../start.html">Quick Start</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../installation.html">Installation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../tutorial.html">Tutorial</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../configuration.html">Configuration</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../ui.html">UI / Screenshots</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../concepts.html">Concepts</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../profiling.html">Data Profiling</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../cli.html">Command Line Interface</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../scheduler.html">Scheduling &amp; Triggers</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../plugins.html">Plugins</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../security.html">Security</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../faq.html">FAQ</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../code.html">API Reference</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../index.html">Airflow</a>
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+ <ul class="wy-breadcrumbs">
+ <li><a href="../index.html">Docs</a> »</li>
+
+ <li><a href="index.html">Module code</a> »</li>
+
+ <li>ftp_hook</li>
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+ </ul>
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for ftp_hook</h1><div class="highlight"><pre>
+<span></span><span class="c1"># -*- coding: utf-8 -*-</span>
+<span class="c1">#</span>
+<span class="c1"># Licensed under the Apache License, Version 2.0 (the "License");</span>
+<span class="c1"># you may not use this file except in compliance with the License.</span>
+<span class="c1"># You may obtain a copy of the License at</span>
+<span class="c1">#</span>
+<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
+<span class="c1">#</span>
+<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
+<span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span>
+<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
+<span class="c1"># See the License for the specific language governing permissions and</span>
+<span class="c1"># limitations under the License.</span>
+<span class="c1">#</span>
+
+<span class="kn">import</span> <span class="nn">datetime</span>
+<span class="kn">import</span> <span class="nn">ftplib</span>
+<span class="kn">import</span> <span class="nn">logging</span>
+<span class="kn">import</span> <span class="nn">os.path</span>
+<span class="kn">from</span> <span class="nn">airflow.hooks.base_hook</span> <span class="kn">import</span> <span class="n">BaseHook</span>
+<span class="kn">from</span> <span class="nn">past.builtins</span> <span class="kn">import</span> <span class="nb">basestring</span>
+
+
+<span class="k">def</span> <span class="nf">mlsd</span><span class="p">(</span><span class="n">conn</span><span class="p">,</span> <span class="n">path</span><span class="o">=</span><span class="s2">""</span><span class="p">,</span> <span class="n">facts</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">'''</span>
+<span class="sd"> BACKPORT FROM PYTHON3 FTPLIB</span>
+
+<span class="sd"> List a directory in a standardized format by using MLSD</span>
+<span class="sd"> command (RFC-3659). If path is omitted the current directory</span>
+<span class="sd"> is assumed. "facts" is a list of strings representing the type</span>
+<span class="sd"> of information desired (e.g. ["type", "size", "perm"]).</span>
+
+<span class="sd"> Return a generator object yielding a tuple of two elements</span>
+<span class="sd"> for every file found in path.</span>
+<span class="sd"> First element is the file name, the second one is a dictionary</span>
+<span class="sd"> including a variable number of "facts" depending on the server</span>
+<span class="sd"> and whether "facts" argument has been provided.</span>
+<span class="sd"> '''</span>
+ <span class="n">facts</span> <span class="o">=</span> <span class="n">facts</span> <span class="ow">or</span> <span class="p">[]</span>
+ <span class="k">if</span> <span class="n">facts</span><span class="p">:</span>
+ <span class="n">conn</span><span class="o">.</span><span class="n">sendcmd</span><span class="p">(</span><span class="s2">"OPTS MLST "</span> <span class="o">+</span> <span class="s2">";"</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">facts</span><span class="p">)</span> <span class="o">+</span> <span class="s2">";"</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">path</span><span class="p">:</span>
+ <span class="n">cmd</span> <span class="o">=</span> <span class="s2">"MLSD </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">path</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">cmd</span> <span class="o">=</span> <span class="s2">"MLSD"</span>
+ <span class="n">lines</span> <span class="o">=</span> <span class="p">[]</span>
+ <span class="n">conn</span><span class="o">.</span><span class="n">retrlines</span><span class="p">(</span><span class="n">cmd</span><span class="p">,</span> <span class="n">lines</span><span class="o">.</span><span class="n">append</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="n">lines</span><span class="p">:</span>
+ <span class="n">facts_found</span><span class="p">,</span> <span class="n">_</span><span class="p">,</span> <span class="n">name</span> <span class="o">=</span> <span class="n">line</span><span class="o">.</span><span class="n">rstrip</span><span class="p">(</span><span class="n">ftplib</span><span class="o">.</span><span class="n">CRLF</span><span class="p">)</span><span class="o">.</span><span class="n">partition</span><span class="p">(</span><span class="s1">' '</span> [...]
+ <span class="n">entry</span> <span class="o">=</span> <span class="p">{}</span>
+ <span class="k">for</span> <span class="n">fact</span> <span class="ow">in</span> <span class="n">facts_found</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">";"</span><span class="p">):</span>
+ <span class="n">key</span><span class="p">,</span> <span class="n">_</span><span class="p">,</span> <span class="n">value</span> <span class="o">=</span> <span class="n">fact</span><span class="o">.</span><span class="n">partition</span><span class="p">(</span><span class="s2">"="</span><span class="p">)</span>
+ <span class="n">entry</span><span class="p">[</span><span class="n">key</span><span class="o">.</span><span class="n">lower</span><span class="p">()]</span> <span class="o">=</span> <span class="n">value</span>
+ <span class="k">yield</span> <span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">entry</span><span class="p">)</span>
+
+
+<div class="viewcode-block" id="FTPHook"><a class="viewcode-back" href="../code.html#airflow.contrib.hooks.FTPHook">[docs]</a><span class="k">class</span> <span class="nc">FTPHook</span><span class="p">(</span><span class="n">BaseHook</span><span class="p">):</span>
+
+ <span class="sd">"""</span>
+<span class="sd"> Interact with FTP.</span>
+
+<span class="sd"> Errors may occur throughout, but they should be handled</span>
+<span class="sd"> downstream.</span>
+<span class="sd"> """</span>
+
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ftp_conn_id</span><span class="o">=</span><span class="s1">'ftp_default'</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">ftp_conn_id</span> <span class="o">=</span> <span class="n">ftp_conn_id</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">conn</span> <span class="o">=</span> <span class="bp">None</span>
+
+<div class="viewcode-block" id="FTPHook.get_conn"><a class="viewcode-back" href="../code.html#airflow.contrib.hooks.FTPHook.get_conn">[docs]</a> <span class="k">def</span> <span class="nf">get_conn</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns a FTP connection object</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">conn</span> <span class="ow">is</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">params</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_connection</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">ftp_conn_id</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">conn</span> <span class="o">=</span> <span class="n">ftplib</span><span class="o">.</span><span class="n">FTP</span><span class="p">(</span><span class="n">params</span><span class="o">.</span><span class="n">host</span><span class="p">,</span> <span class="n">params</span><span class="o">.</span><span class="n">login</span><span class="p">,</span> <span class="n">params</span><span class="o">.</span><span cl [...]
+
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">conn</span></div>
+
+<div class="viewcode-block" id="FTPHook.close_conn"><a class="viewcode-back" href="../code.html#airflow.contrib.hooks.FTPHook.close_conn">[docs]</a> <span class="k">def</span> <span class="nf">close_conn</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Closes the connection. An error will occur if the</span>
+<span class="sd"> connection was never opened.</span>
+<span class="sd"> """</span>
+ <span class="n">conn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">conn</span>
+ <span class="n">conn</span><span class="o">.</span><span class="n">quit</span><span class="p">()</span></div>
+
+<div class="viewcode-block" id="FTPHook.describe_directory"><a class="viewcode-back" href="../code.html#airflow.contrib.hooks.FTPHook.describe_directory">[docs]</a> <span class="k">def</span> <span class="nf">describe_directory</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns a dictionary of {filename: {attributes}} for all files</span>
+<span class="sd"> on the remote system (where the MLSD command is supported).</span>
+
+<span class="sd"> :param path: full path to the remote directory</span>
+<span class="sd"> :type path: str</span>
+<span class="sd"> """</span>
+ <span class="n">conn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
+ <span class="n">conn</span><span class="o">.</span><span class="n">cwd</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="c1"># only works in Python 3</span>
+ <span class="n">files</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span><span class="n">conn</span><span class="o">.</span><span class="n">mlsd</span><span class="p">())</span>
+ <span class="k">except</span> <span class="ne">AttributeError</span><span class="p">:</span>
+ <span class="n">files</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span><span class="n">mlsd</span><span class="p">(</span><span class="n">conn</span><span class="p">))</span>
+ <span class="k">return</span> <span class="n">files</span></div>
+
+<div class="viewcode-block" id="FTPHook.list_directory"><a class="viewcode-back" href="../code.html#airflow.contrib.hooks.FTPHook.list_directory">[docs]</a> <span class="k">def</span> <span class="nf">list_directory</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">,</span> <span class="n">nlst</span><span class="o">=</span><span class="bp">False</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns a list of files on the remote system.</span>
+
+<span class="sd"> :param path: full path to the remote directory to list</span>
+<span class="sd"> :type path: str</span>
+<span class="sd"> """</span>
+ <span class="n">conn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
+ <span class="n">conn</span><span class="o">.</span><span class="n">cwd</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
+
+ <span class="n">files</span> <span class="o">=</span> <span class="n">conn</span><span class="o">.</span><span class="n">nlst</span><span class="p">()</span>
+ <span class="k">return</span> <span class="n">files</span></div>
+
+<div class="viewcode-block" id="FTPHook.create_directory"><a class="viewcode-back" href="../code.html#airflow.contrib.hooks.FTPHook.create_directory">[docs]</a> <span class="k">def</span> <span class="nf">create_directory</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Creates a directory on the remote system.</span>
+
+<span class="sd"> :param path: full path to the remote directory to create</span>
+<span class="sd"> :type path: str</span>
+<span class="sd"> """</span>
+ <span class="n">conn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
+ <span class="n">conn</span><span class="o">.</span><span class="n">mkd</span><span class="p">(</span><span class="n">path</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="FTPHook.delete_directory"><a class="viewcode-back" href="../code.html#airflow.contrib.hooks.FTPHook.delete_directory">[docs]</a> <span class="k">def</span> <span class="nf">delete_directory</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Deletes a directory on the remote system.</span>
+
+<span class="sd"> :param path: full path to the remote directory to delete</span>
+<span class="sd"> :type path: str</span>
+<span class="sd"> """</span>
+ <span class="n">conn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
+ <span class="n">conn</span><span class="o">.</span><span class="n">rmd</span><span class="p">(</span><span class="n">path</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="FTPHook.retrieve_file"><a class="viewcode-back" href="../code.html#airflow.contrib.hooks.FTPHook.retrieve_file">[docs]</a> <span class="k">def</span> <span class="nf">retrieve_file</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">remote_full_path</span><span class="p">,</span> <span class="n">local_full_path_or_buffer</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Transfers the remote file to a local location.</span>
+
+<span class="sd"> If local_full_path_or_buffer is a string path, the file will be put</span>
+<span class="sd"> at that location; if it is a file-like buffer, the file will</span>
+<span class="sd"> be written to the buffer but not closed.</span>
+
+<span class="sd"> :param remote_full_path: full path to the remote file</span>
+<span class="sd"> :type remote_full_path: str</span>
+<span class="sd"> :param local_full_path_or_buffer: full path to the local file or a</span>
+<span class="sd"> file-like buffer</span>
+<span class="sd"> :type local_full_path_or_buffer: str or file-like buffer</span>
+<span class="sd"> """</span>
+ <span class="n">conn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
+
+ <span class="n">is_path</span> <span class="o">=</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">local_full_path_or_buffer</span><span class="p">,</span> <span class="nb">basestring</span><span class="p">)</span>
+
+ <span class="k">if</span> <span class="n">is_path</span><span class="p">:</span>
+ <span class="n">output_handle</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">local_full_path_or_buffer</span><span class="p">,</span> <span class="s1">'wb'</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">output_handle</span> <span class="o">=</span> <span class="n">local_full_path_or_buffer</span>
+
+ <span class="n">remote_path</span><span class="p">,</span> <span class="n">remote_file_name</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">remote_full_path</span><span class="p">)</span>
+ <span class="n">conn</span><span class="o">.</span><span class="n">cwd</span><span class="p">(</span><span class="n">remote_path</span><span class="p">)</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Retrieving file from FTP: {}'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">remote_full_path</span><span class="p">))</span>
+ <span class="n">conn</span><span class="o">.</span><span class="n">retrbinary</span><span class="p">(</span><span class="s1">'RETR </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">remote_file_name</span><span class="p">,</span> <span class="n">output_handle</span><span class="o">.</span><span class="n">write</span><span class="p">)</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Finished etrieving file from FTP: {}'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
+ <span class="n">remote_full_path</span><span class="p">))</span>
+
+ <span class="k">if</span> <span class="n">is_path</span><span class="p">:</span>
+ <span class="n">output_handle</span><span class="o">.</span><span class="n">close</span><span class="p">()</span></div>
+
+<div class="viewcode-block" id="FTPHook.store_file"><a class="viewcode-back" href="../code.html#airflow.contrib.hooks.FTPHook.store_file">[docs]</a> <span class="k">def</span> <span class="nf">store_file</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">remote_full_path</span><span class="p">,</span> <span class="n">local_full_path_or_buffer</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Transfers a local file to the remote location.</span>
+
+<span class="sd"> If local_full_path_or_buffer is a string path, the file will be read</span>
+<span class="sd"> from that location; if it is a file-like buffer, the file will</span>
+<span class="sd"> be read from the buffer but not closed.</span>
+
+<span class="sd"> :param remote_full_path: full path to the remote file</span>
+<span class="sd"> :type remote_full_path: str</span>
+<span class="sd"> :param local_full_path_or_buffer: full path to the local file or a</span>
+<span class="sd"> file-like buffer</span>
+<span class="sd"> :type local_full_path_or_buffer: str or file-like buffer</span>
+<span class="sd"> """</span>
+ <span class="n">conn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
+
+ <span class="n">is_path</span> <span class="o">=</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">local_full_path_or_buffer</span><span class="p">,</span> <span class="nb">basestring</span><span class="p">)</span>
+
+ <span class="k">if</span> <span class="n">is_path</span><span class="p">:</span>
+ <span class="n">input_handle</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">local_full_path_or_buffer</span><span class="p">,</span> <span class="s1">'rb'</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">input_handle</span> <span class="o">=</span> <span class="n">local_full_path_or_buffer</span>
+ <span class="n">remote_path</span><span class="p">,</span> <span class="n">remote_file_name</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">remote_full_path</span><span class="p">)</span>
+ <span class="n">conn</span><span class="o">.</span><span class="n">cwd</span><span class="p">(</span><span class="n">remote_path</span><span class="p">)</span>
+ <span class="n">conn</span><span class="o">.</span><span class="n">storbinary</span><span class="p">(</span><span class="s1">'STOR </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">remote_file_name</span><span class="p">,</span> <span class="n">input_handle</span><span class="p">)</span>
+
+ <span class="k">if</span> <span class="n">is_path</span><span class="p">:</span>
+ <span class="n">input_handle</span><span class="o">.</span><span class="n">close</span><span class="p">()</span></div>
+
+<div class="viewcode-block" id="FTPHook.delete_file"><a class="viewcode-back" href="../code.html#airflow.contrib.hooks.FTPHook.delete_file">[docs]</a> <span class="k">def</span> <span class="nf">delete_file</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Removes a file on the FTP Server</span>
+
+<span class="sd"> :param path: full path to the remote file</span>
+<span class="sd"> :type path: str</span>
+<span class="sd"> """</span>
+ <span class="n">conn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
+ <span class="n">conn</span><span class="o">.</span><span class="n">delete</span><span class="p">(</span><span class="n">path</span><span class="p">)</span></div>
+
+ <span class="k">def</span> <span class="nf">get_mod_time</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">):</span>
+ <span class="n">conn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
+ <span class="n">ftp_mdtm</span> <span class="o">=</span> <span class="n">conn</span><span class="o">.</span><span class="n">sendcmd</span><span class="p">(</span><span class="s1">'MDTM '</span> <span class="o">+</span> <span class="n">path</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">datetime</span><span class="o">.</span><span class="n">datetime</span><span class="o">.</span><span class="n">strptime</span><span class="p">(</span><span class="n">ftp_mdtm</span><span class="p">[</span><span class="mi">4</span><span class="p">:],</span> <span class="s1">'%Y%m</span><span class="si">%d</span><span class="s1">%H%M%S'</span><span class="p">)</span></div>
+
+
+<span class="k">class</span> <span class="nc">FTPSHook</span><span class="p">(</span><span class="n">FTPHook</span><span class="p">):</span>
+
+ <span class="k">def</span> <span class="nf">get_conn</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
... 39563 lines suppressed ...