You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by ma...@apache.org on 2016/06/05 05:24:24 UTC
[34/34] incubator-airflow-site git commit: Initial commit
Initial commit
Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/commit/9e19165c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/tree/9e19165c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/diff/9e19165c
Branch: refs/heads/asf-site
Commit: 9e19165cac0531b3f00dff4281c9910620b824c6
Parents:
Author: Maxime Beauchemin <ma...@gmail.com>
Authored: Fri Jun 3 11:44:05 2016 -0700
Committer: Maxime Beauchemin <ma...@gmail.com>
Committed: Fri Jun 3 11:44:05 2016 -0700
----------------------------------------------------------------------
_images/adhoc.png | Bin 0 -> 182773 bytes
_images/airflow.gif | Bin 0 -> 622963 bytes
_images/apache.jpg | Bin 0 -> 43364 bytes
_images/branch_bad.png | Bin 0 -> 8825 bytes
_images/branch_good.png | Bin 0 -> 12035 bytes
_images/chart.png | Bin 0 -> 169382 bytes
_images/chart_form.png | Bin 0 -> 203224 bytes
_images/code.png | Bin 0 -> 218215 bytes
_images/connections.png | Bin 0 -> 93057 bytes
_images/context.png | Bin 0 -> 238360 bytes
_images/dags.png | Bin 0 -> 115068 bytes
_images/duration.png | Bin 0 -> 254497 bytes
_images/gantt.png | Bin 0 -> 213845 bytes
_images/graph.png | Bin 0 -> 333294 bytes
_images/incubator.jpg | Bin 0 -> 91227 bytes
_images/pin_large.png | Bin 0 -> 358276 bytes
_images/subdag_after.png | Bin 0 -> 30245 bytes
_images/subdag_before.png | Bin 0 -> 70382 bytes
_images/subdag_zoom.png | Bin 0 -> 150185 bytes
_images/tree.png | Bin 0 -> 163147 bytes
_images/variable_hidden.png | Bin 0 -> 154299 bytes
_modules/S3_hook.html | 604 +
.../contrib/operators/hipchat_operator.html | 330 +
_modules/airflow/executors/celery_executor.html | 298 +
_modules/airflow/executors/local_executor.html | 276 +
.../airflow/executors/sequential_executor.html | 238 +
_modules/airflow/macros.html | 255 +
_modules/airflow/macros/hive.html | 298 +
_modules/airflow/models.html | 3802 ++++++
_modules/airflow/operators/docker_operator.html | 383 +
_modules/airflow/operators/sensors.html | 721 ++
_modules/bash_operator.html | 290 +
_modules/cloudant_hook.html | 274 +
_modules/dagrun_operator.html | 260 +
_modules/dbapi_hook.html | 426 +
_modules/druid_hook.html | 369 +
_modules/dummy_operator.html | 219 +
_modules/email_operator.html | 240 +
_modules/ftp_hook.html | 427 +
_modules/gcs_hook.html | 296 +
_modules/generic_transfer.html | 264 +
_modules/hive_hooks.html | 743 ++
_modules/hive_operator.html | 272 +
_modules/hive_to_druid.html | 316 +
_modules/hive_to_mysql.html | 294 +
_modules/hive_to_samba_operator.html | 246 +
_modules/http_hook.html | 310 +
_modules/http_operator.html | 265 +
_modules/index.html | 245 +
_modules/mssql_hook.html | 228 +
_modules/mssql_operator.html | 234 +
_modules/mssql_to_hive.html | 312 +
_modules/mysql_hook.html | 267 +
_modules/mysql_operator.html | 240 +
_modules/mysql_to_hive.html | 316 +
_modules/postgres_hook.html | 236 +
_modules/postgres_operator.html | 239 +
_modules/presto_check_operator.html | 303 +
_modules/presto_hook.html | 298 +
_modules/python_operator.html | 338 +
_modules/s3_to_hive_operator.html | 353 +
_modules/sensors.html | 721 ++
_modules/slack_operator.html | 304 +
_modules/sqlite_hook.html | 222 +
_modules/ssh_execute_operator.html | 343 +
_modules/ssh_hook.html | 353 +
_modules/vertica_hook.html | 247 +
_modules/vertica_operator.html | 233 +
_modules/vertica_to_hive.html | 316 +
_modules/webhdfs_hook.html | 287 +
_sources/cli.txt | 11 +
_sources/code.txt | 243 +
_sources/concepts.txt | 758 ++
_sources/configuration.txt | 230 +
_sources/faq.txt | 100 +
_sources/index.txt | 75 +
_sources/installation.txt | 90 +
_sources/license.txt | 211 +
_sources/plugins.txt | 139 +
_sources/profiling.txt | 39 +
_sources/project.txt | 58 +
_sources/scheduler.txt | 101 +
_sources/security.txt | 249 +
_sources/start.txt | 49 +
_sources/tutorial.txt | 429 +
_sources/ui.txt | 102 +
_static/ajax-loader.gif | Bin 0 -> 673 bytes
_static/apache.jpg | Bin 0 -> 43364 bytes
_static/basic.css | 608 +
_static/comment-bright.png | Bin 0 -> 3500 bytes
_static/comment-close.png | Bin 0 -> 3578 bytes
_static/comment.png | Bin 0 -> 3445 bytes
_static/css/badge_only.css | 2 +
_static/css/theme.css | 5 +
_static/doctools.js | 287 +
_static/down-pressed.png | Bin 0 -> 347 bytes
_static/down.png | Bin 0 -> 347 bytes
_static/file.png | Bin 0 -> 358 bytes
_static/fonts/Inconsolata-Bold.ttf | Bin 0 -> 66352 bytes
_static/fonts/Inconsolata-Regular.ttf | Bin 0 -> 84548 bytes
_static/fonts/Lato-Bold.ttf | Bin 0 -> 121788 bytes
_static/fonts/Lato-Regular.ttf | Bin 0 -> 120196 bytes
_static/fonts/RobotoSlab-Bold.ttf | Bin 0 -> 170616 bytes
_static/fonts/RobotoSlab-Regular.ttf | Bin 0 -> 169064 bytes
_static/fonts/fontawesome-webfont.eot | Bin 0 -> 56006 bytes
_static/fonts/fontawesome-webfont.svg | 520 +
_static/fonts/fontawesome-webfont.ttf | Bin 0 -> 112160 bytes
_static/fonts/fontawesome-webfont.woff | Bin 0 -> 65452 bytes
_static/incubator.jpg | Bin 0 -> 91227 bytes
_static/jquery-1.11.1.js | 10308 +++++++++++++++++
_static/jquery.js | 4 +
_static/js/modernizr.min.js | 4 +
_static/js/theme.js | 153 +
_static/minus.png | Bin 0 -> 173 bytes
_static/plus.png | Bin 0 -> 173 bytes
_static/pygments.css | 65 +
_static/searchtools.js | 651 ++
_static/underscore-1.3.1.js | 999 ++
_static/underscore.js | 31 +
_static/up-pressed.png | Bin 0 -> 345 bytes
_static/up.png | Bin 0 -> 345 bytes
_static/websupport.js | 808 ++
cli.html | 1035 ++
code.html | 3517 ++++++
concepts.html | 897 ++
configuration.html | 419 +
faq.html | 293 +
genindex.html | 1258 ++
index.html | 417 +
installation.html | 358 +
license.html | 418 +
objects.inv | Bin 0 -> 2103 bytes
plugins.html | 343 +
profiling.html | 250 +
project.html | 268 +
py-modindex.html | 262 +
scheduler.html | 328 +
search.html | 214 +
searchindex.js | 1 +
security.html | 436 +
start.html | 256 +
tutorial.html | 622 +
ui.html | 296 +
143 files changed, 48568 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/9e19165c/_images/adhoc.png
----------------------------------------------------------------------
diff --git a/_images/adhoc.png b/_images/adhoc.png
new file mode 100644
index 0000000..77ea780
Binary files /dev/null and b/_images/adhoc.png differ
http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/9e19165c/_images/airflow.gif
----------------------------------------------------------------------
diff --git a/_images/airflow.gif b/_images/airflow.gif
new file mode 100644
index 0000000..1889b86
Binary files /dev/null and b/_images/airflow.gif differ
http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/9e19165c/_images/apache.jpg
----------------------------------------------------------------------
diff --git a/_images/apache.jpg b/_images/apache.jpg
new file mode 100644
index 0000000..312251f
Binary files /dev/null and b/_images/apache.jpg differ
http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/9e19165c/_images/branch_bad.png
----------------------------------------------------------------------
diff --git a/_images/branch_bad.png b/_images/branch_bad.png
new file mode 100644
index 0000000..586844f
Binary files /dev/null and b/_images/branch_bad.png differ
http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/9e19165c/_images/branch_good.png
----------------------------------------------------------------------
diff --git a/_images/branch_good.png b/_images/branch_good.png
new file mode 100644
index 0000000..fbd4650
Binary files /dev/null and b/_images/branch_good.png differ
http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/9e19165c/_images/chart.png
----------------------------------------------------------------------
diff --git a/_images/chart.png b/_images/chart.png
new file mode 100644
index 0000000..bfca26b
Binary files /dev/null and b/_images/chart.png differ
http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/9e19165c/_images/chart_form.png
----------------------------------------------------------------------
diff --git a/_images/chart_form.png b/_images/chart_form.png
new file mode 100644
index 0000000..f73daf5
Binary files /dev/null and b/_images/chart_form.png differ
http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/9e19165c/_images/code.png
----------------------------------------------------------------------
diff --git a/_images/code.png b/_images/code.png
new file mode 100644
index 0000000..ac49291
Binary files /dev/null and b/_images/code.png differ
http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/9e19165c/_images/connections.png
----------------------------------------------------------------------
diff --git a/_images/connections.png b/_images/connections.png
new file mode 100644
index 0000000..d07a130
Binary files /dev/null and b/_images/connections.png differ
http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/9e19165c/_images/context.png
----------------------------------------------------------------------
diff --git a/_images/context.png b/_images/context.png
new file mode 100644
index 0000000..de75e48
Binary files /dev/null and b/_images/context.png differ
http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/9e19165c/_images/dags.png
----------------------------------------------------------------------
diff --git a/_images/dags.png b/_images/dags.png
new file mode 100644
index 0000000..a551f02
Binary files /dev/null and b/_images/dags.png differ
http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/9e19165c/_images/duration.png
----------------------------------------------------------------------
diff --git a/_images/duration.png b/_images/duration.png
new file mode 100644
index 0000000..18d723c
Binary files /dev/null and b/_images/duration.png differ
http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/9e19165c/_images/gantt.png
----------------------------------------------------------------------
diff --git a/_images/gantt.png b/_images/gantt.png
new file mode 100644
index 0000000..c462adb
Binary files /dev/null and b/_images/gantt.png differ
http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/9e19165c/_images/graph.png
----------------------------------------------------------------------
diff --git a/_images/graph.png b/_images/graph.png
new file mode 100644
index 0000000..cbc58e6
Binary files /dev/null and b/_images/graph.png differ
http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/9e19165c/_images/incubator.jpg
----------------------------------------------------------------------
diff --git a/_images/incubator.jpg b/_images/incubator.jpg
new file mode 100644
index 0000000..6f34a85
Binary files /dev/null and b/_images/incubator.jpg differ
http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/9e19165c/_images/pin_large.png
----------------------------------------------------------------------
diff --git a/_images/pin_large.png b/_images/pin_large.png
new file mode 100644
index 0000000..986c88b
Binary files /dev/null and b/_images/pin_large.png differ
http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/9e19165c/_images/subdag_after.png
----------------------------------------------------------------------
diff --git a/_images/subdag_after.png b/_images/subdag_after.png
new file mode 100644
index 0000000..166a6de
Binary files /dev/null and b/_images/subdag_after.png differ
http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/9e19165c/_images/subdag_before.png
----------------------------------------------------------------------
diff --git a/_images/subdag_before.png b/_images/subdag_before.png
new file mode 100644
index 0000000..ebc3e58
Binary files /dev/null and b/_images/subdag_before.png differ
http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/9e19165c/_images/subdag_zoom.png
----------------------------------------------------------------------
diff --git a/_images/subdag_zoom.png b/_images/subdag_zoom.png
new file mode 100644
index 0000000..08fcf5c
Binary files /dev/null and b/_images/subdag_zoom.png differ
http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/9e19165c/_images/tree.png
----------------------------------------------------------------------
diff --git a/_images/tree.png b/_images/tree.png
new file mode 100644
index 0000000..f3796b0
Binary files /dev/null and b/_images/tree.png differ
http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/9e19165c/_images/variable_hidden.png
----------------------------------------------------------------------
diff --git a/_images/variable_hidden.png b/_images/variable_hidden.png
new file mode 100644
index 0000000..e081ca3
Binary files /dev/null and b/_images/variable_hidden.png differ
http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/9e19165c/_modules/S3_hook.html
----------------------------------------------------------------------
diff --git a/_modules/S3_hook.html b/_modules/S3_hook.html
new file mode 100644
index 0000000..e18ec5f
--- /dev/null
+++ b/_modules/S3_hook.html
@@ -0,0 +1,604 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>S3_hook — Airflow Documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="top" title="Airflow Documentation" href="../index.html"/>
+ <link rel="up" title="Module code" href="index.html"/>
+
+
+ <script src="../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../index.html" class="icon icon-home"> Airflow
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../project.html">Project</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../license.html">License</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../start.html">Quick Start</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../installation.html">Installation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../tutorial.html">Tutorial</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../configuration.html">Configuration</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../ui.html">UI / Screenshots</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../concepts.html">Concepts</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../profiling.html">Data Profiling</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../cli.html">Command Line Interface</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../scheduler.html">Scheduling & Triggers</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../plugins.html">Plugins</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../security.html">Security</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../faq.html">FAQ</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../code.html">API Reference</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../index.html">Airflow</a>
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+ <ul class="wy-breadcrumbs">
+ <li><a href="../index.html">Docs</a> »</li>
+
+ <li><a href="index.html">Module code</a> »</li>
+
+ <li>S3_hook</li>
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+ </ul>
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for S3_hook</h1><div class="highlight"><pre>
+<span></span><span class="c1"># -*- coding: utf-8 -*-</span>
+<span class="c1">#</span>
+<span class="c1"># Licensed under the Apache License, Version 2.0 (the "License");</span>
+<span class="c1"># you may not use this file except in compliance with the License.</span>
+<span class="c1"># You may obtain a copy of the License at</span>
+<span class="c1">#</span>
+<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
+<span class="c1">#</span>
+<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
+<span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span>
+<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
+<span class="c1"># See the License for the specific language governing permissions and</span>
+<span class="c1"># limitations under the License.</span>
+
+<span class="kn">from</span> <span class="nn">future</span> <span class="kn">import</span> <span class="n">standard_library</span>
+<span class="n">standard_library</span><span class="o">.</span><span class="n">install_aliases</span><span class="p">()</span>
+<span class="kn">import</span> <span class="nn">logging</span>
+<span class="kn">import</span> <span class="nn">re</span>
+<span class="kn">import</span> <span class="nn">fnmatch</span>
+<span class="kn">import</span> <span class="nn">configparser</span>
+<span class="kn">import</span> <span class="nn">math</span>
+<span class="kn">import</span> <span class="nn">os</span>
+<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="kn">import</span> <span class="n">urlparse</span>
+<span class="kn">import</span> <span class="nn">warnings</span>
+
+<span class="kn">import</span> <span class="nn">boto</span>
+<span class="kn">from</span> <span class="nn">boto.s3.connection</span> <span class="kn">import</span> <span class="n">S3Connection</span>
+<span class="kn">from</span> <span class="nn">boto.sts</span> <span class="kn">import</span> <span class="n">STSConnection</span>
+<span class="n">boto</span><span class="o">.</span><span class="n">set_stream_logger</span><span class="p">(</span><span class="s1">'boto'</span><span class="p">)</span>
+<span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">(</span><span class="s2">"boto"</span><span class="p">)</span><span class="o">.</span><span class="n">setLevel</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">INFO</span><span class="p">)</span>
+
+<span class="kn">from</span> <span class="nn">airflow.exceptions</span> <span class="kn">import</span> <span class="n">AirflowException</span>
+<span class="kn">from</span> <span class="nn">airflow.hooks.base_hook</span> <span class="kn">import</span> <span class="n">BaseHook</span>
+
+
+<span class="k">def</span> <span class="nf">_parse_s3_config</span><span class="p">(</span><span class="n">config_file_name</span><span class="p">,</span> <span class="n">config_format</span><span class="o">=</span><span class="s1">'boto'</span><span class="p">,</span> <span class="n">profile</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Parses a config file for s3 credentials. Can currently</span>
+<span class="sd"> parse boto, s3cmd.conf and AWS SDK config formats</span>
+
+<span class="sd"> :param config_file_name: path to the config file</span>
+<span class="sd"> :type config_file_name: str</span>
+<span class="sd"> :param config_format: config type. One of "boto", "s3cmd" or "aws".</span>
+<span class="sd"> Defaults to "boto"</span>
+<span class="sd"> :type config_format: str</span>
+<span class="sd"> :param profile: profile name in AWS type config file</span>
+<span class="sd"> :type profile: str</span>
+<span class="sd"> """</span>
+ <span class="n">Config</span> <span class="o">=</span> <span class="n">configparser</span><span class="o">.</span><span class="n">ConfigParser</span><span class="p">()</span>
+ <span class="k">if</span> <span class="n">Config</span><span class="o">.</span><span class="n">read</span><span class="p">(</span><span class="n">config_file_name</span><span class="p">):</span> <span class="c1"># pragma: no cover</span>
+ <span class="n">sections</span> <span class="o">=</span> <span class="n">Config</span><span class="o">.</span><span class="n">sections</span><span class="p">()</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span><span class="s2">"Couldn't read {0}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">config_file_name</span><span class="p">))</span>
+ <span class="c1"># Setting option names depending on file format</span>
+ <span class="k">if</span> <span class="n">config_format</span> <span class="ow">is</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">config_format</span> <span class="o">=</span> <span class="s1">'boto'</span>
+ <span class="n">conf_format</span> <span class="o">=</span> <span class="n">config_format</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
+ <span class="k">if</span> <span class="n">conf_format</span> <span class="o">==</span> <span class="s1">'boto'</span><span class="p">:</span> <span class="c1"># pragma: no cover</span>
+ <span class="k">if</span> <span class="n">profile</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">None</span> <span class="ow">and</span> <span class="s1">'profile '</span> <span class="o">+</span> <span class="n">profile</span> <span class="ow">in</span> <span class="n">sections</span><span class="p">:</span>
+ <span class="n">cred_section</span> <span class="o">=</span> <span class="s1">'profile '</span> <span class="o">+</span> <span class="n">profile</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">cred_section</span> <span class="o">=</span> <span class="s1">'Credentials'</span>
+ <span class="k">elif</span> <span class="n">conf_format</span> <span class="o">==</span> <span class="s1">'aws'</span> <span class="ow">and</span> <span class="n">profile</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">cred_section</span> <span class="o">=</span> <span class="n">profile</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">cred_section</span> <span class="o">=</span> <span class="s1">'default'</span>
+ <span class="c1"># Option names</span>
+ <span class="k">if</span> <span class="n">conf_format</span> <span class="ow">in</span> <span class="p">(</span><span class="s1">'boto'</span><span class="p">,</span> <span class="s1">'aws'</span><span class="p">):</span> <span class="c1"># pragma: no cover</span>
+ <span class="n">key_id_option</span> <span class="o">=</span> <span class="s1">'aws_access_key_id'</span>
+ <span class="n">secret_key_option</span> <span class="o">=</span> <span class="s1">'aws_secret_access_key'</span>
+ <span class="c1"># security_token_option = 'aws_security_token'</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">key_id_option</span> <span class="o">=</span> <span class="s1">'access_key'</span>
+ <span class="n">secret_key_option</span> <span class="o">=</span> <span class="s1">'secret_key'</span>
+ <span class="c1"># Actual Parsing</span>
+ <span class="k">if</span> <span class="n">cred_section</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">sections</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span><span class="s2">"This config file format is not recognized"</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="n">access_key</span> <span class="o">=</span> <span class="n">Config</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">cred_section</span><span class="p">,</span> <span class="n">key_id_option</span><span class="p">)</span>
+ <span class="n">secret_key</span> <span class="o">=</span> <span class="n">Config</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">cred_section</span><span class="p">,</span> <span class="n">secret_key_option</span><span class="p">)</span>
+ <span class="n">calling_format</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="k">if</span> <span class="n">Config</span><span class="o">.</span><span class="n">has_option</span><span class="p">(</span><span class="n">cred_section</span><span class="p">,</span> <span class="s1">'calling_format'</span><span class="p">):</span>
+ <span class="n">calling_format</span> <span class="o">=</span> <span class="n">Config</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">cred_section</span><span class="p">,</span> <span class="s1">'calling_format'</span><span class="p">)</span>
+ <span class="k">except</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">"Option Error in parsing s3 config file"</span><span class="p">)</span>
+ <span class="k">raise</span>
+ <span class="k">return</span> <span class="p">(</span><span class="n">access_key</span><span class="p">,</span> <span class="n">secret_key</span><span class="p">,</span> <span class="n">calling_format</span><span class="p">)</span>
+
+
+<div class="viewcode-block" id="S3Hook"><a class="viewcode-back" href="../code.html#airflow.hooks.S3Hook">[docs]</a><span class="k">class</span> <span class="nc">S3Hook</span><span class="p">(</span><span class="n">BaseHook</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Interact with S3. This class is a wrapper around the boto library.</span>
+<span class="sd"> """</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span>
+ <span class="n">s3_conn_id</span><span class="o">=</span><span class="s1">'s3_default'</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">s3_conn_id</span> <span class="o">=</span> <span class="n">s3_conn_id</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">s3_conn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_connection</span><span class="p">(</span><span class="n">s3_conn_id</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">s3_conn</span><span class="o">.</span><span class="n">extra_dejson</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">profile</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'profile'</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">calling_format</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_creds_in_conn</span> <span class="o">=</span> <span class="s1">'aws_secret_access_key'</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_creds_in_config_file</span> <span class="o">=</span> <span class="s1">'s3_config_file'</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_default_to_boto</span> <span class="o">=</span> <span class="bp">False</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_creds_in_conn</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_a_key</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span><span class="p">[</span><span class="s1">'aws_access_key_id'</span><span class="p">]</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_s_key</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span><span class="p">[</span><span class="s1">'aws_secret_access_key'</span><span class="p">]</span>
+ <span class="k">if</span> <span class="s1">'calling_format'</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">calling_format</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span><span class="p">[</span><span class="s1">'calling_format'</span><span class="p">]</span>
+ <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">_creds_in_config_file</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">s3_config_file</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span><span class="p">[</span><span class="s1">'s3_config_file'</span><span class="p">]</span>
+ <span class="c1"># The format can be None and will default to boto in the parser</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">s3_config_format</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'s3_config_format'</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_default_to_boto</span> <span class="o">=</span> <span class="bp">True</span>
+ <span class="c1"># STS support for cross account resource access</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">_sts_conn_required</span> <span class="o">=</span> <span class="p">(</span><span class="s1">'aws_account_id'</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span> <span class="ow">or</span>
+ <span class="s1">'role_arn'</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span><span class="p">)</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_sts_conn_required</span><span class="p">:</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">role_arn</span> <span class="o">=</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'role_arn'</span><span class="p">)</span> <span class="ow">or</span>
+ <span class="s2">"arn:aws:iam::"</span> <span class="o">+</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span><span class="p">[</span><span class="s1">'aws_account_id'</span><span class="p">]</span> <span class="o">+</span>
+ <span class="s2">":role/"</span> <span class="o">+</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">extra_params</span><span class="p">[</span><span class="s1">'aws_iam_role'</span><span class="p">])</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">connection</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
+
+ <span class="k">def</span> <span class="nf">__getstate__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="n">pickled_dict</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">__dict__</span><span class="p">)</span>
+ <span class="k">del</span> <span class="n">pickled_dict</span><span class="p">[</span><span class="s1">'connection'</span><span class="p">]</span>
+ <span class="k">return</span> <span class="n">pickled_dict</span>
+
+ <span class="k">def</span> <span class="nf">__setstate__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">d</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">__dict__</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">d</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">__dict__</span><span class="p">[</span><span class="s1">'connection'</span><span class="p">]</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
+
+ <span class="k">def</span> <span class="nf">_parse_s3_url</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">s3url</span><span class="p">):</span>
+ <span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span>
+ <span class="s1">'Please note: S3Hook._parse_s3_url() is now '</span>
+ <span class="s1">'S3Hook.parse_s3_url() (no leading underscore).'</span><span class="p">,</span>
+ <span class="ne">DeprecationWarning</span><span class="p">)</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_s3_url</span><span class="p">(</span><span class="n">s3url</span><span class="p">)</span>
+
+ <span class="nd">@staticmethod</span>
+ <span class="k">def</span> <span class="nf">parse_s3_url</span><span class="p">(</span><span class="n">s3url</span><span class="p">):</span>
+ <span class="n">parsed_url</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">s3url</span><span class="p">)</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">netloc</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span><span class="s1">'Please provide a bucket_name'</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">bucket_name</span> <span class="o">=</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">netloc</span>
+ <span class="n">key</span> <span class="o">=</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">strip</span><span class="p">(</span><span class="s1">'/'</span><span class="p">)</span>
+ <span class="k">return</span> <span class="p">(</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">key</span><span class="p">)</span>
+
+<div class="viewcode-block" id="S3Hook.get_conn"><a class="viewcode-back" href="../code.html#airflow.hooks.S3Hook.get_conn">[docs]</a> <span class="k">def</span> <span class="nf">get_conn</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns the boto S3Connection object.</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_default_to_boto</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">S3Connection</span><span class="p">(</span><span class="n">profile_name</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">profile</span><span class="p">)</span>
+ <span class="n">a_key</span> <span class="o">=</span> <span class="n">s_key</span> <span class="o">=</span> <span class="bp">None</span>
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_creds_in_config_file</span><span class="p">:</span>
+ <span class="n">a_key</span><span class="p">,</span> <span class="n">s_key</span><span class="p">,</span> <span class="n">calling_format</span> <span class="o">=</span> <span class="n">_parse_s3_config</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">s3_config_file</span><span class="p">,</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">s3_config_format</span><span class="p">,</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">profile</span><span class="p">)</span>
+ <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">_creds_in_conn</span><span class="p">:</span>
+ <span class="n">a_key</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_a_key</span>
+ <span class="n">s_key</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_s_key</span>
+ <span class="n">calling_format</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">calling_format</span>
+
+ <span class="k">if</span> <span class="n">calling_format</span> <span class="ow">is</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="n">calling_format</span> <span class="o">=</span> <span class="s1">'boto.s3.connection.SubdomainCallingFormat'</span>
+
+ <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_sts_conn_required</span><span class="p">:</span>
+ <span class="n">sts_connection</span> <span class="o">=</span> <span class="n">STSConnection</span><span class="p">(</span><span class="n">aws_access_key_id</span><span class="o">=</span><span class="n">a_key</span><span class="p">,</span>
+ <span class="n">aws_secret_access_key</span><span class="o">=</span><span class="n">s_key</span><span class="p">,</span>
+ <span class="n">profile_name</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">profile</span><span class="p">)</span>
+ <span class="n">assumed_role_object</span> <span class="o">=</span> <span class="n">sts_connection</span><span class="o">.</span><span class="n">assume_role</span><span class="p">(</span>
+ <span class="n">role_arn</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">role_arn</span><span class="p">,</span>
+ <span class="n">role_session_name</span><span class="o">=</span><span class="s2">"Airflow_"</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">s3_conn_id</span>
+ <span class="p">)</span>
+ <span class="n">creds</span> <span class="o">=</span> <span class="n">assumed_role_object</span><span class="o">.</span><span class="n">credentials</span>
+ <span class="n">connection</span> <span class="o">=</span> <span class="n">S3Connection</span><span class="p">(</span>
+ <span class="n">aws_access_key_id</span><span class="o">=</span><span class="n">creds</span><span class="o">.</span><span class="n">access_key</span><span class="p">,</span>
+ <span class="n">aws_secret_access_key</span><span class="o">=</span><span class="n">creds</span><span class="o">.</span><span class="n">secret_key</span><span class="p">,</span>
+ <span class="n">calling_format</span><span class="o">=</span><span class="n">calling_format</span><span class="p">,</span>
+ <span class="n">security_token</span><span class="o">=</span><span class="n">creds</span><span class="o">.</span><span class="n">session_token</span>
+ <span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">connection</span> <span class="o">=</span> <span class="n">S3Connection</span><span class="p">(</span><span class="n">aws_access_key_id</span><span class="o">=</span><span class="n">a_key</span><span class="p">,</span>
+ <span class="n">aws_secret_access_key</span><span class="o">=</span><span class="n">s_key</span><span class="p">,</span>
+ <span class="n">calling_format</span><span class="o">=</span><span class="n">calling_format</span><span class="p">,</span>
+ <span class="n">profile_name</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">profile</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">connection</span></div>
+
+<div class="viewcode-block" id="S3Hook.check_for_bucket"><a class="viewcode-back" href="../code.html#airflow.hooks.S3Hook.check_for_bucket">[docs]</a> <span class="k">def</span> <span class="nf">check_for_bucket</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket_name</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Check if bucket_name exists.</span>
+
+<span class="sd"> :param bucket_name: the name of the bucket</span>
+<span class="sd"> :type bucket_name: str</span>
+<span class="sd"> """</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">connection</span><span class="o">.</span><span class="n">lookup</span><span class="p">(</span><span class="n">bucket_name</span><span class="p">)</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">None</span></div>
+
+<div class="viewcode-block" id="S3Hook.get_bucket"><a class="viewcode-back" href="../code.html#airflow.hooks.S3Hook.get_bucket">[docs]</a> <span class="k">def</span> <span class="nf">get_bucket</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket_name</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns a boto.s3.bucket.Bucket object</span>
+
+<span class="sd"> :param bucket_name: the name of the bucket</span>
+<span class="sd"> :type bucket_name: str</span>
+<span class="sd"> """</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">connection</span><span class="o">.</span><span class="n">get_bucket</span><span class="p">(</span><span class="n">bucket_name</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="S3Hook.list_keys"><a class="viewcode-back" href="../code.html#airflow.hooks.S3Hook.list_keys">[docs]</a> <span class="k">def</span> <span class="nf">list_keys</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket_name</span><span class="p">,</span> <span class="n">prefix</span><span class="o">=</span><span class="s1">''</span><span class="p">,</span> <span class="n">delimiter</span><span class="o">=</span><span class="s1">''</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Lists keys in a bucket under prefix and not containing delimiter</span>
+
+<span class="sd"> :param bucket_name: the name of the bucket</span>
+<span class="sd"> :type bucket_name: str</span>
+<span class="sd"> :param prefix: a key prefix</span>
+<span class="sd"> :type prefix: str</span>
+<span class="sd"> :param delimiter: the delimiter marks key hierarchy.</span>
+<span class="sd"> :type delimiter: str</span>
+<span class="sd"> """</span>
+ <span class="n">b</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_bucket</span><span class="p">(</span><span class="n">bucket_name</span><span class="p">)</span>
+ <span class="n">keylist</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">b</span><span class="o">.</span><span class="n">list</span><span class="p">(</span><span class="n">prefix</span><span class="o">=</span><span class="n">prefix</span><span class="p">,</span> <span class="n">delimiter</span><span class="o">=</span><span class="n">delimiter</span><span class="p">))</span>
+ <span class="k">return</span> <span class="p">[</span><span class="n">k</span><span class="o">.</span><span class="n">name</span> <span class="k">for</span> <span class="n">k</span> <span class="ow">in</span> <span class="n">keylist</span><span class="p">]</span> <span class="k">if</span> <span class="n">keylist</span> <span class="o">!=</span> <span class="p">[]</span> <span class="k">else</span> <span class="bp">None</span></div>
+
+<div class="viewcode-block" id="S3Hook.list_prefixes"><a class="viewcode-back" href="../code.html#airflow.hooks.S3Hook.list_prefixes">[docs]</a> <span class="k">def</span> <span class="nf">list_prefixes</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket_name</span><span class="p">,</span> <span class="n">prefix</span><span class="o">=</span><span class="s1">''</span><span class="p">,</span> <span class="n">delimiter</span><span class="o">=</span><span class="s1">''</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Lists prefixes in a bucket under prefix</span>
+
+<span class="sd"> :param bucket_name: the name of the bucket</span>
+<span class="sd"> :type bucket_name: str</span>
+<span class="sd"> :param prefix: a key prefix</span>
+<span class="sd"> :type prefix: str</span>
+<span class="sd"> :param delimiter: the delimiter marks key hierarchy.</span>
+<span class="sd"> :type delimiter: str</span>
+<span class="sd"> """</span>
+ <span class="n">b</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_bucket</span><span class="p">(</span><span class="n">bucket_name</span><span class="p">)</span>
+ <span class="n">plist</span> <span class="o">=</span> <span class="n">b</span><span class="o">.</span><span class="n">list</span><span class="p">(</span><span class="n">prefix</span><span class="o">=</span><span class="n">prefix</span><span class="p">,</span> <span class="n">delimiter</span><span class="o">=</span><span class="n">delimiter</span><span class="p">)</span>
+ <span class="n">prefix_names</span> <span class="o">=</span> <span class="p">[</span><span class="n">p</span><span class="o">.</span><span class="n">name</span> <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">plist</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">p</span><span class="p">,</span> <span class="n">boto</span><span class="o">.</span><span class="n">s3</span><span class="o">.</span><span class="n">prefix</span><span class="o">.</span><span class="n">Prefix</span><span class="p">)]</span>
+ <span class="k">return</span> <span class="n">prefix_names</span> <span class="k">if</span> <span class="n">prefix_names</span> <span class="o">!=</span> <span class="p">[]</span> <span class="k">else</span> <span class="bp">None</span></div>
+
+<div class="viewcode-block" id="S3Hook.check_for_key"><a class="viewcode-back" href="../code.html#airflow.hooks.S3Hook.check_for_key">[docs]</a> <span class="k">def</span> <span class="nf">check_for_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">,</span> <span class="n">bucket_name</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Checks that a key exists in a bucket</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">bucket_name</span><span class="p">:</span>
+ <span class="p">(</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">key</span><span class="p">)</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_s3_url</span><span class="p">(</span><span class="n">key</span><span class="p">)</span>
+ <span class="n">bucket</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_bucket</span><span class="p">(</span><span class="n">bucket_name</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">bucket</span><span class="o">.</span><span class="n">get_key</span><span class="p">(</span><span class="n">key</span><span class="p">)</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">None</span></div>
+
+<div class="viewcode-block" id="S3Hook.get_key"><a class="viewcode-back" href="../code.html#airflow.hooks.S3Hook.get_key">[docs]</a> <span class="k">def</span> <span class="nf">get_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">,</span> <span class="n">bucket_name</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns a boto.s3.key.Key object</span>
+
+<span class="sd"> :param key: the path to the key</span>
+<span class="sd"> :type key: str</span>
+<span class="sd"> :param bucket_name: the name of the bucket</span>
+<span class="sd"> :type bucket_name: str</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">bucket_name</span><span class="p">:</span>
+ <span class="p">(</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">key</span><span class="p">)</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_s3_url</span><span class="p">(</span><span class="n">key</span><span class="p">)</span>
+ <span class="n">bucket</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_bucket</span><span class="p">(</span><span class="n">bucket_name</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">bucket</span><span class="o">.</span><span class="n">get_key</span><span class="p">(</span><span class="n">key</span><span class="p">)</span></div>
+
+<div class="viewcode-block" id="S3Hook.check_for_wildcard_key"><a class="viewcode-back" href="../code.html#airflow.hooks.S3Hook.check_for_wildcard_key">[docs]</a> <span class="k">def</span> <span class="nf">check_for_wildcard_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
+ <span class="n">wildcard_key</span><span class="p">,</span> <span class="n">bucket_name</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">delimiter</span><span class="o">=</span><span class="s1">''</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Checks that a key matching a wildcard expression exists in a bucket</span>
+<span class="sd"> """</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_wildcard_key</span><span class="p">(</span><span class="n">wildcard_key</span><span class="o">=</span><span class="n">wildcard_key</span><span class="p">,</span>
+ <span class="n">bucket_name</span><span class="o">=</span><span class="n">bucket_name</span><span class="p">,</span>
+ <span class="n">delimiter</span><span class="o">=</span><span class="n">delimiter</span><span class="p">)</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">None</span></div>
+
+<div class="viewcode-block" id="S3Hook.get_wildcard_key"><a class="viewcode-back" href="../code.html#airflow.hooks.S3Hook.get_wildcard_key">[docs]</a> <span class="k">def</span> <span class="nf">get_wildcard_key</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">wildcard_key</span><span class="p">,</span> <span class="n">bucket_name</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">delimiter</span><span class="o">=</span><span class="s1">''</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Returns a boto.s3.key.Key object matching the regular expression</span>
+
+<span class="sd"> :param regex_key: the path to the key</span>
+<span class="sd"> :type regex_key: str</span>
+<span class="sd"> :param bucket_name: the name of the bucket</span>
+<span class="sd"> :type bucket_name: str</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">bucket_name</span><span class="p">:</span>
+ <span class="p">(</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">wildcard_key</span><span class="p">)</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_s3_url</span><span class="p">(</span><span class="n">wildcard_key</span><span class="p">)</span>
+ <span class="n">bucket</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_bucket</span><span class="p">(</span><span class="n">bucket_name</span><span class="p">)</span>
+ <span class="n">prefix</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">r'[*]'</span><span class="p">,</span> <span class="n">wildcard_key</span><span class="p">,</span> <span class="mi">1</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
+ <span class="n">klist</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">list_keys</span><span class="p">(</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">prefix</span><span class="o">=</span><span class="n">prefix</span><span class="p">,</span> <span class="n">delimiter</span><span class="o">=</span><span class="n">delimiter</span><span class="p">)</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">klist</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">None</span>
+ <span class="n">key_matches</span> <span class="o">=</span> <span class="p">[</span><span class="n">k</span> <span class="k">for</span> <span class="n">k</span> <span class="ow">in</span> <span class="n">klist</span> <span class="k">if</span> <span class="n">fnmatch</span><span class="o">.</span><span class="n">fnmatch</span><span class="p">(</span><span class="n">k</span><span class="p">,</span> <span class="n">wildcard_key</span><span class="p">)]</span>
+ <span class="k">return</span> <span class="n">bucket</span><span class="o">.</span><span class="n">get_key</span><span class="p">(</span><span class="n">key_matches</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> <span class="k">if</span> <span class="n">key_matches</span> <span class="k">else</span> <span class="bp">None</span></div>
+
+<div class="viewcode-block" id="S3Hook.check_for_prefix"><a class="viewcode-back" href="../code.html#airflow.hooks.S3Hook.check_for_prefix">[docs]</a> <span class="k">def</span> <span class="nf">check_for_prefix</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket_name</span><span class="p">,</span> <span class="n">prefix</span><span class="p">,</span> <span class="n">delimiter</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Checks that a prefix exists in a bucket</span>
+<span class="sd"> """</span>
+ <span class="n">prefix</span> <span class="o">=</span> <span class="n">prefix</span> <span class="o">+</span> <span class="n">delimiter</span> <span class="k">if</span> <span class="n">prefix</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">!=</span> <span class="n">delimiter</span> <span class="k">else</span> <span class="n">prefix</span>
+ <span class="n">prefix_split</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">r'(\w+[{d}])$'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">d</span><span class="o">=</span><span class="n">delimiter</span><span class="p">),</span> <span class="n">prefix</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
+ <span class="n">previous_level</span> <span class="o">=</span> <span class="n">prefix_split</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
+ <span class="n">plist</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">list_prefixes</span><span class="p">(</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">previous_level</span><span class="p">,</span> <span class="n">delimiter</span><span class="p">)</span>
+ <span class="k">return</span> <span class="bp">False</span> <span class="k">if</span> <span class="n">plist</span> <span class="ow">is</span> <span class="bp">None</span> <span class="k">else</span> <span class="n">prefix</span> <span class="ow">in</span> <span class="n">plist</span></div>
+
+<div class="viewcode-block" id="S3Hook.load_file"><a class="viewcode-back" href="../code.html#airflow.hooks.S3Hook.load_file">[docs]</a> <span class="k">def</span> <span class="nf">load_file</span><span class="p">(</span>
+ <span class="bp">self</span><span class="p">,</span>
+ <span class="n">filename</span><span class="p">,</span>
+ <span class="n">key</span><span class="p">,</span>
+ <span class="n">bucket_name</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">replace</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">multipart_bytes</span><span class="o">=</span><span class="mi">5</span> <span class="o">*</span> <span class="p">(</span><span class="mi">1024</span> <span class="o">**</span> <span class="mi">3</span><span class="p">)):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Loads a local file to S3</span>
+
+<span class="sd"> :param filename: name of the file to load.</span>
+<span class="sd"> :type filename: str</span>
+<span class="sd"> :param key: S3 key that will point to the file</span>
+<span class="sd"> :type key: str</span>
+<span class="sd"> :param bucket_name: Name of the bucket in which to store the file</span>
+<span class="sd"> :type bucket_name: str</span>
+<span class="sd"> :param replace: A flag to decide whether or not to overwrite the key</span>
+<span class="sd"> if it already exists. If replace is False and the key exists, an</span>
+<span class="sd"> error will be raised.</span>
+<span class="sd"> :type replace: bool</span>
+<span class="sd"> :param multipart_bytes: If provided, the file is uploaded in parts of</span>
+<span class="sd"> this size (minimum 5242880). The default value is 5GB, since S3</span>
+<span class="sd"> cannot accept non-multipart uploads for files larger than 5GB. If</span>
+<span class="sd"> the file is smaller than the specified limit, the option will be</span>
+<span class="sd"> ignored.</span>
+<span class="sd"> :type multipart_bytes: int</span>
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">bucket_name</span><span class="p">:</span>
+ <span class="p">(</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">key</span><span class="p">)</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_s3_url</span><span class="p">(</span><span class="n">key</span><span class="p">)</span>
+ <span class="n">bucket</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_bucket</span><span class="p">(</span><span class="n">bucket_name</span><span class="p">)</span>
+ <span class="n">key_obj</span> <span class="o">=</span> <span class="n">bucket</span><span class="o">.</span><span class="n">get_key</span><span class="p">(</span><span class="n">key</span><span class="p">)</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">replace</span> <span class="ow">and</span> <span class="n">key_obj</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"The key {key} already exists."</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
+ <span class="o">**</span><span class="nb">locals</span><span class="p">()))</span>
+
+ <span class="n">key_size</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">getsize</span><span class="p">(</span><span class="n">filename</span><span class="p">)</span>
+ <span class="k">if</span> <span class="n">multipart_bytes</span> <span class="ow">and</span> <span class="n">key_size</span> <span class="o">>=</span> <span class="n">multipart_bytes</span><span class="p">:</span>
+ <span class="c1"># multipart upload</span>
+ <span class="kn">from</span> <span class="nn">filechunkio</span> <span class="kn">import</span> <span class="n">FileChunkIO</span>
+ <span class="n">mp</span> <span class="o">=</span> <span class="n">bucket</span><span class="o">.</span><span class="n">initiate_multipart_upload</span><span class="p">(</span><span class="n">key_name</span><span class="o">=</span><span class="n">key</span><span class="p">)</span>
+ <span class="n">total_chunks</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">math</span><span class="o">.</span><span class="n">ceil</span><span class="p">(</span><span class="n">key_size</span> <span class="o">/</span> <span class="n">multipart_bytes</span><span class="p">))</span>
+ <span class="n">sent_bytes</span> <span class="o">=</span> <span class="mi">0</span>
+ <span class="k">try</span><span class="p">:</span>
+ <span class="k">for</span> <span class="n">chunk</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">total_chunks</span><span class="p">):</span>
+ <span class="n">offset</span> <span class="o">=</span> <span class="n">chunk</span> <span class="o">*</span> <span class="n">multipart_bytes</span>
+ <span class="nb">bytes</span> <span class="o">=</span> <span class="nb">min</span><span class="p">(</span><span class="n">multipart_bytes</span><span class="p">,</span> <span class="n">key_size</span> <span class="o">-</span> <span class="n">offset</span><span class="p">)</span>
+ <span class="k">with</span> <span class="n">FileChunkIO</span><span class="p">(</span>
+ <span class="n">filename</span><span class="p">,</span> <span class="s1">'r'</span><span class="p">,</span> <span class="n">offset</span><span class="o">=</span><span class="n">offset</span><span class="p">,</span> <span class="nb">bytes</span><span class="o">=</span><span class="nb">bytes</span><span class="p">)</span> <span class="k">as</span> <span class="n">fp</span><span class="p">:</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Sending chunk {c} of {tc}...'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
+ <span class="n">c</span><span class="o">=</span><span class="n">chunk</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span> <span class="n">tc</span><span class="o">=</span><span class="n">total_chunks</span><span class="p">))</span>
+ <span class="n">mp</span><span class="o">.</span><span class="n">upload_part_from_file</span><span class="p">(</span><span class="n">fp</span><span class="p">,</span> <span class="n">part_num</span><span class="o">=</span><span class="n">chunk</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span>
+ <span class="k">except</span><span class="p">:</span>
+ <span class="n">mp</span><span class="o">.</span><span class="n">cancel_upload</span><span class="p">()</span>
+ <span class="k">raise</span>
+ <span class="n">mp</span><span class="o">.</span><span class="n">complete_upload</span><span class="p">()</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="c1"># regular upload</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">key_obj</span><span class="p">:</span>
+ <span class="n">key_obj</span> <span class="o">=</span> <span class="n">bucket</span><span class="o">.</span><span class="n">new_key</span><span class="p">(</span><span class="n">key_name</span><span class="o">=</span><span class="n">key</span><span class="p">)</span>
+ <span class="n">key_size</span> <span class="o">=</span> <span class="n">key_obj</span><span class="o">.</span><span class="n">set_contents_from_filename</span><span class="p">(</span><span class="n">filename</span><span class="p">,</span>
+ <span class="n">replace</span><span class="o">=</span><span class="n">replace</span><span class="p">)</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"The key {key} now contains"</span>
+ <span class="s2">" {key_size} bytes"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">()))</span></div>
+
+<div class="viewcode-block" id="S3Hook.load_string"><a class="viewcode-back" href="../code.html#airflow.hooks.S3Hook.load_string">[docs]</a> <span class="k">def</span> <span class="nf">load_string</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">string_data</span><span class="p">,</span>
+ <span class="n">key</span><span class="p">,</span> <span class="n">bucket_name</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">replace</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+ <span class="n">encrypt</span><span class="o">=</span><span class="bp">False</span><span class="p">):</span>
+ <span class="sd">"""</span>
+<span class="sd"> Loads a string to S3</span>
+
+<span class="sd"> This is provided as a convenience to drop a string in S3. It uses the</span>
+<span class="sd"> boto infrastructure to ship the string to S3. It currently uses only</span>
+<span class="sd"> a single-part upload, and should not be used to move large amounts of data.</span>
+
+<span class="sd"> :param string_data: string to set as content for the key.</span>
+<span class="sd"> :type string_data: str</span>
+<span class="sd"> :param key: S3 key that will point to the string data</span>
+<span class="sd"> :type key: str</span>
+<span class="sd"> :param bucket_name: Name of the bucket in which to store the string data</span>
+<span class="sd"> :type bucket_name: str</span>
+<span class="sd"> :param replace: A flag to decide whether or not to overwrite the key</span>
+<span class="sd"> if it already exists</span>
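+<span class="sd"> :type replace: bool</span>
+<span class="sd"> :param encrypt: Passed to boto's ``set_contents_from_string`` as</span>
+<span class="sd"> ``encrypt_key``; if True, S3 stores the key with server-side encryption</span>
+<span class="sd"> :type encrypt: bool</span>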
+<span class="sd"> """</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">bucket_name</span><span class="p">:</span>
+ <span class="p">(</span><span class="n">bucket_name</span><span class="p">,</span> <span class="n">key</span><span class="p">)</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_s3_url</span><span class="p">(</span><span class="n">key</span><span class="p">)</span>
+ <span class="n">bucket</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_bucket</span><span class="p">(</span><span class="n">bucket_name</span><span class="p">)</span>
+ <span class="n">key_obj</span> <span class="o">=</span> <span class="n">bucket</span><span class="o">.</span><span class="n">get_key</span><span class="p">(</span><span class="n">key</span><span class="p">)</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">replace</span> <span class="ow">and</span> <span class="n">key_obj</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"The key {key} already exists."</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
+ <span class="o">**</span><span class="nb">locals</span><span class="p">()))</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">key_obj</span><span class="p">:</span>
+ <span class="n">key_obj</span> <span class="o">=</span> <span class="n">bucket</span><span class="o">.</span><span class="n">new_key</span><span class="p">(</span><span class="n">key_name</span><span class="o">=</span><span class="n">key</span><span class="p">)</span>
+ <span class="n">key_size</span> <span class="o">=</span> <span class="n">key_obj</span><span class="o">.</span><span class="n">set_contents_from_string</span><span class="p">(</span><span class="n">string_data</span><span class="p">,</span>
+ <span class="n">replace</span><span class="o">=</span><span class="n">replace</span><span class="p">,</span>
+ <span class="n">encrypt_key</span><span class="o">=</span><span class="n">encrypt</span><span class="p">)</span>
+ <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"The key {key} now contains"</span>
+ <span class="s2">" {key_size} bytes"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">()))</span></div></div>
+</pre></div>
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright 2014, Maxime Beauchemin, Airbnb.
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../_static/jquery.js"></script>
+ <script type="text/javascript" src="../_static/underscore.js"></script>
+ <script type="text/javascript" src="../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file