You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by ma...@apache.org on 2016/06/05 05:24:11 UTC

[21/34] incubator-airflow-site git commit: Initial commit

http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/9e19165c/_modules/presto_hook.html
----------------------------------------------------------------------
diff --git a/_modules/presto_hook.html b/_modules/presto_hook.html
new file mode 100644
index 0000000..3368d4c
--- /dev/null
+++ b/_modules/presto_hook.html
@@ -0,0 +1,298 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+  <meta charset="utf-8">
+  
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  
+  <title>presto_hook &mdash; Airflow Documentation</title>
+  
+
+  
+  
+
+  
+
+  
+  
+    
+
+  
+
+  
+  
+    <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
+  
+
+  
+
+  
+    <link rel="top" title="Airflow Documentation" href="../index.html"/>
+        <link rel="up" title="Module code" href="index.html"/> 
+
+  
+  <script src="../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+  <div class="wy-grid-for-nav">
+
+    
+    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+      <div class="wy-side-scroll">
+        <div class="wy-side-nav-search">
+          
+
+          
+            <a href="../index.html" class="icon icon-home"> Airflow
+          
+
+          
+          </a>
+
+          
+            
+            
+          
+
+          
+<div role="search">
+  <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
+    <input type="text" name="q" placeholder="Search docs" />
+    <input type="hidden" name="check_keywords" value="yes" />
+    <input type="hidden" name="area" value="default" />
+  </form>
+</div>
+
+          
+        </div>
+
+        <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+          
+            
+            
+                <ul>
+<li class="toctree-l1"><a class="reference internal" href="../project.html">Project</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../license.html">License</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../start.html">Quick Start</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../installation.html">Installation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../tutorial.html">Tutorial</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../configuration.html">Configuration</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../ui.html">UI / Screenshots</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../concepts.html">Concepts</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../profiling.html">Data Profiling</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../cli.html">Command Line Interface</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../scheduler.html">Scheduling &amp; Triggers</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../plugins.html">Plugins</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../security.html">Security</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../faq.html">FAQ</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../code.html">API Reference</a></li>
+</ul>
+
+            
+          
+        </div>
+      </div>
+    </nav>
+
+    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+      
+      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+        <a href="../index.html">Airflow</a>
+      </nav>
+
+
+      
+      <div class="wy-nav-content">
+        <div class="rst-content">
+          
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+  <ul class="wy-breadcrumbs">
+    <li><a href="../index.html">Docs</a> &raquo;</li>
+      
+          <li><a href="index.html">Module code</a> &raquo;</li>
+      
+    <li>presto_hook</li>
+      <li class="wy-breadcrumbs-aside">
+        
+          
+        
+      </li>
+  </ul>
+  <hr/>
+</div>
+          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+           <div itemprop="articleBody">
+            
+  <h1>Source code for presto_hook</h1><div class="highlight"><pre>
+<span></span><span class="kn">from</span> <span class="nn">builtins</span> <span class="kn">import</span> <span class="nb">str</span>
+<span class="kn">from</span> <span class="nn">pyhive</span> <span class="kn">import</span> <span class="n">presto</span>
+<span class="kn">from</span> <span class="nn">pyhive.exc</span> <span class="kn">import</span> <span class="n">DatabaseError</span>
+
+<span class="kn">from</span> <span class="nn">airflow.hooks.dbapi_hook</span> <span class="kn">import</span> <span class="n">DbApiHook</span>
+
+<span class="kn">import</span> <span class="nn">logging</span>
+<span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">(</span><span class="s2">&quot;pyhive&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">setLevel</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">INFO</span><span class="p">)</span>
+
+
+<span class="k">class</span> <span class="nc">PrestoException</span><span class="p">(</span><span class="ne">Exception</span><span class="p">):</span>
+    <span class="k">pass</span>
+
+
+<div class="viewcode-block" id="PrestoHook"><a class="viewcode-back" href="../code.html#airflow.hooks.PrestoHook">[docs]</a><span class="k">class</span> <span class="nc">PrestoHook</span><span class="p">(</span><span class="n">DbApiHook</span><span class="p">):</span>
+    <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd">    Interact with Presto through PyHive!</span>
+
+<span class="sd">    &gt;&gt;&gt; ph = PrestoHook()</span>
+<span class="sd">    &gt;&gt;&gt; sql = &quot;SELECT count(1) AS num FROM airflow.static_babynames&quot;</span>
+<span class="sd">    &gt;&gt;&gt; ph.get_records(sql)</span>
+<span class="sd">    [[340698]]</span>
+<span class="sd">    &quot;&quot;&quot;</span>
+
+    <span class="n">conn_name_attr</span> <span class="o">=</span> <span class="s1">&#39;presto_conn_id&#39;</span>
+    <span class="n">default_conn_name</span> <span class="o">=</span> <span class="s1">&#39;presto_default&#39;</span>
+
+<div class="viewcode-block" id="PrestoHook.get_conn"><a class="viewcode-back" href="../code.html#airflow.hooks.PrestoHook.get_conn">[docs]</a>    <span class="k">def</span> <span class="nf">get_conn</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+        <span class="sd">&quot;&quot;&quot;Returns a connection object&quot;&quot;&quot;</span>
+        <span class="n">db</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_connection</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">presto_conn_id</span><span class="p">)</span>
+        <span class="k">return</span> <span class="n">presto</span><span class="o">.</span><span class="n">connect</span><span class="p">(</span>
+            <span class="n">host</span><span class="o">=</span><span class="n">db</span><span class="o">.</span><span class="n">host</span><span class="p">,</span>
+            <span class="n">port</span><span class="o">=</span><span class="n">db</span><span class="o">.</span><span class="n">port</span><span class="p">,</span>
+            <span class="n">username</span><span class="o">=</span><span class="n">db</span><span class="o">.</span><span class="n">login</span><span class="p">,</span>
+            <span class="n">catalog</span><span class="o">=</span><span class="n">db</span><span class="o">.</span><span class="n">extra_dejson</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;catalog&#39;</span><span class="p">,</span> <span class="s1">&#39;hive&#39;</span><span class="p">),</span>
+            <span class="n">schema</span><span class="o">=</span><span class="n">db</span><span class="o">.</span><span class="n">schema</span><span class="p">)</span></div>
+
+    <span class="nd">@staticmethod</span>
+    <span class="k">def</span> <span class="nf">_strip_sql</span><span class="p">(</span><span class="n">sql</span><span class="p">):</span>
+        <span class="k">return</span> <span class="n">sql</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">rstrip</span><span class="p">(</span><span class="s1">&#39;;&#39;</span><span class="p">)</span>
+
+<div class="viewcode-block" id="PrestoHook.get_records"><a class="viewcode-back" href="../code.html#airflow.hooks.PrestoHook.get_records">[docs]</a>    <span class="k">def</span> <span class="nf">get_records</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">hql</span><span class="p">,</span> <span class="n">parameters</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+        <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd">        Get a set of records from Presto</span>
+<span class="sd">        &quot;&quot;&quot;</span>
+        <span class="k">try</span><span class="p">:</span>
+            <span class="k">return</span> <span class="nb">super</span><span class="p">(</span><span class="n">PrestoHook</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">get_records</span><span class="p">(</span>
+                <span class="bp">self</span><span class="o">.</span><span class="n">_strip_sql</span><span class="p">(</span><span class="n">hql</span><span class="p">),</span> <span class="n">parameters</span><span class="p">)</span>
+        <span class="k">except</span> <span class="n">DatabaseError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
+            <span class="k">if</span> <span class="p">(</span><span class="nb">hasattr</span><span class="p">(</span><span class="n">e</span><span class="p">,</span> <span class="s1">&#39;message&#39;</span><span class="p">)</span> <span class="ow">and</span>
+                <span class="s1">&#39;errorName&#39;</span> <span class="ow">in</span> <span class="n">e</span><span class="o">.</span><span class="n">message</span> <span class="ow">and</span>
+                <span class="s1">&#39;message&#39;</span> <span class="ow">in</span> <span class="n">e</span><span class="o">.</span><span class="n">message</span><span class="p">):</span>
+                <span class="c1"># Use the structured error data in the raised exception</span>
+                <span class="k">raise</span> <span class="n">PrestoException</span><span class="p">(</span><span class="s1">&#39;{name}: {message}&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
+                    <span class="n">name</span><span class="o">=</span><span class="n">e</span><span class="o">.</span><span class="n">message</span><span class="p">[</span><span class="s1">&#39;errorName&#39;</span><span class="p">],</span> <span class="n">message</span><span class="o">=</span><span class="n">e</span><span class="o">.</span><span class="n">message</span><span class="p">[</span><span class="s1">&#39;message&#39;</span><span class="p">]))</span>
+            <span class="k">else</span><span class="p">:</span>
+                <span class="k">raise</span> <span class="n">PrestoException</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">))</span></div>
+
+<div class="viewcode-block" id="PrestoHook.get_first"><a class="viewcode-back" href="../code.html#airflow.hooks.PrestoHook.get_first">[docs]</a>    <span class="k">def</span> <span class="nf">get_first</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">hql</span><span class="p">,</span> <span class="n">parameters</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+        <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd">        Returns only the first row, regardless of how many rows the query</span>
+<span class="sd">        returns.</span>
+<span class="sd">        &quot;&quot;&quot;</span>
+        <span class="k">try</span><span class="p">:</span>
+            <span class="k">return</span> <span class="nb">super</span><span class="p">(</span><span class="n">PrestoHook</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">get_first</span><span class="p">(</span>
+                <span class="bp">self</span><span class="o">.</span><span class="n">_strip_sql</span><span class="p">(</span><span class="n">hql</span><span class="p">),</span> <span class="n">parameters</span><span class="p">)</span>
+        <span class="k">except</span> <span class="n">DatabaseError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
+            <span class="n">obj</span> <span class="o">=</span> <span class="nb">eval</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">))</span>
+            <span class="k">raise</span> <span class="n">PrestoException</span><span class="p">(</span><span class="n">obj</span><span class="p">[</span><span class="s1">&#39;message&#39;</span><span class="p">])</span></div>
+
+<div class="viewcode-block" id="PrestoHook.get_pandas_df"><a class="viewcode-back" href="../code.html#airflow.hooks.PrestoHook.get_pandas_df">[docs]</a>    <span class="k">def</span> <span class="nf">get_pandas_df</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">hql</span><span class="p">,</span> <span class="n">parameters</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+        <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd">        Get a pandas dataframe from a sql query.</span>
+<span class="sd">        &quot;&quot;&quot;</span>
+        <span class="kn">import</span> <span class="nn">pandas</span>
+        <span class="n">cursor</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_cursor</span><span class="p">()</span>
+        <span class="k">try</span><span class="p">:</span>
+            <span class="n">cursor</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_strip_sql</span><span class="p">(</span><span class="n">hql</span><span class="p">),</span> <span class="n">parameters</span><span class="p">)</span>
+            <span class="n">data</span> <span class="o">=</span> <span class="n">cursor</span><span class="o">.</span><span class="n">fetchall</span><span class="p">()</span>
+        <span class="k">except</span> <span class="n">DatabaseError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
+            <span class="n">obj</span> <span class="o">=</span> <span class="nb">eval</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">))</span>
+            <span class="k">raise</span> <span class="n">PrestoException</span><span class="p">(</span><span class="n">obj</span><span class="p">[</span><span class="s1">&#39;message&#39;</span><span class="p">])</span>
+        <span class="n">column_descriptions</span> <span class="o">=</span> <span class="n">cursor</span><span class="o">.</span><span class="n">description</span>
+        <span class="k">if</span> <span class="n">data</span><span class="p">:</span>
+            <span class="n">df</span> <span class="o">=</span> <span class="n">pandas</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
+            <span class="n">df</span><span class="o">.</span><span class="n">columns</span> <span class="o">=</span> <span class="p">[</span><span class="n">c</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">column_descriptions</span><span class="p">]</span>
+        <span class="k">else</span><span class="p">:</span>
+            <span class="n">df</span> <span class="o">=</span> <span class="n">pandas</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">()</span>
+        <span class="k">return</span> <span class="n">df</span></div>
+
+<div class="viewcode-block" id="PrestoHook.run"><a class="viewcode-back" href="../code.html#airflow.hooks.PrestoHook.run">[docs]</a>    <span class="k">def</span> <span class="nf">run</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">hql</span><span class="p">,</span> <span class="n">parameters</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+        <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd">        Execute the statement against Presto. Can be used to create views.</span>
+<span class="sd">        &quot;&quot;&quot;</span>
+        <span class="k">return</span> <span class="nb">super</span><span class="p">(</span><span class="n">PrestoHook</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_strip_sql</span><span class="p">(</span><span class="n">hql</span><span class="p">),</span> <span class="n">parameters</span><span class="p">)</span></div>
+
+    <span class="k">def</span> <span class="nf">insert_rows</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+        <span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">()</span></div>
+</pre></div>
+
+           </div>
+          </div>
+          <footer>
+  
+
+  <hr/>
+
+  <div role="contentinfo">
+    <p>
+        &copy; Copyright 2014, Maxime Beauchemin, Airbnb.
+
+    </p>
+  </div>
+  Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. 
+
+</footer>
+
+        </div>
+      </div>
+
+    </section>
+
+  </div>
+  
+
+
+  
+
+    <script type="text/javascript">
+        var DOCUMENTATION_OPTIONS = {
+            URL_ROOT:'../',
+            VERSION:'',
+            COLLAPSE_INDEX:false,
+            FILE_SUFFIX:'.html',
+            HAS_SOURCE:  true
+        };
+    </script>
+      <script type="text/javascript" src="../_static/jquery.js"></script>
+      <script type="text/javascript" src="../_static/underscore.js"></script>
+      <script type="text/javascript" src="../_static/doctools.js"></script>
+
+  
+
+  
+  
+    <script type="text/javascript" src="../_static/js/theme.js"></script>
+  
+
+  
+  
+  <script type="text/javascript">
+      jQuery(function () {
+          SphinxRtdTheme.StickyNav.enable();
+      });
+  </script>
+   
+
+</body>
+</html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/9e19165c/_modules/python_operator.html
----------------------------------------------------------------------
diff --git a/_modules/python_operator.html b/_modules/python_operator.html
new file mode 100644
index 0000000..0a9b321
--- /dev/null
+++ b/_modules/python_operator.html
@@ -0,0 +1,338 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+  <meta charset="utf-8">
+  
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  
+  <title>python_operator &mdash; Airflow Documentation</title>
+  
+
+  
+  
+
+  
+
+  
+  
+    
+
+  
+
+  
+  
+    <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
+  
+
+  
+
+  
+    <link rel="top" title="Airflow Documentation" href="../index.html"/>
+        <link rel="up" title="Module code" href="index.html"/> 
+
+  
+  <script src="../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+  <div class="wy-grid-for-nav">
+
+    
+    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+      <div class="wy-side-scroll">
+        <div class="wy-side-nav-search">
+          
+
+          
+            <a href="../index.html" class="icon icon-home"> Airflow
+          
+
+          
+          </a>
+
+          
+            
+            
+          
+
+          
+<div role="search">
+  <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
+    <input type="text" name="q" placeholder="Search docs" />
+    <input type="hidden" name="check_keywords" value="yes" />
+    <input type="hidden" name="area" value="default" />
+  </form>
+</div>
+
+          
+        </div>
+
+        <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+          
+            
+            
+                <ul>
+<li class="toctree-l1"><a class="reference internal" href="../project.html">Project</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../license.html">License</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../start.html">Quick Start</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../installation.html">Installation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../tutorial.html">Tutorial</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../configuration.html">Configuration</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../ui.html">UI / Screenshots</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../concepts.html">Concepts</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../profiling.html">Data Profiling</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../cli.html">Command Line Interface</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../scheduler.html">Scheduling &amp; Triggers</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../plugins.html">Plugins</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../security.html">Security</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../faq.html">FAQ</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../code.html">API Reference</a></li>
+</ul>
+
+            
+          
+        </div>
+      </div>
+    </nav>
+
+    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+      
+      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+        <a href="../index.html">Airflow</a>
+      </nav>
+
+
+      
+      <div class="wy-nav-content">
+        <div class="rst-content">
+          
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+  <ul class="wy-breadcrumbs">
+    <li><a href="../index.html">Docs</a> &raquo;</li>
+      
+          <li><a href="index.html">Module code</a> &raquo;</li>
+      
+    <li>python_operator</li>
+      <li class="wy-breadcrumbs-aside">
+        
+          
+        
+      </li>
+  </ul>
+  <hr/>
+</div>
+          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+           <div itemprop="articleBody">
+            
+  <h1>Source code for python_operator</h1><div class="highlight"><pre>
+<span></span><span class="kn">from</span> <span class="nn">builtins</span> <span class="kn">import</span> <span class="nb">str</span>
+<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span>
+<span class="kn">import</span> <span class="nn">logging</span>
+
+<span class="kn">from</span> <span class="nn">airflow.models</span> <span class="kn">import</span> <span class="n">BaseOperator</span><span class="p">,</span> <span class="n">TaskInstance</span>
+<span class="kn">from</span> <span class="nn">airflow.utils.state</span> <span class="kn">import</span> <span class="n">State</span>
+<span class="kn">from</span> <span class="nn">airflow.utils.decorators</span> <span class="kn">import</span> <span class="n">apply_defaults</span>
+<span class="kn">from</span> <span class="nn">airflow</span> <span class="kn">import</span> <span class="n">settings</span>
+
+
+<div class="viewcode-block" id="PythonOperator"><a class="viewcode-back" href="../code.html#airflow.operators.PythonOperator">[docs]</a><span class="k">class</span> <span class="nc">PythonOperator</span><span class="p">(</span><span class="n">BaseOperator</span><span class="p">):</span>
+    <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd">    Executes a Python callable</span>
+
+<span class="sd">    :param python_callable: A reference to an object that is callable</span>
+<span class="sd">    :type python_callable: python callable</span>
+<span class="sd">    :param op_kwargs: a dictionary of keyword arguments that will get unpacked</span>
+<span class="sd">        in your function</span>
+<span class="sd">    :type op_kwargs: dict</span>
+<span class="sd">    :param op_args: a list of positional arguments that will get unpacked when</span>
+<span class="sd">        calling your callable</span>
+<span class="sd">    :type op_args: list</span>
+<span class="sd">    :param provide_context: if set to true, Airflow will pass a set of</span>
+<span class="sd">        keyword arguments that can be used in your function. This set of</span>
+<span class="sd">        kwargs correspond exactly to what you can use in your jinja</span>
+<span class="sd">        templates. For this to work, you need to define `**kwargs` in your</span>
+<span class="sd">        function header.</span>
+<span class="sd">    :type provide_context: bool</span>
+<span class="sd">    :param templates_dict: a dictionary where the values are templates that</span>
+<span class="sd">        will get templated by the Airflow engine sometime between</span>
+<span class="sd">        ``__init__`` and ``execute`` takes place and are made available</span>
+<span class="sd">        in your callable&#39;s context after the template has been applied</span>
+<span class="sd">    :type templates_dict: dict of str</span>
+<span class="sd">    :param templates_exts: a list of file extensions to resolve while</span>
+<span class="sd">        processing templated fields, for examples ``[&#39;.sql&#39;, &#39;.hql&#39;]``</span>
+<span class="sd">    &quot;&quot;&quot;</span>
+    <span class="n">template_fields</span> <span class="o">=</span> <span class="p">(</span><span class="s1">&#39;templates_dict&#39;</span><span class="p">,)</span>
+    <span class="n">template_ext</span> <span class="o">=</span> <span class="nb">tuple</span><span class="p">()</span>
+    <span class="n">ui_color</span> <span class="o">=</span> <span class="s1">&#39;#ffefeb&#39;</span>
+
+    <span class="nd">@apply_defaults</span>
+    <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span>
+            <span class="bp">self</span><span class="p">,</span>
+            <span class="n">python_callable</span><span class="p">,</span>
+            <span class="n">op_args</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+            <span class="n">op_kwargs</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+            <span class="n">provide_context</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+            <span class="n">templates_dict</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+            <span class="n">templates_exts</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+            <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+        <span class="nb">super</span><span class="p">(</span><span class="n">PythonOperator</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">python_callable</span> <span class="o">=</span> <span class="n">python_callable</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">op_args</span> <span class="o">=</span> <span class="n">op_args</span> <span class="ow">or</span> <span class="p">[]</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">op_kwargs</span> <span class="o">=</span> <span class="n">op_kwargs</span> <span class="ow">or</span> <span class="p">{}</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">provide_context</span> <span class="o">=</span> <span class="n">provide_context</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">templates_dict</span> <span class="o">=</span> <span class="n">templates_dict</span>
+        <span class="k">if</span> <span class="n">templates_exts</span><span class="p">:</span>
+            <span class="bp">self</span><span class="o">.</span><span class="n">template_ext</span> <span class="o">=</span> <span class="n">templates_exts</span>
+
+    <span class="k">def</span> <span class="nf">execute</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+        <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">provide_context</span><span class="p">:</span>
+            <span class="n">context</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">op_kwargs</span><span class="p">)</span>
+            <span class="n">context</span><span class="p">[</span><span class="s1">&#39;templates_dict&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">templates_dict</span>
+            <span class="bp">self</span><span class="o">.</span><span class="n">op_kwargs</span> <span class="o">=</span> <span class="n">context</span>
+
+        <span class="n">return_value</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">python_callable</span><span class="p">(</span><span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">op_args</span><span class="p">,</span> <span class="o">**</span><span class="bp">self</span><span class="o">.</span><span class="n">op_kwargs</span><span class="p">)</span>
+        <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Done. Returned value was: &quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">return_value</span><span class="p">))</span>
+        <span class="k">return</span> <span class="n">return_value</span></div>
+
+
+<div class="viewcode-block" id="BranchPythonOperator"><a class="viewcode-back" href="../code.html#airflow.operators.BranchPythonOperator">[docs]</a><span class="k">class</span> <span class="nc">BranchPythonOperator</span><span class="p">(</span><span class="n">PythonOperator</span><span class="p">):</span>
+    <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd">    Allows a workflow to &quot;branch&quot; or follow a single path following the</span>
+<span class="sd">    execution of this task.</span>
+
+<span class="sd">    It derives the PythonOperator and expects a Python function that returns</span>
+<span class="sd">    the task_id to follow. The task_id returned should point to a task</span>
+<span class="sd">    directly downstream from {self}. All other &quot;branches&quot; or</span>
+<span class="sd">    directly downstream tasks are marked with a state of ``skipped`` so that</span>
+<span class="sd">    these paths can&#39;t move forward. The ``skipped`` states are propageted</span>
+<span class="sd">    downstream to allow for the DAG state to fill up and the DAG run&#39;s state</span>
+<span class="sd">    to be inferred.</span>
+
+<span class="sd">    Note that using tasks with ``depends_on_past=True`` downstream from</span>
+<span class="sd">    ``BranchPythonOperator`` is logically unsound as ``skipped`` status</span>
+<span class="sd">    will invariably lead to block tasks that depend on their past successes.</span>
+<span class="sd">    ``skipped`` states propagates where all directly upstream tasks are</span>
+<span class="sd">    ``skipped``.</span>
+<span class="sd">    &quot;&quot;&quot;</span>
+    <span class="k">def</span> <span class="nf">execute</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+        <span class="n">branch</span> <span class="o">=</span> <span class="nb">super</span><span class="p">(</span><span class="n">BranchPythonOperator</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">context</span><span class="p">)</span>
+        <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Following branch &quot;</span> <span class="o">+</span> <span class="n">branch</span><span class="p">)</span>
+        <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Marking other directly downstream tasks as skipped&quot;</span><span class="p">)</span>
+        <span class="n">session</span> <span class="o">=</span> <span class="n">settings</span><span class="o">.</span><span class="n">Session</span><span class="p">()</span>
+        <span class="k">for</span> <span class="n">task</span> <span class="ow">in</span> <span class="n">context</span><span class="p">[</span><span class="s1">&#39;task&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">downstream_list</span><span class="p">:</span>
+            <span class="k">if</span> <span class="n">task</span><span class="o">.</span><span class="n">task_id</span> <span class="o">!=</span> <span class="n">branch</span><span class="p">:</span>
+                <span class="n">ti</span> <span class="o">=</span> <span class="n">TaskInstance</span><span class="p">(</span>
+                    <span class="n">task</span><span class="p">,</span> <span class="n">execution_date</span><span class="o">=</span><span class="n">context</span><span class="p">[</span><span class="s1">&#39;ti&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">execution_date</span><span class="p">)</span>
+                <span class="n">ti</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="n">State</span><span class="o">.</span><span class="n">SKIPPED</span>
+                <span class="n">ti</span><span class="o">.</span><span class="n">start_date</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
+                <span class="n">ti</span><span class="o">.</span><span class="n">end_date</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
+                <span class="n">session</span><span class="o">.</span><span class="n">merge</span><span class="p">(</span><span class="n">ti</span><span class="p">)</span>
+        <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+        <span class="n">session</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+        <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Done.&quot;</span><span class="p">)</span></div>
+
+
+<div class="viewcode-block" id="ShortCircuitOperator"><a class="viewcode-back" href="../code.html#airflow.operators.ShortCircuitOperator">[docs]</a><span class="k">class</span> <span class="nc">ShortCircuitOperator</span><span class="p">(</span><span class="n">PythonOperator</span><span class="p">):</span>
+    <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd">    Allows a workflow to continue only if a condition is met. Otherwise, the</span>
+<span class="sd">    workflow &quot;short-circuits&quot; and downstream tasks are skipped.</span>
+
+<span class="sd">    The ShortCircuitOperator is derived from the PythonOperator. It evaluates a</span>
+<span class="sd">    condition and short-circuits the workflow if the condition is False. Any</span>
+<span class="sd">    downstream tasks are marked with a state of &quot;skipped&quot;. If the condition is</span>
+<span class="sd">    True, downstream tasks proceed as normal.</span>
+
+<span class="sd">    The condition is determined by the result of `python_callable`.</span>
+<span class="sd">    &quot;&quot;&quot;</span>
+    <span class="k">def</span> <span class="nf">execute</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+        <span class="n">condition</span> <span class="o">=</span> <span class="nb">super</span><span class="p">(</span><span class="n">ShortCircuitOperator</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">context</span><span class="p">)</span>
+        <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Condition result is {}&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">condition</span><span class="p">))</span>
+        <span class="k">if</span> <span class="n">condition</span><span class="p">:</span>
+            <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Proceeding with downstream tasks...&#39;</span><span class="p">)</span>
+            <span class="k">return</span>
+        <span class="k">else</span><span class="p">:</span>
+            <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Skipping downstream tasks...&#39;</span><span class="p">)</span>
+            <span class="n">session</span> <span class="o">=</span> <span class="n">settings</span><span class="o">.</span><span class="n">Session</span><span class="p">()</span>
+            <span class="k">for</span> <span class="n">task</span> <span class="ow">in</span> <span class="n">context</span><span class="p">[</span><span class="s1">&#39;task&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">downstream_list</span><span class="p">:</span>
+                <span class="n">ti</span> <span class="o">=</span> <span class="n">TaskInstance</span><span class="p">(</span>
+                    <span class="n">task</span><span class="p">,</span> <span class="n">execution_date</span><span class="o">=</span><span class="n">context</span><span class="p">[</span><span class="s1">&#39;ti&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">execution_date</span><span class="p">)</span>
+                <span class="n">ti</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="n">State</span><span class="o">.</span><span class="n">SKIPPED</span>
+                <span class="n">ti</span><span class="o">.</span><span class="n">start_date</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
+                <span class="n">ti</span><span class="o">.</span><span class="n">end_date</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
+                <span class="n">session</span><span class="o">.</span><span class="n">merge</span><span class="p">(</span><span class="n">ti</span><span class="p">)</span>
+            <span class="n">session</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span>
+            <span class="n">session</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+            <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Done.&quot;</span><span class="p">)</span></div>
+</pre></div>
+
+           </div>
+          </div>
+          <footer>
+  
+
+  <hr/>
+
+  <div role="contentinfo">
+    <p>
+        &copy; Copyright 2014, Maxime Beauchemin, Airbnb.
+
+    </p>
+  </div>
+  Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. 
+
+</footer>
+
+        </div>
+      </div>
+
+    </section>
+
+  </div>
+  
+
+
+  
+
+    <script type="text/javascript">
+        var DOCUMENTATION_OPTIONS = {
+            URL_ROOT:'../',
+            VERSION:'',
+            COLLAPSE_INDEX:false,
+            FILE_SUFFIX:'.html',
+            HAS_SOURCE:  true
+        };
+    </script>
+      <script type="text/javascript" src="../_static/jquery.js"></script>
+      <script type="text/javascript" src="../_static/underscore.js"></script>
+      <script type="text/javascript" src="../_static/doctools.js"></script>
+
+  
+
+  
+  
+    <script type="text/javascript" src="../_static/js/theme.js"></script>
+  
+
+  
+  
+  <script type="text/javascript">
+      jQuery(function () {
+          SphinxRtdTheme.StickyNav.enable();
+      });
+  </script>
+   
+
+</body>
+</html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/9e19165c/_modules/s3_to_hive_operator.html
----------------------------------------------------------------------
diff --git a/_modules/s3_to_hive_operator.html b/_modules/s3_to_hive_operator.html
new file mode 100644
index 0000000..e0016ac
--- /dev/null
+++ b/_modules/s3_to_hive_operator.html
@@ -0,0 +1,353 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+  <meta charset="utf-8">
+  
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  
+  <title>s3_to_hive_operator &mdash; Airflow Documentation</title>
+  
+
+  
+  
+
+  
+
+  
+  
+    
+
+  
+
+  
+  
+    <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
+  
+
+  
+
+  
+    <link rel="top" title="Airflow Documentation" href="../index.html"/>
+        <link rel="up" title="Module code" href="index.html"/> 
+
+  
+  <script src="../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+  <div class="wy-grid-for-nav">
+
+    
+    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+      <div class="wy-side-scroll">
+        <div class="wy-side-nav-search">
+          
+
+          
+            <a href="../index.html" class="icon icon-home"> Airflow
+          
+
+          
+          </a>
+
+          
+            
+            
+          
+
+          
+<div role="search">
+  <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
+    <input type="text" name="q" placeholder="Search docs" />
+    <input type="hidden" name="check_keywords" value="yes" />
+    <input type="hidden" name="area" value="default" />
+  </form>
+</div>
+
+          
+        </div>
+
+        <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+          
+            
+            
+                <ul>
+<li class="toctree-l1"><a class="reference internal" href="../project.html">Project</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../license.html">License</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../start.html">Quick Start</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../installation.html">Installation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../tutorial.html">Tutorial</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../configuration.html">Configuration</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../ui.html">UI / Screenshots</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../concepts.html">Concepts</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../profiling.html">Data Profiling</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../cli.html">Command Line Interface</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../scheduler.html">Scheduling &amp; Triggers</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../plugins.html">Plugins</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../security.html">Security</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../faq.html">FAQ</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../code.html">API Reference</a></li>
+</ul>
+
+            
+          
+        </div>
+      </div>
+    </nav>
+
+    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+      
+      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+        <a href="../index.html">Airflow</a>
+      </nav>
+
+
+      
+      <div class="wy-nav-content">
+        <div class="rst-content">
+          
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+  <ul class="wy-breadcrumbs">
+    <li><a href="../index.html">Docs</a> &raquo;</li>
+      
+          <li><a href="index.html">Module code</a> &raquo;</li>
+      
+    <li>s3_to_hive_operator</li>
+      <li class="wy-breadcrumbs-aside">
+        
+          
+        
+      </li>
+  </ul>
+  <hr/>
+</div>
+          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+           <div itemprop="articleBody">
+            
+  <h1>Source code for s3_to_hive_operator</h1><div class="highlight"><pre>
+<span></span><span class="kn">from</span> <span class="nn">builtins</span> <span class="kn">import</span> <span class="nb">next</span>
+<span class="kn">from</span> <span class="nn">builtins</span> <span class="kn">import</span> <span class="nb">zip</span>
+<span class="kn">import</span> <span class="nn">logging</span>
+<span class="kn">from</span> <span class="nn">tempfile</span> <span class="kn">import</span> <span class="n">NamedTemporaryFile</span>
+
+<span class="kn">from</span> <span class="nn">airflow.exceptions</span> <span class="kn">import</span> <span class="n">AirflowException</span>
+<span class="kn">from</span> <span class="nn">airflow.hooks</span> <span class="kn">import</span> <span class="n">HiveCliHook</span><span class="p">,</span> <span class="n">S3Hook</span>
+<span class="kn">from</span> <span class="nn">airflow.models</span> <span class="kn">import</span> <span class="n">BaseOperator</span>
+<span class="kn">from</span> <span class="nn">airflow.utils.decorators</span> <span class="kn">import</span> <span class="n">apply_defaults</span>
+
+
+<div class="viewcode-block" id="S3ToHiveTransfer"><a class="viewcode-back" href="../code.html#airflow.operators.S3ToHiveTransfer">[docs]</a><span class="k">class</span> <span class="nc">S3ToHiveTransfer</span><span class="p">(</span><span class="n">BaseOperator</span><span class="p">):</span>
+    <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd">    Moves data from S3 to Hive. The operator downloads a file from S3,</span>
+<span class="sd">    stores the file locally before loading it into a Hive table.</span>
+<span class="sd">    If the ``create`` or ``recreate`` arguments are set to ``True``,</span>
+<span class="sd">    a ``CREATE TABLE`` and ``DROP TABLE`` statements are generated.</span>
+<span class="sd">    Hive data types are inferred from the cursor&#39;s metadata from.</span>
+
+<span class="sd">    Note that the table generated in Hive uses ``STORED AS textfile``</span>
+<span class="sd">    which isn&#39;t the most efficient serialization format. If a</span>
+<span class="sd">    large amount of data is loaded and/or if the tables gets</span>
+<span class="sd">    queried considerably, you may want to use this operator only to</span>
+<span class="sd">    stage the data into a temporary table before loading it into its</span>
+<span class="sd">    final destination using a ``HiveOperator``.</span>
+
+<span class="sd">    :param s3_key: The key to be retrieved from S3</span>
+<span class="sd">    :type s3_key: str</span>
+<span class="sd">    :param field_dict: A dictionary of the fields name in the file</span>
+<span class="sd">        as keys and their Hive types as values</span>
+<span class="sd">    :type field_dict: dict</span>
+<span class="sd">    :param hive_table: target Hive table, use dot notation to target a</span>
+<span class="sd">        specific database</span>
+<span class="sd">    :type hive_table: str</span>
+<span class="sd">    :param create: whether to create the table if it doesn&#39;t exist</span>
+<span class="sd">    :type create: bool</span>
+<span class="sd">    :param recreate: whether to drop and recreate the table at every</span>
+<span class="sd">        execution</span>
+<span class="sd">    :type recreate: bool</span>
+<span class="sd">    :param partition: target partition as a dict of partition columns</span>
+<span class="sd">        and values</span>
+<span class="sd">    :type partition: dict</span>
+<span class="sd">    :param headers: whether the file contains column names on the first</span>
+<span class="sd">        line</span>
+<span class="sd">    :type headers: bool</span>
+<span class="sd">    :param check_headers: whether the column names on the first line should be</span>
+<span class="sd">        checked against the keys of field_dict</span>
+<span class="sd">    :type check_headers: bool</span>
+<span class="sd">    :param wildcard_match: whether the s3_key should be interpreted as a Unix</span>
+<span class="sd">        wildcard pattern</span>
+<span class="sd">    :type wildcard_match: bool</span>
+<span class="sd">    :param delimiter: field delimiter in the file</span>
+<span class="sd">    :type delimiter: str</span>
+<span class="sd">    :param s3_conn_id: source s3 connection</span>
+<span class="sd">    :type s3_conn_id: str</span>
+<span class="sd">    :param hive_conn_id: destination hive connection</span>
+<span class="sd">    :type hive_conn_id: str</span>
+<span class="sd">    &quot;&quot;&quot;</span>
+
+    <span class="n">template_fields</span> <span class="o">=</span> <span class="p">(</span><span class="s1">&#39;s3_key&#39;</span><span class="p">,</span> <span class="s1">&#39;partition&#39;</span><span class="p">,</span> <span class="s1">&#39;hive_table&#39;</span><span class="p">)</span>
+    <span class="n">template_ext</span> <span class="o">=</span> <span class="p">()</span>
+    <span class="n">ui_color</span> <span class="o">=</span> <span class="s1">&#39;#a0e08c&#39;</span>
+
+    <span class="nd">@apply_defaults</span>
+    <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span>
+            <span class="bp">self</span><span class="p">,</span>
+            <span class="n">s3_key</span><span class="p">,</span>
+            <span class="n">field_dict</span><span class="p">,</span>
+            <span class="n">hive_table</span><span class="p">,</span>
+            <span class="n">delimiter</span><span class="o">=</span><span class="s1">&#39;,&#39;</span><span class="p">,</span>
+            <span class="n">create</span><span class="o">=</span><span class="bp">True</span><span class="p">,</span>
+            <span class="n">recreate</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+            <span class="n">partition</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+            <span class="n">headers</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+            <span class="n">check_headers</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+            <span class="n">wildcard_match</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
+            <span class="n">s3_conn_id</span><span class="o">=</span><span class="s1">&#39;s3_default&#39;</span><span class="p">,</span>
+            <span class="n">hive_cli_conn_id</span><span class="o">=</span><span class="s1">&#39;hive_cli_default&#39;</span><span class="p">,</span>
+            <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
+        <span class="nb">super</span><span class="p">(</span><span class="n">S3ToHiveTransfer</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">s3_key</span> <span class="o">=</span> <span class="n">s3_key</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">field_dict</span> <span class="o">=</span> <span class="n">field_dict</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">hive_table</span> <span class="o">=</span> <span class="n">hive_table</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">delimiter</span> <span class="o">=</span> <span class="n">delimiter</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">create</span> <span class="o">=</span> <span class="n">create</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">recreate</span> <span class="o">=</span> <span class="n">recreate</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">partition</span> <span class="o">=</span> <span class="n">partition</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">headers</span> <span class="o">=</span> <span class="n">headers</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">check_headers</span> <span class="o">=</span> <span class="n">check_headers</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">wildcard_match</span> <span class="o">=</span> <span class="n">wildcard_match</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">hive_cli_conn_id</span> <span class="o">=</span> <span class="n">hive_cli_conn_id</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">s3_conn_id</span> <span class="o">=</span> <span class="n">s3_conn_id</span>
+
+    <span class="k">def</span> <span class="nf">execute</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">hive</span> <span class="o">=</span> <span class="n">HiveCliHook</span><span class="p">(</span><span class="n">hive_cli_conn_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">hive_cli_conn_id</span><span class="p">)</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">s3</span> <span class="o">=</span> <span class="n">S3Hook</span><span class="p">(</span><span class="n">s3_conn_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">s3_conn_id</span><span class="p">)</span>
+        <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Downloading S3 file&quot;</span><span class="p">)</span>
+        <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">wildcard_match</span><span class="p">:</span>
+            <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">s3</span><span class="o">.</span><span class="n">check_for_wildcard_key</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">s3_key</span><span class="p">):</span>
+                <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span><span class="s2">&quot;No key matches {0}&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">s3_key</span><span class="p">))</span>
+            <span class="n">s3_key_object</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">s3</span><span class="o">.</span><span class="n">get_wildcard_key</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">s3_key</span><span class="p">)</span>
+        <span class="k">else</span><span class="p">:</span>
+            <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">s3</span><span class="o">.</span><span class="n">check_for_key</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">s3_key</span><span class="p">):</span>
+                <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span>
+                    <span class="s2">&quot;The key {0} does not exists&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">s3_key</span><span class="p">))</span>
+            <span class="n">s3_key_object</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">s3</span><span class="o">.</span><span class="n">get_key</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">s3_key</span><span class="p">)</span>
+        <span class="k">with</span> <span class="n">NamedTemporaryFile</span><span class="p">(</span><span class="s2">&quot;w&quot;</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
+            <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Dumping S3 key {0} contents to local&quot;</span>
+                         <span class="s2">&quot; file {1}&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">s3_key_object</span><span class="o">.</span><span class="n">key</span><span class="p">,</span> <span class="n">f</span><span class="o">.</span><span class="n">name</span><span class="p">))</span>
+            <span class="n">s3_key_object</span><span class="o">.</span><span class="n">get_contents_to_file</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
+            <span class="n">f</span><span class="o">.</span><span class="n">flush</span><span class="p">()</span>
+            <span class="bp">self</span><span class="o">.</span><span class="n">s3</span><span class="o">.</span><span class="n">connection</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+            <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">headers</span><span class="p">:</span>
+                <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Loading file into Hive&quot;</span><span class="p">)</span>
+                <span class="bp">self</span><span class="o">.</span><span class="n">hive</span><span class="o">.</span><span class="n">load_file</span><span class="p">(</span>
+                    <span class="n">f</span><span class="o">.</span><span class="n">name</span><span class="p">,</span>
+                    <span class="bp">self</span><span class="o">.</span><span class="n">hive_table</span><span class="p">,</span>
+                    <span class="n">field_dict</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">field_dict</span><span class="p">,</span>
+                    <span class="n">create</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">create</span><span class="p">,</span>
+                    <span class="n">partition</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">partition</span><span class="p">,</span>
+                    <span class="n">delimiter</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">delimiter</span><span class="p">,</span>
+                    <span class="n">recreate</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">recreate</span><span class="p">)</span>
+            <span class="k">else</span><span class="p">:</span>
+                <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">f</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="s1">&#39;r&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">tmpf</span><span class="p">:</span>
+                    <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">check_headers</span><span class="p">:</span>
+                        <span class="n">header_l</span> <span class="o">=</span> <span class="n">tmpf</span><span class="o">.</span><span class="n">readline</span><span class="p">()</span>
+                        <span class="n">header_line</span> <span class="o">=</span> <span class="n">header_l</span><span class="o">.</span><span class="n">rstrip</span><span class="p">()</span>
+                        <span class="n">header_list</span> <span class="o">=</span> <span class="n">header_line</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">delimiter</span><span class="p">)</span>
+                        <span class="n">field_names</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">field_dict</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
+                        <span class="n">test_field_match</span> <span class="o">=</span> <span class="p">[</span><span class="n">h1</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="o">==</span> <span class="n">h2</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="k">for</span> <span class="n">h1</span><span class="p">,</span> <span class="n">h2</span>
+                                            <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">header_list</span><span class="p">,</span> <span class="n">field_names</span><span class="p">)]</span>
+                        <span class="k">if</span> <span class="ow">not</span> <span class="nb">all</span><span class="p">(</span><span class="n">test_field_match</span><span class="p">):</span>
+                            <span class="n">logging</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">&quot;Headers do not match field names&quot;</span>
+                                            <span class="s2">&quot;File headers:</span><span class="se">\n</span><span class="s2"> {header_list}</span><span class="se">\n</span><span class="s2">&quot;</span>
+                                            <span class="s2">&quot;Field names: </span><span class="se">\n</span><span class="s2"> {field_names}</span><span class="se">\n</span><span class="s2">&quot;</span>
+                                            <span class="s2">&quot;&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="o">**</span><span class="nb">locals</span><span class="p">()))</span>
+                            <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span><span class="s2">&quot;Headers do not match the &quot;</span>
+                                            <span class="s2">&quot;field_dict keys&quot;</span><span class="p">)</span>
+                    <span class="k">with</span> <span class="n">NamedTemporaryFile</span><span class="p">(</span><span class="s2">&quot;w&quot;</span><span class="p">)</span> <span class="k">as</span> <span class="n">f_no_headers</span><span class="p">:</span>
+                        <span class="n">tmpf</span><span class="o">.</span><span class="n">seek</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
+                        <span class="nb">next</span><span class="p">(</span><span class="n">tmpf</span><span class="p">)</span>
+                        <span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="n">tmpf</span><span class="p">:</span>
+                            <span class="n">f_no_headers</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">line</span><span class="p">)</span>
+                        <span class="n">f_no_headers</span><span class="o">.</span><span class="n">flush</span><span class="p">()</span>
+                        <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Loading file without headers into Hive&quot;</span><span class="p">)</span>
+                        <span class="bp">self</span><span class="o">.</span><span class="n">hive</span><span class="o">.</span><span class="n">load_file</span><span class="p">(</span>
+                            <span class="n">f_no_headers</span><span class="o">.</span><span class="n">name</span><span class="p">,</span>
+                            <span class="bp">self</span><span class="o">.</span><span class="n">hive_table</span><span class="p">,</span>
+                            <span class="n">field_dict</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">field_dict</span><span class="p">,</span>
+                            <span class="n">create</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">create</span><span class="p">,</span>
+                            <span class="n">partition</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">partition</span><span class="p">,</span>
+                            <span class="n">delimiter</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">delimiter</span><span class="p">,</span>
+                            <span class="n">recreate</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">recreate</span><span class="p">)</span></div>
+</pre></div>
+
+           </div>
+          </div>
+          <footer>
+  
+
+  <hr/>
+
+  <div role="contentinfo">
+    <p>
+        &copy; Copyright 2014, Maxime Beauchemin, Airbnb.
+
+    </p>
+  </div>
+  Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. 
+
+</footer>
+
+        </div>
+      </div>
+
+    </section>
+
+  </div>
+  
+
+
+  
+
+    <script type="text/javascript">
+        var DOCUMENTATION_OPTIONS = {
+            URL_ROOT:'../',
+            VERSION:'',
+            COLLAPSE_INDEX:false,
+            FILE_SUFFIX:'.html',
+            HAS_SOURCE:  true
+        };
+    </script>
+      <script type="text/javascript" src="../_static/jquery.js"></script>
+      <script type="text/javascript" src="../_static/underscore.js"></script>
+      <script type="text/javascript" src="../_static/doctools.js"></script>
+
+  
+
+  
+  
+    <script type="text/javascript" src="../_static/js/theme.js"></script>
+  
+
+  
+  
+  <script type="text/javascript">
+      jQuery(function () {
+          SphinxRtdTheme.StickyNav.enable();
+      });
+  </script>
+   
+
+</body>
+</html>
\ No newline at end of file