You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by ma...@apache.org on 2016/06/05 05:24:22 UTC
[32/34] incubator-airflow-site git commit: Initial commit
http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/9e19165c/_modules/airflow/macros/hive.html
----------------------------------------------------------------------
diff --git a/_modules/airflow/macros/hive.html b/_modules/airflow/macros/hive.html
new file mode 100644
index 0000000..cfcc1a0
--- /dev/null
+++ b/_modules/airflow/macros/hive.html
@@ -0,0 +1,298 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+ <meta charset="utf-8">
+
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>airflow.macros.hive — Airflow Documentation</title>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
+
+
+
+
+
+ <link rel="top" title="Airflow Documentation" href="../../../index.html"/>
+ <link rel="up" title="airflow.macros" href="../macros.html"/>
+
+
+ <script src="../../../_static/js/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+ <div class="wy-grid-for-nav">
+
+
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+ <div class="wy-side-scroll">
+ <div class="wy-side-nav-search">
+
+
+
+ <a href="../../../index.html" class="icon icon-home"> Airflow
+
+
+
+ </a>
+
+
+
+
+
+
+
+<div role="search">
+ <form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
+ <input type="text" name="q" placeholder="Search docs" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+</div>
+
+
+ </div>
+
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+
+
+
+ <ul>
+<li class="toctree-l1"><a class="reference internal" href="../../../project.html">Project</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../license.html">License</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../start.html">Quick Start</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../installation.html">Installation</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../tutorial.html">Tutorial</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../configuration.html">Configuration</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../ui.html">UI / Screenshots</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../concepts.html">Concepts</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../profiling.html">Data Profiling</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../cli.html">Command Line Interface</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../scheduler.html">Scheduling &amp; Triggers</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../plugins.html">Plugins</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../security.html">Security</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../faq.html">FAQ</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../../code.html">API Reference</a></li>
+</ul>
+
+
+
+ </div>
+ </div>
+ </nav>
+
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+
+ <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+ <a href="../../../index.html">Airflow</a>
+ </nav>
+
+
+
+ <div class="wy-nav-content">
+ <div class="rst-content">
+
+
+
+
+
+
+<div role="navigation" aria-label="breadcrumbs navigation">
+ <ul class="wy-breadcrumbs">
+ <li><a href="../../../index.html">Docs</a> »</li>
+
+ <li><a href="../../index.html">Module code</a> »</li>
+
+ <li><a href="../macros.html">airflow.macros</a> »</li>
+
+ <li>airflow.macros.hive</li>
+ <li class="wy-breadcrumbs-aside">
+
+
+
+ </li>
+ </ul>
+ <hr/>
+</div>
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+ <div itemprop="articleBody">
+
+ <h1>Source code for airflow.macros.hive</h1><div class="highlight"><pre>
+<span></span><span class="kn">import</span> <span class="nn">datetime</span>
+
+
+<div class="viewcode-block" id="max_partition"><a class="viewcode-back" href="../../../code.html#airflow.macros.hive.max_partition">[docs]</a><span class="k">def</span> <span class="nf">max_partition</span><span class="p">(</span>
+ <span class="n">table</span><span class="p">,</span> <span class="n">schema</span><span class="o">=</span><span class="s2">"default"</span><span class="p">,</span> <span class="n">field</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="nb">filter</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span>
+ <span class="n">metastore_conn_id</span><span class="o">=</span><span class="s1">'metastore_default'</span><span class="p">):</span>
+ <span class="sd">'''</span>
+<span class="sd"> Gets the max partition for a table.</span>
+
+<span class="sd"> :param schema: The hive schema the table lives in</span>
+<span class="sd"> :type schema: string</span>
+<span class="sd"> :param table: The hive table you are interested in, supports the dot</span>
+<span class="sd"> notation as in "my_database.my_table", if a dot is found,</span>
+<span class="sd"> the schema param is disregarded</span>
+<span class="sd"> :type table: string</span>
+<span class="sd"> :param metastore_conn_id: The hive metastore connection you are interested in.</span>
+<span class="sd"> If your default is set you don't need to use this parameter.</span>
+<span class="sd"> :type metastore_conn_id: string</span>
+<span class="sd"> :param filter: filter on a subset of partition as in</span>
+<span class="sd"> `sub_part='specific_value'`</span>
+<span class="sd"> :type filter: string</span>
+<span class="sd"> :param field: the field to get the max value from. If there's only</span>
+<span class="sd"> one partition field, this will be inferred</span>
+
+<span class="sd"> >>> max_partition('airflow.static_babynames_partitioned')</span>
+<span class="sd"> '2015-01-01'</span>
+<span class="sd"> '''</span>
+ <span class="kn">from</span> <span class="nn">airflow.hooks</span> <span class="kn">import</span> <span class="n">HiveMetastoreHook</span>
+ <span class="k">if</span> <span class="s1">'.'</span> <span class="ow">in</span> <span class="n">table</span><span class="p">:</span>
+ <span class="n">schema</span><span class="p">,</span> <span class="n">table</span> <span class="o">=</span> <span class="n">table</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'.'</span><span class="p">)</span>
+ <span class="n">hh</span> <span class="o">=</span> <span class="n">HiveMetastoreHook</span><span class="p">(</span><span class="n">metastore_conn_id</span><span class="o">=</span><span class="n">metastore_conn_id</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">hh</span><span class="o">.</span><span class="n">max_partition</span><span class="p">(</span>
+ <span class="n">schema</span><span class="o">=</span><span class="n">schema</span><span class="p">,</span> <span class="n">table_name</span><span class="o">=</span><span class="n">table</span><span class="p">,</span> <span class="n">field</span><span class="o">=</span><span class="n">field</span><span class="p">,</span> <span class="nb">filter</span><span class="o">=</span><span class="nb">filter</span><span class="p">)</span></div>
+
+
+<span class="k">def</span> <span class="nf">_closest_date</span><span class="p">(</span><span class="n">target_dt</span><span class="p">,</span> <span class="n">date_list</span><span class="p">,</span> <span class="n">before_target</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
+ <span class="sd">'''</span>
+<span class="sd"> This function finds the date in a list closest to the target date.</span>
+<span class="sd"> An optional parameter can be given to get the closest before or after.</span>
+
+<span class="sd"> :param target_dt: The target date</span>
+<span class="sd"> :type target_dt: datetime.date</span>
+<span class="sd"> :param date_list: The list of dates to search</span>
+<span class="sd"> :type date_list: datetime.date list</span>
+<span class="sd"> :param before_target: closest before or after the target</span>
+<span class="sd"> :type before_target: bool or None</span>
+<span class="sd"> :returns: The closest date</span>
+<span class="sd"> :rtype: datetime.date or None</span>
+<span class="sd"> '''</span>
+ <span class="n">fb</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">d</span><span class="p">:</span> <span class="n">d</span> <span class="o">-</span> <span class="n">target_dt</span> <span class="k">if</span> <span class="n">d</span> <span class="o">>=</span> <span class="n">target_dt</span> <span class="k">else</span> <span class="n">datetime</span><span class="o">.</span><span class="n">timedelta</span><span class="o">.</span><span class="n">max</span>
+ <span class="n">fa</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">d</span><span class="p">:</span> <span class="n">d</span> <span class="o">-</span> <span class="n">target_dt</span> <span class="k">if</span> <span class="n">d</span> <span class="o">&lt;=</span> <span class="n">target_dt</span> <span class="k">else</span> <span class="n">datetime</span><span class="o">.</span><span class="n">timedelta</span><span class="o">.</span><span class="n">min</span>
+ <span class="n">fnone</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">d</span><span class="p">:</span> <span class="n">target_dt</span> <span class="o">-</span> <span class="n">d</span> <span class="k">if</span> <span class="n">d</span> <span class="o">&lt;</span> <span class="n">target_dt</span> <span class="k">else</span> <span class="n">d</span> <span class="o">-</span> <span class="n">target_dt</span>
+ <span class="k">if</span> <span class="n">before_target</span> <span class="ow">is</span> <span class="bp">None</span><span class="p">:</span>
+ <span class="k">return</span> <span class="nb">min</span><span class="p">(</span><span class="n">date_list</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="n">fnone</span><span class="p">)</span><span class="o">.</span><span class="n">date</span><span class="p">()</span>
+ <span class="k">if</span> <span class="n">before_target</span><span class="p">:</span>
+ <span class="k">return</span> <span class="nb">min</span><span class="p">(</span><span class="n">date_list</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="n">fb</span><span class="p">)</span><span class="o">.</span><span class="n">date</span><span class="p">()</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">return</span> <span class="nb">min</span><span class="p">(</span><span class="n">date_list</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="n">fa</span><span class="p">)</span><span class="o">.</span><span class="n">date</span><span class="p">()</span>
+
+
+<div class="viewcode-block" id="closest_ds_partition"><a class="viewcode-back" href="../../../code.html#airflow.macros.hive.closest_ds_partition">[docs]</a><span class="k">def</span> <span class="nf">closest_ds_partition</span><span class="p">(</span>
+ <span class="n">table</span><span class="p">,</span> <span class="n">ds</span><span class="p">,</span> <span class="n">before</span><span class="o">=</span><span class="bp">True</span><span class="p">,</span> <span class="n">schema</span><span class="o">=</span><span class="s2">"default"</span><span class="p">,</span>
+ <span class="n">metastore_conn_id</span><span class="o">=</span><span class="s1">'metastore_default'</span><span class="p">):</span>
+ <span class="sd">'''</span>
+<span class="sd"> This function finds the partition datestamp of a hive table closest to the target date.</span>
+<span class="sd"> An optional parameter can be given to get the closest before or after.</span>
+
+<span class="sd"> :param table: A hive table name</span>
+<span class="sd"> :type table: str</span>
+<span class="sd"> :param ds: A datestamp ``%Y-%m-%d`` e.g. ``yyyy-mm-dd``</span>
+<span class="sd"> :type ds: str</span>
+<span class="sd"> :param before: closest before (True), after (False) or either side of ds (None)</span>
+<span class="sd"> :type before: bool or None</span>
+<span class="sd"> :returns: The closest date</span>
+<span class="sd"> :rtype: str or None</span>
+
+<span class="sd"> >>> tbl = 'airflow.static_babynames_partitioned'</span>
+<span class="sd"> >>> closest_ds_partition(tbl, '2015-01-02')</span>
+<span class="sd"> '2015-01-01'</span>
+<span class="sd"> '''</span>
+ <span class="kn">from</span> <span class="nn">airflow.hooks</span> <span class="kn">import</span> <span class="n">HiveMetastoreHook</span>
+ <span class="k">if</span> <span class="s1">'.'</span> <span class="ow">in</span> <span class="n">table</span><span class="p">:</span>
+ <span class="n">schema</span><span class="p">,</span> <span class="n">table</span> <span class="o">=</span> <span class="n">table</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'.'</span><span class="p">)</span>
+ <span class="n">hh</span> <span class="o">=</span> <span class="n">HiveMetastoreHook</span><span class="p">(</span><span class="n">metastore_conn_id</span><span class="o">=</span><span class="n">metastore_conn_id</span><span class="p">)</span>
+ <span class="n">partitions</span> <span class="o">=</span> <span class="n">hh</span><span class="o">.</span><span class="n">get_partitions</span><span class="p">(</span><span class="n">schema</span><span class="o">=</span><span class="n">schema</span><span class="p">,</span> <span class="n">table_name</span><span class="o">=</span><span class="n">table</span><span class="p">)</span>
+ <span class="k">if</span> <span class="ow">not</span> <span class="n">partitions</span><span class="p">:</span>
+ <span class="k">return</span> <span class="bp">None</span>
+ <span class="n">part_vals</span> <span class="o">=</span> <span class="p">[</span><span class="nb">list</span><span class="p">(</span><span class="n">p</span><span class="o">.</span><span class="n">values</span><span class="p">())[</span><span class="mi">0</span><span class="p">]</span> <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">partitions</span><span class="p">]</span>
+ <span class="k">if</span> <span class="n">ds</span> <span class="ow">in</span> <span class="n">part_vals</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">ds</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="n">parts</span> <span class="o">=</span> <span class="p">[</span><span class="n">datetime</span><span class="o">.</span><span class="n">datetime</span><span class="o">.</span><span class="n">strptime</span><span class="p">(</span><span class="n">pv</span><span class="p">,</span> <span class="s1">'%Y-%m-</span><span class="si">%d</span><span class="s1">'</span><span class="p">)</span>
+ <span class="k">for</span> <span class="n">pv</span> <span class="ow">in</span> <span class="n">part_vals</span><span class="p">]</span>
+ <span class="n">target_dt</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">datetime</span><span class="o">.</span><span class="n">strptime</span><span class="p">(</span><span class="n">ds</span><span class="p">,</span> <span class="s1">'%Y-%m-</span><span class="si">%d</span><span class="s1">'</span><span class="p">)</span>
+ <span class="n">closest_ds</span> <span class="o">=</span> <span class="n">_closest_date</span><span class="p">(</span><span class="n">target_dt</span><span class="p">,</span> <span class="n">parts</span><span class="p">,</span> <span class="n">before_target</span><span class="o">=</span><span class="n">before</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">closest_ds</span><span class="o">.</span><span class="n">isoformat</span><span class="p">()</span></div>
+</pre></div>
+
+ </div>
+ </div>
+ <footer>
+
+
+ <hr/>
+
+ <div role="contentinfo">
+ <p>
+ © Copyright 2014, Maxime Beauchemin, Airbnb.
+
+ </p>
+ </div>
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+
+</footer>
+
+ </div>
+ </div>
+
+ </section>
+
+ </div>
+
+
+
+
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT:'../../../',
+ VERSION:'',
+ COLLAPSE_INDEX:false,
+ FILE_SUFFIX:'.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../../_static/doctools.js"></script>
+
+
+
+
+
+ <script type="text/javascript" src="../../../_static/js/theme.js"></script>
+
+
+
+
+ <script type="text/javascript">
+ jQuery(function () {
+ SphinxRtdTheme.StickyNav.enable();
+ });
+ </script>
+
+
+</body>
+</html>
\ No newline at end of file