You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@flink.apache.org by tw...@apache.org on 2017/03/29 16:47:39 UTC

[2/3] flink-web git commit: Rebuild website and update date of release 1.1.5

http://git-wip-us.apache.org/repos/asf/flink-web/blob/661f7648/content/blog/page5/index.html
----------------------------------------------------------------------
diff --git a/content/blog/page5/index.html b/content/blog/page5/index.html
index 3d3cc18..e212840 100644
--- a/content/blog/page5/index.html
+++ b/content/blog/page5/index.html
@@ -142,6 +142,19 @@
     <!-- Blog posts -->
     
     <article>
+      <h2 class="blog-title"><a href="/news/2014/10/03/upcoming_events.html">Upcoming Events</a></h2>
+      <p>03 Oct 2014</p>
+
+      <p><p>We are happy to announce several upcoming Flink events both in Europe and the US. Starting with a <strong>Flink hackathon in Stockholm</strong> (Oct 8-9) and a talk about Flink at the <strong>Stockholm Hadoop User Group</strong> (Oct 8). This is followed by the very first <strong>Flink Meetup in Berlin</strong> (Oct 15). In the US, there will be two Flink Meetup talks: the first one at the <strong>Pasadena Big Data User Group</strong> (Oct 29) and the second one at <strong>Silicon Valley Hands On Programming Events</strong> (Nov 4).</p>
+
+</p>
+
+      <p><a href="/news/2014/10/03/upcoming_events.html">Continue reading &raquo;</a></p>
+    </article>
+
+    <hr>
+    
+    <article>
       <h2 class="blog-title"><a href="/news/2014/09/26/release-0.6.1.html">Apache Flink 0.6.1 available</a></h2>
       <p>26 Sep 2014</p>
 
@@ -202,6 +215,16 @@ academic and open source project that Flink originates from.</p>
 
     <ul id="markdown-toc">
       
+      <li><a href="/news/2017/03/29/table-sql-api-update.html">From Streams to Tables and Back Again: An Update on Flink's Table &amp; SQL API</a></li>
+      
+      
+        
+      
+    
+      
+      
+
+      
       <li><a href="/news/2017/03/23/release-1.1.5.html">Apache Flink 1.1.5 Released</a></li>
       
       

http://git-wip-us.apache.org/repos/asf/flink-web/blob/661f7648/content/index.html
----------------------------------------------------------------------
diff --git a/content/index.html b/content/index.html
index ccb17f7..f0ec841 100644
--- a/content/index.html
+++ b/content/index.html
@@ -168,6 +168,9 @@
 
   <dl>
       
+        <dt> <a href="/news/2017/03/29/table-sql-api-update.html">From Streams to Tables and Back Again: An Update on Flink's Table &amp; SQL API</a></dt>
+        <dd><p>Broadening the user base and unifying batch &amp; streaming with relational APIs</p></dd>
+      
         <dt> <a href="/news/2017/03/23/release-1.1.5.html">Apache Flink 1.1.5 Released</a></dt>
         <dd><p>The Apache Flink community released the next bugfix version of the Apache Flink 1.1 series.</p>
 
@@ -183,11 +186,6 @@
       
         <dt> <a href="/news/2016/12/19/2016-year-in-review.html">Apache Flink in 2016: Year in Review</a></dt>
         <dd><p>As 2016 comes to a close, let's take a moment to look back on the Flink community's great work during the past year.</p></dd>
-      
-        <dt> <a href="/news/2016/10/12/release-1.1.3.html">Apache Flink 1.1.3 Released</a></dt>
-        <dd><p>The Apache Flink community released the next bugfix version of the Apache Flink 1.1. series.</p>
-
-</dd>
     
   </dl>
 

http://git-wip-us.apache.org/repos/asf/flink-web/blob/661f7648/content/news/2017/03/29/table-sql-api-update.html
----------------------------------------------------------------------
diff --git a/content/news/2017/03/29/table-sql-api-update.html b/content/news/2017/03/29/table-sql-api-update.html
new file mode 100644
index 0000000..0c95fb0
--- /dev/null
+++ b/content/news/2017/03/29/table-sql-api-update.html
@@ -0,0 +1,364 @@
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <meta http-equiv="X-UA-Compatible" content="IE=edge">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
+    <title>Apache Flink: From Streams to Tables and Back Again: An Update on Flink's Table &amp; SQL API</title>
+    <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
+    <link rel="icon" href="/favicon.ico" type="image/x-icon">
+
+    <!-- Bootstrap -->
+    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css">
+    <link rel="stylesheet" href="/css/flink.css">
+    <link rel="stylesheet" href="/css/syntax.css">
+
+    <!-- Blog RSS feed -->
+    <link href="/blog/feed.xml" rel="alternate" type="application/rss+xml" title="Apache Flink Blog: RSS feed" />
+
+    <!-- jQuery (necessary for Bootstrap's JavaScript plugins) -->
+    <!-- We need to load Jquery in the header for custom google analytics event tracking-->
+    <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script>
+
+    <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
+    <!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
+    <!--[if lt IE 9]>
+      <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
+      <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
+    <![endif]-->
+  </head>
+  <body>  
+    
+
+    <!-- Main content. -->
+    <div class="container">
+    <div class="row">
+
+      
+     <div id="sidebar" class="col-sm-3">
+          <!-- Top navbar. -->
+    <nav class="navbar navbar-default">
+        <!-- The logo. -->
+        <div class="navbar-header">
+          <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1">
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+          </button>
+          <div class="navbar-logo">
+            <a href="/">
+              <img alt="Apache Flink" src="/img/flink-header-logo.svg" width="147px" height="73px">
+            </a>
+          </div>
+        </div><!-- /.navbar-header -->
+
+        <!-- The navigation links. -->
+        <div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1">
+          <ul class="nav navbar-nav navbar-main">
+
+            <!-- Downloads -->
+            <li class=""><a class="btn btn-info" href="/downloads.html">Download Flink</a></li>
+
+            <!-- Overview -->
+            <li><a href="/index.html">Home</a></li>
+
+            <!-- Intro -->
+            <li><a href="/introduction.html">Introduction to Flink</a></li>
+
+            <!-- Use cases -->
+            <li><a href="/usecases.html">Flink Use Cases</a></li>
+
+            <!-- Powered by -->
+            <li><a href="/poweredby.html">Powered by Flink</a></li>
+
+            <!-- Ecosystem -->
+            <li><a href="/ecosystem.html">Ecosystem</a></li>
+
+            <!-- Community -->
+            <li><a href="/community.html">Community &amp; Project Info</a></li>
+
+            <!-- Contribute -->
+            <li><a href="/how-to-contribute.html">How to Contribute</a></li>
+
+            <!-- Blog -->
+            <li class=" active hidden-md hidden-sm"><a href="/blog/"><b>Flink Blog</b></a></li>
+
+            <hr />
+
+
+
+            <!-- Documentation -->
+            <!-- <li>
+              <a href="http://ci.apache.org/projects/flink/flink-docs-release-1.2" target="_blank">Documentation <small><span class="glyphicon glyphicon-new-window"></span></small></a>
+            </li> -->
+            <li class="dropdown">
+              <a class="dropdown-toggle" data-toggle="dropdown" href="#">Documentation
+                <span class="caret"></span></a>
+                <ul class="dropdown-menu">
+                  <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.2" target="_blank">1.2 (Latest stable release) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
+                  <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.3" target="_blank">1.3 (Snapshot) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
+                </ul>
+              </li>
+
+            <!-- Quickstart -->
+            <li>
+              <a href="http://ci.apache.org/projects/flink/flink-docs-release-1.2/quickstart/setup_quickstart.html" target="_blank">Quickstart <small><span class="glyphicon glyphicon-new-window"></span></small></a>
+            </li>
+
+            <!-- GitHub -->
+            <li>
+              <a href="https://github.com/apache/flink" target="_blank">Flink on GitHub <small><span class="glyphicon glyphicon-new-window"></span></small></a>
+            </li>
+
+          </ul>
+
+
+
+          <ul class="nav navbar-nav navbar-bottom">
+          <hr />
+
+            <!-- FAQ -->
+            <li ><a href="/faq.html">Project FAQ</a></li>
+
+            <!-- Twitter -->
+            <li><a href="https://twitter.com/apacheflink" target="_blank">@ApacheFlink <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
+
+            <!-- Visualizer -->
+            <li class=" hidden-md hidden-sm"><a href="/visualizer/" target="_blank">Plan Visualizer <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
+
+          </ul>
+        </div><!-- /.navbar-collapse -->
+    </nav>
+
+      </div>
+      <div class="col-sm-9">
+      <div class="row-fluid">
+  <div class="col-sm-12">
+    <div class="row">
+      <h1>From Streams to Tables and Back Again: An Update on Flink's Table &amp; SQL API</h1>
+
+      <article>
+        <p>29 Mar 2017 by Timo Walther (<a href="https://twitter.com/twalthr">@twalthr</a>)</p>
+
+<p>Stream processing can deliver a lot of value. Many organizations have recognized the benefit of managing large volumes of data in real-time, reacting quickly to trends, and providing customers with live services at scale. Streaming applications with well-defined business logic can deliver a competitive advantage.</p>
+
+<p>Flink’s <a href="https://ci.apache.org/projects/flink/flink-docs-release-1.2/dev/datastream_api.html">DataStream</a> abstraction is a powerful API which lets you flexibly define both basic and complex streaming pipelines. Additionally, it offers low-level operations such as <a href="https://ci.apache.org/projects/flink/flink-docs-release-1.2/dev/stream/asyncio.html">Async IO</a> and <a href="https://ci.apache.org/projects/flink/flink-docs-release-1.2/dev/stream/process_function.html">ProcessFunctions</a>. However, many users do not need such a deep level of flexibility. They need an API which quickly solves 80% of their use cases where simple tasks can be defined using little code.</p>
+
+<p>To deliver the power of stream processing to a broader set of users, the Apache Flink community is developing APIs that provide simpler abstractions and more concise syntax so that users can focus on their business logic instead of advanced streaming concepts. Along with other APIs (such as <a href="https://ci.apache.org/projects/flink/flink-docs-release-1.2/dev/libs/cep.html">CEP</a> for complex event processing on streams), Flink offers a relational API that aims to unify stream and batch processing: the <a href="https://ci.apache.org/projects/flink/flink-docs-release-1.2/dev/table_api.html">Table &amp; SQL API</a>, often referred to as the Table API.</p>
+
+<p>Recently, contributors working for companies such as Alibaba, Huawei, data Artisans, and more decided to further develop the Table API. Over the past year, the Table API has been rewritten entirely. Since Flink 1.1, its core has been based on <a href="http://calcite.apache.org/">Apache Calcite</a>, which parses SQL and optimizes all relational queries. Today, the Table API can address a wide range of use cases in both batch and stream environments with unified semantics.</p>
+
+<p>This blog post summarizes the current status of Flink’s Table API and showcases some of the recently-added features in Apache Flink. Among the features presented here are the unified access to batch and streaming data, data transformation, and window operators.
+The following paragraphs are not only supposed to give you a general overview of the Table API, but also to illustrate the potential of relational APIs in the future.</p>
+
+<p>Because the Table API is built on top of Flink’s core APIs, <a href="https://ci.apache.org/projects/flink/flink-docs-release-1.2/dev/datastream_api.html">DataStreams</a> and <a href="https://ci.apache.org/projects/flink/flink-docs-release-1.2/dev/batch/index.html">DataSets</a> can be converted to a Table and vice-versa without much overhead. Hereafter, we show how to create tables from different sources and specify programs that can be executed locally or in a distributed setting. In this post, we will use the Scala version of the Table API, but there is also a Java version as well as a SQL API with an equivalent set of features.</p>
+
+<h2 id="data-transformation-and-etl">Data Transformation and ETL</h2>
+
+<p>A common task in every data processing pipeline is importing data from one or multiple systems, applying some transformations to it, then exporting the data to another system. The Table API can help to manage these recurring tasks. For reading data, the API provides a set of ready-to-use <code>TableSources</code> such as a <code>CsvTableSource</code> and <code>KafkaTableSource</code>, however, it also allows the implementation of custom <code>TableSources</code> that can hide configuration specifics (e.g. watermark generation) from users who are less familiar with streaming concepts.</p>
+
+<p>Let’s assume we have a CSV file that stores customer information. The values are delimited by a “|”-character and contain a customer identifier, name, timestamp of the last update, and preferences encoded in a comma-separated key-value string:</p>
+
+<div class="highlight"><pre><code>42|Bob Smith|2016-07-23 16:10:11|color=12,length=200,size=200
+</code></pre></div>
+
+<p>The following example illustrates how to read a CSV file and perform some data cleansing before converting it to a regular DataStream program.</p>
+
+<div class="highlight"><pre><code class="language-scala"><span class="c1">// set up execution environment</span>
+<span class="k">val</span> <span class="n">env</span> <span class="k">=</span> <span class="nc">StreamExecutionEnvironment</span><span class="o">.</span><span class="n">getExecutionEnvironment</span>
+<span class="k">val</span> <span class="n">tEnv</span> <span class="k">=</span> <span class="nc">TableEnvironment</span><span class="o">.</span><span class="n">getTableEnvironment</span><span class="o">(</span><span class="n">env</span><span class="o">)</span>
+
+<span class="c1">// configure table source</span>
+<span class="k">val</span> <span class="n">customerSource</span> <span class="k">=</span> <span class="nc">CsvTableSource</span><span class="o">.</span><span class="n">builder</span><span class="o">()</span>
+  <span class="o">.</span><span class="n">path</span><span class="o">(</span><span class="s">&quot;/path/to/customer_data.csv&quot;</span><span class="o">)</span>
+  <span class="o">.</span><span class="n">ignoreFirstLine</span><span class="o">()</span>
+  <span class="o">.</span><span class="n">fieldDelimiter</span><span class="o">(</span><span class="s">&quot;|&quot;</span><span class="o">)</span>
+  <span class="o">.</span><span class="n">field</span><span class="o">(</span><span class="s">&quot;id&quot;</span><span class="o">,</span> <span class="nc">Types</span><span class="o">.</span><span class="nc">LONG</span><span class="o">)</span>
+  <span class="o">.</span><span class="n">field</span><span class="o">(</span><span class="s">&quot;name&quot;</span><span class="o">,</span> <span class="nc">Types</span><span class="o">.</span><span class="nc">STRING</span><span class="o">)</span>
+  <span class="o">.</span><span class="n">field</span><span class="o">(</span><span class="s">&quot;last_update&quot;</span><span class="o">,</span> <span class="nc">Types</span><span class="o">.</span><span class="nc">TIMESTAMP</span><span class="o">)</span>
+  <span class="o">.</span><span class="n">field</span><span class="o">(</span><span class="s">&quot;prefs&quot;</span><span class="o">,</span> <span class="nc">Types</span><span class="o">.</span><span class="nc">STRING</span><span class="o">)</span>
+  <span class="o">.</span><span class="n">build</span><span class="o">()</span>
+
+<span class="c1">// name your table source</span>
+<span class="n">tEnv</span><span class="o">.</span><span class="n">registerTableSource</span><span class="o">(</span><span class="s">&quot;customers&quot;</span><span class="o">,</span> <span class="n">customerSource</span><span class="o">)</span>
+
+<span class="c1">// define your table program</span>
+<span class="k">val</span> <span class="n">table</span> <span class="k">=</span> <span class="n">tEnv</span>
+  <span class="o">.</span><span class="n">scan</span><span class="o">(</span><span class="s">&quot;customers&quot;</span><span class="o">)</span>
+  <span class="o">.</span><span class="n">filter</span><span class="o">(</span><span class="-Symbol">&#39;name</span><span class="o">.</span><span class="n">isNotNull</span> <span class="o">&amp;&amp;</span> <span class="-Symbol">&#39;last_update</span> <span class="o">&gt;</span> <span class="s">&quot;2016-01-01 00:00:00&quot;</span><span class="o">.</span><span class="n">toTimestamp</span><span class="o">)</span>
+  <span class="o">.</span><span class="n">select</span><span class="o">(</span><span class="-Symbol">&#39;id</span><span class="o">,</span> <span class="-Symbol">&#39;name</span><span class="o">.</span><span class="n">lowerCase</span><span class="o">(),</span> <span class="-Symbol">&#39;prefs</span><span class="o">)</span>
+
+<span class="c1">// convert it to a data stream</span>
+<span class="k">val</span> <span class="n">ds</span> <span class="k">=</span> <span class="n">table</span><span class="o">.</span><span class="n">toDataStream</span><span class="o">[</span><span class="kt">Row</span><span class="o">]</span>
+
+<span class="n">ds</span><span class="o">.</span><span class="n">print</span><span class="o">()</span>
+<span class="n">env</span><span class="o">.</span><span class="n">execute</span><span class="o">()</span></code></pre></div>
+
+<p>The Table API comes with a large set of built-in functions that make it easy to specify  business logic using a language integrated query (LINQ) syntax. In the example above, we filter out customers with invalid names and only select those that updated their preferences recently. We convert names to lowercase for normalization. For debugging purposes, we convert the table into a DataStream and print it.</p>
+
+<p>The <code>CsvTableSource</code> supports both batch and stream environments. If the programmer wants to execute the program above in a batch application, all he or she has to do is to replace the environment via <code>ExecutionEnvironment</code> and change the output conversion from <code>DataStream</code> to <code>DataSet</code>. The Table API program itself doesn’t change.</p>
+
+<p>In the example, we converted the table program to a data stream of <code>Row</code> objects. However, we are not limited to row data types. The Table API supports all types from the underlying APIs such as Java and Scala Tuples, Case Classes, POJOs, or generic types that are serialized using Kryo. Let’s assume that we want to have a regular object (POJO) with the following format instead of generic rows:</p>
+
+<div class="highlight"><pre><code class="language-scala"><span class="k">class</span> <span class="nc">Customer</span> <span class="o">{</span>
+  <span class="k">var</span> <span class="n">id</span><span class="k">:</span> <span class="kt">Int</span> <span class="o">=</span> <span class="k">_</span>
+  <span class="k">var</span> <span class="n">name</span><span class="k">:</span> <span class="kt">String</span> <span class="o">=</span> <span class="k">_</span>
+  <span class="k">var</span> <span class="n">update</span><span class="k">:</span> <span class="kt">Long</span> <span class="o">=</span> <span class="k">_</span>
+  <span class="k">var</span> <span class="n">prefs</span><span class="k">:</span> <span class="kt">java.util.Properties</span> <span class="o">=</span> <span class="k">_</span>
+<span class="o">}</span></code></pre></div>
+<p>We can use the following table program to convert the CSV file into Customer objects. Flink takes care of creating objects and mapping fields for us.</p>
+
+<div class="highlight"><pre><code class="language-scala"><span class="k">val</span> <span class="n">ds</span> <span class="k">=</span> <span class="n">tEnv</span>
+  <span class="o">.</span><span class="n">scan</span><span class="o">(</span><span class="s">&quot;customers&quot;</span><span class="o">)</span>
+  <span class="o">.</span><span class="n">select</span><span class="o">(</span><span class="-Symbol">&#39;id</span><span class="o">,</span> <span class="-Symbol">&#39;name</span><span class="o">,</span> <span class="-Symbol">&#39;last_update</span> <span class="n">as</span> <span class="-Symbol">&#39;update</span><span class="o">,</span> <span class="n">parseProperties</span><span class="o">(</span><span class="-Symbol">&#39;prefs</span><span class="o">)</span> <span class="n">as</span> <span class="-Symbol">&#39;prefs</span><span class="o">)</span>
+  <span class="o">.</span><span class="n">toDataStream</span><span class="o">[</span><span class="kt">Customer</span><span class="o">]</span></code></pre></div>
+
+<p>You might have noticed that the query above uses a function to parse the preferences field. Even though Flink’s Table API is shipped with a large set of built-in functions, it is often necessary to define custom user-defined scalar functions. In the above example we use a user-defined function <code>parseProperties</code>. The following code snippet shows how easily we can implement a scalar function.</p>
+
+<div class="highlight"><pre><code class="language-scala"><span class="k">object</span> <span class="nc">parseProperties</span> <span class="k">extends</span> <span class="nc">ScalarFunction</span> <span class="o">{</span>
+  <span class="k">def</span> <span class="n">eval</span><span class="o">(</span><span class="n">str</span><span class="k">:</span> <span class="kt">String</span><span class="o">)</span><span class="k">:</span> <span class="kt">Properties</span> <span class="o">=</span> <span class="o">{</span>
+    <span class="k">val</span> <span class="n">props</span> <span class="k">=</span> <span class="k">new</span> <span class="nc">Properties</span><span class="o">()</span>
+    <span class="n">str</span>
+      <span class="o">.</span><span class="n">split</span><span class="o">(</span><span class="s">&quot;,&quot;</span><span class="o">)</span>
+      <span class="o">.</span><span class="n">map</span><span class="o">(</span><span class="k">_</span><span class="o">.</span><span class="n">split</span><span class="o">(</span><span class="s">&quot;=&quot;</span><span class="o">))</span>
+      <span class="o">.</span><span class="n">foreach</span><span class="o">(</span><span class="n">split</span> <span class="k">=&gt;</span> <span class="n">props</span><span class="o">.</span><span class="n">setProperty</span><span class="o">(</span><span class="n">split</span><span class="o">(</span><span class="mi">0</span><span class="o">),</span> <span class="n">split</span><span class="o">(</span><span class="mi">1</span><span class="o">)))</span>
+    <span class="n">props</span>
+  <span class="o">}</span>
+<span class="o">}</span></code></pre></div>
+
+<p>Scalar functions can be used to deserialize, extract, or convert values (and more). By overriding the <code>open()</code> method we can even have access to runtime information such as distributed cached files or metrics. The <code>open()</code> method itself is only called once during the runtime’s <a href="https://ci.apache.org/projects/flink/flink-docs-release-1.3/internals/task_lifecycle.html">task lifecycle</a>.</p>
+
+<h2 id="unified-windowing-for-static-and-streaming-data">Unified Windowing for Static and Streaming Data</h2>
+
+<p>Another very common task, especially when working with continuous data, is the definition of windows to split a stream into pieces of finite size, over which we can apply computations. At the moment, the Table API supports three types of windows: sliding windows, tumbling windows, and session windows (for general definitions of the different types of windows, we recommend <a href="https://ci.apache.org/projects/flink/flink-docs-release-1.2/dev/windows.html">Flink’s documentation</a>). All three window types work on <a href="https://ci.apache.org/projects/flink/flink-docs-release-1.2/dev/event_time.html">event or processing time</a>. Session windows can be defined over time intervals, sliding and tumbling windows can be defined over time intervals or a number of rows.</p>
+
+<p>Let’s assume that our customer data from the example above is an event stream of updates generated whenever the customer updated his or her preferences. We assume that events come from a TableSource that has assigned timestamps and watermarks. The definition of a window happens again in a LINQ-style fashion. The following example could be used to count the updates to the preferences during one day.</p>
+
+<div class="highlight"><pre><code class="language-scala"><span class="n">table</span>
+  <span class="o">.</span><span class="n">window</span><span class="o">(</span><span class="nc">Tumble</span> <span class="n">over</span> <span class="mf">1.d</span><span class="n">ay</span> <span class="n">on</span> <span class="-Symbol">&#39;rowtime</span> <span class="n">as</span> <span class="-Symbol">&#39;w</span><span class="o">)</span>
+  <span class="o">.</span><span class="n">groupBy</span><span class="o">(</span><span class="-Symbol">&#39;id</span><span class="o">,</span> <span class="-Symbol">&#39;w</span><span class="o">)</span>
+  <span class="o">.</span><span class="n">select</span><span class="o">(</span><span class="-Symbol">&#39;id</span><span class="o">,</span> <span class="-Symbol">&#39;w</span><span class="o">.</span><span class="n">start</span> <span class="n">as</span> <span class="-Symbol">&#39;from</span><span class="o">,</span> <span class="-Symbol">&#39;w</span><span class="o">.</span><span class="n">end</span> <span class="n">as</span> <span class="-Symbol">&#39;to</span><span class="o">,</span> <span class="-Symbol">&#39;prefs</span><span class="o">.</span><span class="n">count</span> <span class="n">as</span> <span class="-Symbol">&#39;updates</span><span class="o">)</span></code></pre></div>
+
+<p>By using the <code>on()</code> parameter, we can specify whether the window is supposed to work on event-time or not. The Table API assumes that timestamps and watermarks are assigned correctly when using event-time. Elements with timestamps smaller than the last received watermark are dropped. Since the extraction of timestamps and generation of watermarks depends on the data source and requires some deeper knowledge of their origin, the TableSource or the upstream DataStream is usually responsible for assigning these properties.</p>
+
+<p>The following code shows how to define other types of windows:</p>
+
+<div class="highlight"><pre><code class="language-scala"><span class="c1">// using processing-time</span>
+<span class="n">table</span><span class="o">.</span><span class="n">window</span><span class="o">(</span><span class="nc">Tumble</span> <span class="n">over</span> <span class="mf">100.</span><span class="n">rows</span> <span class="n">as</span> <span class="-Symbol">&#39;manyRowWindow</span><span class="o">)</span>
+<span class="c1">// using event-time</span>
+<span class="n">table</span><span class="o">.</span><span class="n">window</span><span class="o">(</span><span class="nc">Session</span> <span class="n">withGap</span> <span class="mf">15.</span><span class="n">minutes</span> <span class="n">on</span> <span class="-Symbol">&#39;rowtime</span> <span class="n">as</span> <span class="-Symbol">&#39;sessionWindow</span><span class="o">)</span>
+<span class="n">table</span><span class="o">.</span><span class="n">window</span><span class="o">(</span><span class="nc">Slide</span> <span class="n">over</span> <span class="mf">1.d</span><span class="n">ay</span> <span class="n">every</span> <span class="mf">1.</span><span class="n">hour</span> <span class="n">on</span> <span class="-Symbol">&#39;rowtime</span> <span class="n">as</span> <span class="-Symbol">&#39;dailyWindow</span><span class="o">)</span></code></pre></div>
+
+<p>Since batch is just a special case of streaming (where a batch happens to have a defined start and end point), it is also possible to apply all of these windows in a batch execution environment. Without any modification of the table program itself, we can run the code on a DataSet given that we specified a column named “rowtime”. This is particularly interesting if we want to compute exact results from time to time, so that late events that are heavily out-of-order can be included in the computation.</p>
+
+<p>At the moment, the Table API only supports so-called “group windows” that also exist in the DataStream API. Other windows such as SQL’s OVER clause windows are in development and <a href="https://cwiki.apache.org/confluence/display/FLINK/FLIP-11%3A+Table+API+Stream+Aggregations">planned for Flink 1.3</a>.</p>
+
+<p>In order to demonstrate the expressiveness and capabilities of the API, here’s a snippet with a more advanced example of an exponentially decaying moving average over a sliding window of one hour which returns aggregated results every second. The table program weighs recent orders more heavily than older orders. This example is borrowed from <a href="https://calcite.apache.org/docs/stream.html#hopping-windows">Apache Calcite</a> and shows what will be possible in future Flink releases for both the Table API and SQL.</p>
+
+<div class="highlight"><pre><code class="language-scala"><span class="n">table</span>
+  <span class="o">.</span><span class="n">window</span><span class="o">(</span><span class="nc">Slide</span> <span class="n">over</span> <span class="mf">1.</span><span class="n">hour</span> <span class="n">every</span> <span class="mf">1.</span><span class="n">second</span> <span class="n">as</span> <span class="-Symbol">&#39;w</span><span class="o">)</span>
+  <span class="o">.</span><span class="n">groupBy</span><span class="o">(</span><span class="-Symbol">&#39;productId</span><span class="o">,</span> <span class="-Symbol">&#39;w</span><span class="o">)</span>
+  <span class="o">.</span><span class="n">select</span><span class="o">(</span>
+    <span class="-Symbol">&#39;w</span><span class="o">.</span><span class="n">end</span><span class="o">,</span>
+    <span class="-Symbol">&#39;productId</span><span class="o">,</span>
+    <span class="o">(</span><span class="-Symbol">&#39;unitPrice</span> <span class="o">*</span> <span class="o">(</span><span class="-Symbol">&#39;rowtime</span> <span class="o">-</span> <span class="-Symbol">&#39;w</span><span class="o">.</span><span class="n">start</span><span class="o">).</span><span class="n">exp</span><span class="o">()</span> <span class="o">/</span> <span class="mf">1.</span><span class="n">hour</span><span class="o">).</span><span class="n">sum</span> <span class="o">/</span> <span class="o">((</span><span class="-Symbol">&#39;rowtime</span> <span class="o">-</span> <span class="-Symbol">&#39;w</span><span class="o">.</span><span class="n">start</span><span class="o">).</span><span class="n">exp</span><span class="o">()</span> <span class="o">/</span> <span class="mf">1.</span><span class="n">hour</span><span class="o">).</span><span class="n">sum</span><span class="o">)</span></code></pre></div>
+
+<h2 id="user-defined-table-functions">User-defined Table Functions</h2>
+
+<p><a href="https://ci.apache.org/projects/flink/flink-docs-release-1.2/dev/table_api.html#user-defined-table-functions">User-defined table functions</a> were added in Flink 1.2. These can be quite useful for table columns containing non-atomic values which need to be extracted and mapped to separate fields before processing. Table functions take an arbitrary number of scalar values and allow for returning an arbitrary number of rows as output instead of a single value, similar to a flatMap function in the DataStream or DataSet API. The output of a table function can then be joined with the original row in the table by using either a left-outer join or cross join.</p>
+
+<p>Using the previously-mentioned customer table, let&rsquo;s assume we want to produce a table that contains the color and size preferences as separate columns. The table program would look like this:</p>
+
+<div class="highlight"><pre><code class="language-scala"><span class="c1">// create an instance of the table function</span>
+<span class="k">val</span> <span class="n">extractPrefs</span> <span class="k">=</span> <span class="k">new</span> <span class="nc">PropertiesExtractor</span><span class="o">()</span>
+
+<span class="c1">// derive rows and join them with original row</span>
+<span class="n">table</span>
+  <span class="o">.</span><span class="n">join</span><span class="o">(</span><span class="n">extractPrefs</span><span class="o">(</span><span class="-Symbol">&#39;prefs</span><span class="o">)</span> <span class="n">as</span> <span class="o">(</span><span class="-Symbol">&#39;color</span><span class="o">,</span> <span class="-Symbol">&#39;size</span><span class="o">))</span>
+  <span class="o">.</span><span class="n">select</span><span class="o">(</span><span class="-Symbol">&#39;id</span><span class="o">,</span> <span class="-Symbol">&#39;username</span><span class="o">,</span> <span class="-Symbol">&#39;color</span><span class="o">,</span> <span class="-Symbol">&#39;size</span><span class="o">)</span></code></pre></div>
+
+<p>The <code>PropertiesExtractor</code> is a user-defined table function that extracts the color and size. We are not interested in customers that haven&rsquo;t set these preferences and thus don&rsquo;t emit anything unless both properties are present in the string value. Since we are using a (cross) join in the program, customers without a result on the right side of the join will be filtered out.</p>
+
+<div class="highlight"><pre><code class="language-scala"><span class="k">class</span> <span class="nc">PropertiesExtractor</span> <span class="k">extends</span> <span class="nc">TableFunction</span><span class="o">[</span><span class="kt">Row</span><span class="o">]</span> <span class="o">{</span>
+  <span class="k">def</span> <span class="n">eval</span><span class="o">(</span><span class="n">prefs</span><span class="k">:</span> <span class="kt">String</span><span class="o">)</span><span class="k">:</span> <span class="kt">Unit</span> <span class="o">=</span> <span class="o">{</span>
+    <span class="c1">// split string into (key, value) pairs</span>
+    <span class="k">val</span> <span class="n">pairs</span> <span class="k">=</span> <span class="n">prefs</span>
+      <span class="o">.</span><span class="n">split</span><span class="o">(</span><span class="s">&quot;,&quot;</span><span class="o">)</span>
+      <span class="o">.</span><span class="n">map</span> <span class="o">{</span> <span class="n">kv</span> <span class="k">=&gt;</span>
+        <span class="k">val</span> <span class="n">split</span> <span class="k">=</span> <span class="n">kv</span><span class="o">.</span><span class="n">split</span><span class="o">(</span><span class="s">&quot;=&quot;</span><span class="o">)</span>
+        <span class="o">(</span><span class="n">split</span><span class="o">(</span><span class="mi">0</span><span class="o">),</span> <span class="n">split</span><span class="o">(</span><span class="mi">1</span><span class="o">))</span>
+      <span class="o">}</span>
+
+    <span class="k">val</span> <span class="n">color</span> <span class="k">=</span> <span class="n">pairs</span><span class="o">.</span><span class="n">find</span><span class="o">(</span><span class="k">_</span><span class="o">.</span><span class="n">_1</span> <span class="o">==</span> <span class="s">&quot;color&quot;</span><span class="o">).</span><span class="n">map</span><span class="o">(</span><span class="k">_</span><span class="o">.</span><span class="n">_2</span><span class="o">)</span>
+    <span class="k">val</span> <span class="n">size</span> <span class="k">=</span> <span class="n">pairs</span><span class="o">.</span><span class="n">find</span><span class="o">(</span><span class="k">_</span><span class="o">.</span><span class="n">_1</span> <span class="o">==</span> <span class="s">&quot;size&quot;</span><span class="o">).</span><span class="n">map</span><span class="o">(</span><span class="k">_</span><span class="o">.</span><span class="n">_2</span><span class="o">)</span>
+
+    <span class="c1">// emit a row if color and size are specified</span>
+    <span class="o">(</span><span class="n">color</span><span class="o">,</span> <span class="n">size</span><span class="o">)</span> <span class="k">match</span> <span class="o">{</span>
+      <span class="k">case</span> <span class="o">(</span><span class="nc">Some</span><span class="o">(</span><span class="n">c</span><span class="o">),</span> <span class="nc">Some</span><span class="o">(</span><span class="n">s</span><span class="o">))</span> <span class="k">=&gt;</span> <span class="n">collect</span><span class="o">(</span><span class="nc">Row</span><span class="o">.</span><span class="n">of</span><span class="o">(</span><span class="n">c</span><span class="o">,</span> <span class="n">s</span><span class="o">))</span>
+      <span class="k">case</span> <span class="k">_</span> <span class="k">=&gt;</span> <span class="c1">// skip</span>
+    <span class="o">}</span>
+  <span class="o">}</span>
+
+  <span class="k">override</span> <span class="k">def</span> <span class="n">getResultType</span> <span class="k">=</span> <span class="k">new</span> <span class="nc">RowTypeInfo</span><span class="o">(</span><span class="nc">Types</span><span class="o">.</span><span class="nc">STRING</span><span class="o">,</span> <span class="nc">Types</span><span class="o">.</span><span class="nc">STRING</span><span class="o">)</span>
+<span class="o">}</span></code></pre></div>
+
+<h2 id="conclusion">Conclusion</h2>
+
+<p>There is significant interest in making streaming more accessible and easier to use. Flink&rsquo;s Table API development is happening quickly, and we believe that soon, you will be able to implement large batch or streaming pipelines using purely relational APIs or even convert existing Flink jobs to table programs. The Table API is already a very useful tool since you can work around limitations and missing features at any time by switching back and forth between the DataSet/DataStream abstraction and the Table abstraction.</p>
+
+<p>Contributions like support of Apache Hive UDFs, external catalogs, more TableSources, additional windows, and more operators will make the Table API an even more useful tool. Particularly, the upcoming introduction of Dynamic Tables, which is worth a blog post of its own, shows that even in 2017, new relational APIs open the door to a number of possibilities.</p>
+
+<p>Try it out, or even better, join the design discussions on the <a href="http://flink.apache.org/community.html#mailing-lists">mailing lists</a> and <a href="https://issues.apache.org/jira/browse/FLINK/?selectedTab=com.atlassian.jira.jira-projects-plugin:summary-panel">JIRA</a> and start contributing!</p>
+
+      </article>
+    </div>
+
+    <div class="row">
+      <div id="disqus_thread"></div>
+      <script type="text/javascript">
+        /* Disqus configuration: the forum shortname selects which Disqus site hosts this page's thread. */
+        var disqus_shortname = 'stratosphere-eu';
+
+        /* Disqus loader: builds an async script element for the forum's embed.js and attaches it to the page. */
+        (function() {
+            var embedScript = document.createElement('script');
+            embedScript.type = 'text/javascript';
+            embedScript.async = true;
+            embedScript.src = '//' + disqus_shortname + '.disqus.com/embed.js';
+
+            /* Prefer the first head element; fall back to the first body element when no head is found. */
+            var attachPoint = document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0];
+            attachPoint.appendChild(embedScript);
+        })();
+      </script>
+    </div>
+  </div>
+</div>
+      </div>
+    </div>
+
+    <hr />
+
+    <div class="row">
+      <div class="footer text-center col-sm-12">
+        <p>Copyright &copy; 2014-2016 <a href="http://apache.org">The Apache Software Foundation</a>. All Rights Reserved.</p>
+        <p>Apache Flink, Apache, and the Apache feather logo are either registered trademarks or trademarks of The Apache Software Foundation.</p>
+        <p><a href="/privacy-policy.html">Privacy Policy</a> &middot; <a href="/blog/feed.xml">RSS feed</a></p>
+      </div>
+    </div>
+    </div><!-- /.container -->
+
+    <!-- Include all compiled plugins (below), or include individual files as needed -->
+    <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script>
+    <script src="/js/codetabs.js"></script>
+    <script src="/js/stickysidebar.js"></script>
+
+
+    <!-- Google Analytics: defines a queueing ga() stub on window, inserts an async script tag for analytics.js before the first script element, then queues a 'create' call for property UA-52545728-1 and a 'send pageview' call -->
+    <script>
+      (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+      m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+      })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+      ga('create', 'UA-52545728-1', 'auto');
+      ga('send', 'pageview');
+    </script>
+  </body>
+</html>