You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by da...@apache.org on 2017/01/10 01:47:52 UTC

beam-site git commit: Publish a blog about Apache Apex, second attempt

Repository: beam-site
Updated Branches:
  refs/heads/asf-site ba5fe2b2a -> 973bd73a7


Publish a blog about Apache Apex, second attempt

The issue in the previous commit was usage of year 2016, instead of 2017 ;-)


Project: http://git-wip-us.apache.org/repos/asf/beam-site/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam-site/commit/973bd73a
Tree: http://git-wip-us.apache.org/repos/asf/beam-site/tree/973bd73a
Diff: http://git-wip-us.apache.org/repos/asf/beam-site/diff/973bd73a

Branch: refs/heads/asf-site
Commit: 973bd73a7e90f8eeda0781a11b41f4a169fb1fc7
Parents: ba5fe2b
Author: Davor Bonaci <da...@google.com>
Authored: Mon Jan 9 17:47:15 2017 -0800
Committer: Davor Bonaci <da...@google.com>
Committed: Mon Jan 9 17:47:15 2017 -0800

----------------------------------------------------------------------
 content/blog/2016/01/09/added-apex-runner.html | 211 --------------------
 content/blog/2017/01/09/added-apex-runner.html | 211 ++++++++++++++++++++
 content/blog/index.html                        |  32 +--
 content/feed.xml                               |  69 ++++---
 content/index.html                             |   4 +-
 src/_posts/2017-01-09-added-apex-runner.md     |   2 +-
 6 files changed, 271 insertions(+), 258 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam-site/blob/973bd73a/content/blog/2016/01/09/added-apex-runner.html
----------------------------------------------------------------------
diff --git a/content/blog/2016/01/09/added-apex-runner.html b/content/blog/2016/01/09/added-apex-runner.html
deleted file mode 100644
index 3d0ab40..0000000
--- a/content/blog/2016/01/09/added-apex-runner.html
+++ /dev/null
@@ -1,211 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-
-  <head>
-  <meta charset="utf-8">
-  <meta http-equiv="X-UA-Compatible" content="IE=edge">
-  <meta name="viewport" content="width=device-width, initial-scale=1">
-
-  <title>Release 0.4.0 adds a runner for Apache Apex</title>
-  <meta name="description" content="The latest release 0.4.0 of Apache Beam adds a new runner for Apache Apex. We are excited to reach this initial milestone and are looking forward to continue...">
-
-  <link rel="stylesheet" href="/styles/site.css">
-  <link rel="stylesheet" href="/css/theme.css">
-  <script src="https://ajax.googleapis.com/ajax/libs/jquery/2.2.0/jquery.min.js"></script>
-  <script src="/js/bootstrap.min.js"></script>
-  <script src="/js/language-switch.js"></script>
-  <link rel="canonical" href="http://beam.apache.org/blog/2016/01/09/added-apex-runner.html" data-proofer-ignore>
-  <link rel="alternate" type="application/rss+xml" title="Apache Beam" href="http://beam.apache.org/feed.xml">
-  <script>
-    (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
-    (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
-    m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
-    })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
-
-    ga('create', 'UA-73650088-1', 'auto');
-    ga('send', 'pageview');
-
-  </script>
-  <link rel="shortcut icon" type="image/x-icon" href="/images/favicon.ico">
-</head>
-
-
-  <body role="document">
-
-    <nav class="navbar navbar-default navbar-fixed-top">
-  <div class="container">
-    <div class="navbar-header">
-      <a href="/" class="navbar-brand" >
-        <img alt="Brand" style="height: 25px" src="/images/beam_logo_navbar.png">
-      </a>
-      <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false" aria-controls="navbar">
-        <span class="sr-only">Toggle navigation</span>
-        <span class="icon-bar"></span>
-        <span class="icon-bar"></span>
-        <span class="icon-bar"></span>
-      </button>
-    </div>
-    <div id="navbar" class="navbar-collapse collapse">
-      <ul class="nav navbar-nav">
-        <li class="dropdown">
-		  <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Get Started <span class="caret"></span></a>
-		  <ul class="dropdown-menu">
-			  <li><a href="/get-started/beam-overview/">Beam Overview</a></li>
-              <li><a href="/get-started/quickstart/">Quickstart</a></li>
-			  <li role="separator" class="divider"></li>
-			  <li class="dropdown-header">Example Walkthroughs</li>
-			  <li><a href="/get-started/wordcount-example/">WordCount</a></li>
-			  <li><a href="/get-started/mobile-gaming-example/">Mobile Gaming</a></li>
-              <li role="separator" class="divider"></li>
-              <li class="dropdown-header">Resources</li>
-              <li><a href="/get-started/downloads">Downloads</a></li>
-              <li><a href="/get-started/support">Support</a></li>
-		  </ul>
-	    </li>
-        <li class="dropdown">
-		  <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Documentation <span class="caret"></span></a>
-		  <ul class="dropdown-menu">
-			  <li><a href="/documentation">Using the Documentation</a></li>
-			  <li role="separator" class="divider"></li>
-			  <li class="dropdown-header">Beam Concepts</li>
-			  <li><a href="/documentation/programming-guide/">Programming Guide</a></li>
-			  <li><a href="/documentation/resources/">Additional Resources</a></li>
-			  <li role="separator" class="divider"></li>
-              <li class="dropdown-header">Pipeline Fundamentals</li>
-              <li><a href="/documentation/pipelines/design-your-pipeline/">Design Your Pipeline</a></li>
-              <li><a href="/documentation/pipelines/create-your-pipeline/">Create Your Pipeline</a></li>
-              <li><a href="/documentation/pipelines/test-your-pipeline/">Test Your Pipeline</a></li>
-              <li role="separator" class="divider"></li>
-			  <li class="dropdown-header">SDKs</li>
-			  <li><a href="/documentation/sdks/java/">Java SDK</a></li>
-			  <li><a href="/documentation/sdks/javadoc/0.4.0/" target="_blank">Java SDK API Reference <img src="/images/external-link-icon.png"
-                 width="14" height="14"
-                 alt="External link."></a>
-        </li>
-        <li><a href="/documentation/sdks/python/">Python SDK</a></li>
-			  <li role="separator" class="divider"></li>
-			  <li class="dropdown-header">Runners</li>
-			  <li><a href="/documentation/runners/capability-matrix/">Capability Matrix</a></li>
-			  <li><a href="/documentation/runners/direct/">Direct Runner</a></li>
-			  <li><a href="/documentation/runners/apex/">Apache Apex Runner</a></li>
-			  <li><a href="/documentation/runners/flink/">Apache Flink Runner</a></li>
-			  <li><a href="/documentation/runners/spark/">Apache Spark Runner</a></li>
-			  <li><a href="/documentation/runners/dataflow/">Cloud Dataflow Runner</a></li>
-		  </ul>
-	    </li>
-        <li class="dropdown">
-		  <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Contribute <span class="caret"></span></a>
-		  <ul class="dropdown-menu">
-			  <li><a href="/contribute">Get Started Contributing</a></li>
-        <li role="separator" class="divider"></li>
-        <li class="dropdown-header">Guides</li>
-			  <li><a href="/contribute/contribution-guide/">Contribution Guide</a></li>
-        <li><a href="/contribute/testing/">Testing Guide</a></li>
-        <li><a href="/contribute/release-guide/">Release Guide</a></li>
-        <li role="separator" class="divider"></li>
-        <li class="dropdown-header">Technical References</li>
-        <li><a href="/contribute/design-principles/">Design Principles</a></li>
-			  <li><a href="/contribute/work-in-progress/">Ongoing Projects</a></li>
-        <li><a href="/contribute/source-repository/">Source Repository</a></li>      
-        <li role="separator" class="divider"></li>
-			  <li class="dropdown-header">Promotion</li>
-        <li><a href="/contribute/presentation-materials/">Presentation Materials</a></li>
-        <li><a href="/contribute/logos/">Logos and Design</a></li>
-        <li role="separator" class="divider"></li>
-        <li><a href="/contribute/maturity-model/">Maturity Model</a></li>
-        <li><a href="/contribute/team/">Team</a></li>
-		  </ul>
-	    </li>
-
-        <li><a href="/blog">Blog</a></li>
-      </ul>
-      <ul class="nav navbar-nav navbar-right">
-        <li class="dropdown">
-          <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false"><img src="https://www.apache.org/foundation/press/kit/feather_small.png" alt="Apache Logo" style="height:24px;">Apache Software Foundation<span class="caret"></span></a>
-          <ul class="dropdown-menu dropdown-menu-right">
-            <li><a href="http://www.apache.org/">ASF Homepage</a></li>
-            <li><a href="http://www.apache.org/licenses/">License</a></li>
-            <li><a href="http://www.apache.org/security/">Security</a></li>
-            <li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
-            <li><a href="http://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
-            <li><a href="https://www.apache.org/foundation/policies/conduct">Code of Conduct</a></li>
-          </ul>
-        </li>
-      </ul>
-    </div><!--/.nav-collapse -->
-  </div>
-</nav>
-
-
-<link rel="stylesheet" href="">
-
-
-    <div class="container" role="main">
-
-      <div class="row">
-        
-
-<article class="post" itemscope itemtype="http://schema.org/BlogPosting">
-
-  <header class="post-header">
-    <h1 class="post-title" itemprop="name headline">Release 0.4.0 adds a runner for Apache Apex</h1>
-    <p class="post-meta"><time datetime="2016-01-09T09:00:01-08:00" itemprop="datePublished">Jan 9, 2016</time> •  Thomas Weise [<a href="https://twitter.com/thweise">@thweise</a>]
-</p>
-  </header>
-
-  <div class="post-content" itemprop="articleBody">
-    <p>The latest release 0.4.0 of <a href="https://beam.apache.org">Apache Beam</a> adds a new runner for <a href="http://apex.apache.org/">Apache Apex</a>. We are excited to reach this initial milestone and are looking forward to continued collaboration between the Beam and Apex communities to advance the runner.</p>
-
-<!--more-->
-
-<p>Beam evolved from the Google Dataflow SDK and as incubator project has quickly adapted the Apache way, grown the community and attracts increasing interest from users that hope to benefit from a conceptual strong unified programming model that is portable between different big data processing frameworks (see <a href="https://www.oreilly.com/ideas/the-world-beyond-batch-streaming-101">Streaming-101</a> and <a href="https://www.oreilly.com/ideas/the-world-beyond-batch-streaming-102">Streaming-102</a>). Multiple Apache projects already provide runners for Beam (see <a href="http://beam.apache.org/documentation/runners/capability-matrix/">runners and capabilities matrix</a>).</p>
-
-<p>Apex is a stream processing framework for low-latency, high-throughput, stateful and reliable processing of complex analytics pipelines on clusters. Apex was developed since 2012 and is used in production by large companies for real-time and batch processing at scale.</p>
-
-<p>The initial revision of the runner was focussed on broad coverage of the Beam model on a functional level. That means, there will be follow up work in several areas to take the runner from functional to scalable and high performance to match the capabilities of Apex and its native API. The runner capabilities matrix shows that the Apex capabilities are well aligned with the Beam model. Specifically, the ability to track computational state in a fault tolerant and efficient manner is needed to broadly support the windowing concepts, including event time based processing.</p>
-
-<h2 id="stateful-stream-processor">Stateful Stream Processor</h2>
-
-<p>Apex was built as stateful stream processor from the ground up. Operators <a href="https://www.datatorrent.com/blog/blog-introduction-to-checkpoint/">checkpoint</a> state in a distributed and asynchronous manner that produces a consistent snapshot for the entire processing graph, which can be used for recovery. Apex also supports such recovery in an incremental, or fine grained, manner. This means only the portion of the DAG that is actually affected by a failure will be recovered while the remaining pipeline continues processing (this can be leveraged to implement use cases with special needs, such as speculative execution to achieve SLA on the processing latency). The state checkpointing along with idempotent processing guarantee is the basis for <a href="https://www.datatorrent.com/blog/end-to-end-exactly-once-with-apache-apex/">exactly-once results</a> support in Apex.</p>
-
-<h2 id="translation-to-apex-dag">Translation to Apex DAG</h2>
-
-<p>A Beam runner needs to implement the translation from the Beam model to the underlying frameworks execution model. In the case of Apex, the runner will translate the pipeline into the <a href="https://www.datatorrent.com/blog/tracing-dags-from-specification-to-execution/">native (compositional, low level) DAG API</a> (which is also the base for a number of other API that are available to specify applications that run on Apex). The DAG consists of operators (functional building blocks that are connected with streams. The runner provides the execution layer. In the case of Apex it is distributed stream processing, operators process data event by event. The minimum set of operators covers Beam’s primitive transforms: <code class="highlighter-rouge">ParDo.Bound</code>,  <code class="highlighter-rouge">ParDo.BoundMulti</code>, <code class="highlighter-rouge">Read.Unbounded</code>, <code class="highlighter-rouge">Read.Bounded</code>, <code class="highlighter-rouge">GroupByKey</code>, <code class="highlighter-rouge">Flatten.FlattenPCollectionList</code> etc.</p>
-
-<h2 id="execution-and-testing">Execution and Testing</h2>
-
-<p>In this release, the Apex runner executes the pipelines in embedded mode, where, similar to the direct runner, everything is executed in a single JVM. See <a href="https://beam.apache.org/get-started/quickstart/">quickstart</a> on how to run the Beam examples with the Apex runner.</p>
-
-<p>Embedded mode is useful for development and debugging. Apex in production runs distributed on Apache Hadoop YARN clusters. An example how a Beam pipeline can be embedded into an Apex application package to run on YARN can be found <a href="https://github.com/tweise/apex-samples/tree/master/beam-apex-wordcount">here</a> and support for direct launch in the runner is currently being worked on.</p>
-
-<p>The Beam project has a strong focus on development process and tooling, including testing. For the runners, there is a comprehensive test suite with more than 200 integration tests that are executed against each runner to ensure they don’t break as changes are made. The tests cover the capabilities of the matrix and thus are a measure of completeness and correctness of the runner implementations. The suite was very helpful when developing the Apex runner.</p>
-
-<h2 id="outlook">Outlook</h2>
-
-<p>The next step is to take the Apex runner from functional to ready for real applications that run distributed, leveraging the scalability and performance features of Apex, similar to its native API. This includes chaining of ParDos, partitioning, optimizing combine operations etc. To get involved, please see <a href="https://issues.apache.org/jira/issues/?jql=project%20%3D%20BEAM%20and%20component%20%3D%20runner-apex%20and%20resolution%20%3D%20unresolved">JIRA</a> and join the Beam community.</p>
-
-  </div>
-
-</article>
-
-      </div>
-
-
-    <hr>
-  <div class="row">
-      <div class="col-xs-12">
-          <footer>
-              <p class="text-center">&copy; Copyright 2016
-                <a href="http://www.apache.org">The Apache Software Foundation.</a> All Rights Reserved.</p>
-                <p class="text-center"><a href="/privacy_policy">Privacy Policy</a> |
-                <a href="/feed.xml">RSS Feed</a></p>
-          </footer>
-      </div>
-  </div>
-  <!-- container div end -->
-</div>
-
-
-  </body>
-
-</html>

http://git-wip-us.apache.org/repos/asf/beam-site/blob/973bd73a/content/blog/2017/01/09/added-apex-runner.html
----------------------------------------------------------------------
diff --git a/content/blog/2017/01/09/added-apex-runner.html b/content/blog/2017/01/09/added-apex-runner.html
new file mode 100644
index 0000000..260f350
--- /dev/null
+++ b/content/blog/2017/01/09/added-apex-runner.html
@@ -0,0 +1,211 @@
+<!DOCTYPE html>
+<html lang="en">
+
+  <head>
+  <meta charset="utf-8">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+
+  <title>Release 0.4.0 adds a runner for Apache Apex</title>
+  <meta name="description" content="The latest release 0.4.0 of Apache Beam adds a new runner for Apache Apex. We are excited to reach this initial milestone and are looking forward to continue...">
+
+  <link rel="stylesheet" href="/styles/site.css">
+  <link rel="stylesheet" href="/css/theme.css">
+  <script src="https://ajax.googleapis.com/ajax/libs/jquery/2.2.0/jquery.min.js"></script>
+  <script src="/js/bootstrap.min.js"></script>
+  <script src="/js/language-switch.js"></script>
+  <link rel="canonical" href="http://beam.apache.org/blog/2017/01/09/added-apex-runner.html" data-proofer-ignore>
+  <link rel="alternate" type="application/rss+xml" title="Apache Beam" href="http://beam.apache.org/feed.xml">
+  <script>
+    (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+    (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+    m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+    })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+    ga('create', 'UA-73650088-1', 'auto');
+    ga('send', 'pageview');
+
+  </script>
+  <link rel="shortcut icon" type="image/x-icon" href="/images/favicon.ico">
+</head>
+
+
+  <body role="document">
+
+    <nav class="navbar navbar-default navbar-fixed-top">
+  <div class="container">
+    <div class="navbar-header">
+      <a href="/" class="navbar-brand" >
+        <img alt="Brand" style="height: 25px" src="/images/beam_logo_navbar.png">
+      </a>
+      <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false" aria-controls="navbar">
+        <span class="sr-only">Toggle navigation</span>
+        <span class="icon-bar"></span>
+        <span class="icon-bar"></span>
+        <span class="icon-bar"></span>
+      </button>
+    </div>
+    <div id="navbar" class="navbar-collapse collapse">
+      <ul class="nav navbar-nav">
+        <li class="dropdown">
+		  <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Get Started <span class="caret"></span></a>
+		  <ul class="dropdown-menu">
+			  <li><a href="/get-started/beam-overview/">Beam Overview</a></li>
+              <li><a href="/get-started/quickstart/">Quickstart</a></li>
+			  <li role="separator" class="divider"></li>
+			  <li class="dropdown-header">Example Walkthroughs</li>
+			  <li><a href="/get-started/wordcount-example/">WordCount</a></li>
+			  <li><a href="/get-started/mobile-gaming-example/">Mobile Gaming</a></li>
+              <li role="separator" class="divider"></li>
+              <li class="dropdown-header">Resources</li>
+              <li><a href="/get-started/downloads">Downloads</a></li>
+              <li><a href="/get-started/support">Support</a></li>
+		  </ul>
+	    </li>
+        <li class="dropdown">
+		  <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Documentation <span class="caret"></span></a>
+		  <ul class="dropdown-menu">
+			  <li><a href="/documentation">Using the Documentation</a></li>
+			  <li role="separator" class="divider"></li>
+			  <li class="dropdown-header">Beam Concepts</li>
+			  <li><a href="/documentation/programming-guide/">Programming Guide</a></li>
+			  <li><a href="/documentation/resources/">Additional Resources</a></li>
+			  <li role="separator" class="divider"></li>
+              <li class="dropdown-header">Pipeline Fundamentals</li>
+              <li><a href="/documentation/pipelines/design-your-pipeline/">Design Your Pipeline</a></li>
+              <li><a href="/documentation/pipelines/create-your-pipeline/">Create Your Pipeline</a></li>
+              <li><a href="/documentation/pipelines/test-your-pipeline/">Test Your Pipeline</a></li>
+              <li role="separator" class="divider"></li>
+			  <li class="dropdown-header">SDKs</li>
+			  <li><a href="/documentation/sdks/java/">Java SDK</a></li>
+			  <li><a href="/documentation/sdks/javadoc/0.4.0/" target="_blank">Java SDK API Reference <img src="/images/external-link-icon.png"
+                 width="14" height="14"
+                 alt="External link."></a>
+        </li>
+        <li><a href="/documentation/sdks/python/">Python SDK</a></li>
+			  <li role="separator" class="divider"></li>
+			  <li class="dropdown-header">Runners</li>
+			  <li><a href="/documentation/runners/capability-matrix/">Capability Matrix</a></li>
+			  <li><a href="/documentation/runners/direct/">Direct Runner</a></li>
+			  <li><a href="/documentation/runners/apex/">Apache Apex Runner</a></li>
+			  <li><a href="/documentation/runners/flink/">Apache Flink Runner</a></li>
+			  <li><a href="/documentation/runners/spark/">Apache Spark Runner</a></li>
+			  <li><a href="/documentation/runners/dataflow/">Cloud Dataflow Runner</a></li>
+		  </ul>
+	    </li>
+        <li class="dropdown">
+		  <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Contribute <span class="caret"></span></a>
+		  <ul class="dropdown-menu">
+			  <li><a href="/contribute">Get Started Contributing</a></li>
+        <li role="separator" class="divider"></li>
+        <li class="dropdown-header">Guides</li>
+			  <li><a href="/contribute/contribution-guide/">Contribution Guide</a></li>
+        <li><a href="/contribute/testing/">Testing Guide</a></li>
+        <li><a href="/contribute/release-guide/">Release Guide</a></li>
+        <li role="separator" class="divider"></li>
+        <li class="dropdown-header">Technical References</li>
+        <li><a href="/contribute/design-principles/">Design Principles</a></li>
+			  <li><a href="/contribute/work-in-progress/">Ongoing Projects</a></li>
+        <li><a href="/contribute/source-repository/">Source Repository</a></li>      
+        <li role="separator" class="divider"></li>
+			  <li class="dropdown-header">Promotion</li>
+        <li><a href="/contribute/presentation-materials/">Presentation Materials</a></li>
+        <li><a href="/contribute/logos/">Logos and Design</a></li>
+        <li role="separator" class="divider"></li>
+        <li><a href="/contribute/maturity-model/">Maturity Model</a></li>
+        <li><a href="/contribute/team/">Team</a></li>
+		  </ul>
+	    </li>
+
+        <li><a href="/blog">Blog</a></li>
+      </ul>
+      <ul class="nav navbar-nav navbar-right">
+        <li class="dropdown">
+          <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false"><img src="https://www.apache.org/foundation/press/kit/feather_small.png" alt="Apache Logo" style="height:24px;">Apache Software Foundation<span class="caret"></span></a>
+          <ul class="dropdown-menu dropdown-menu-right">
+            <li><a href="http://www.apache.org/">ASF Homepage</a></li>
+            <li><a href="http://www.apache.org/licenses/">License</a></li>
+            <li><a href="http://www.apache.org/security/">Security</a></li>
+            <li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
+            <li><a href="http://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
+            <li><a href="https://www.apache.org/foundation/policies/conduct">Code of Conduct</a></li>
+          </ul>
+        </li>
+      </ul>
+    </div><!--/.nav-collapse -->
+  </div>
+</nav>
+
+
+<link rel="stylesheet" href="">
+
+
+    <div class="container" role="main">
+
+      <div class="row">
+        
+
+<article class="post" itemscope itemtype="http://schema.org/BlogPosting">
+
+  <header class="post-header">
+    <h1 class="post-title" itemprop="name headline">Release 0.4.0 adds a runner for Apache Apex</h1>
+    <p class="post-meta"><time datetime="2017-01-09T09:00:01-08:00" itemprop="datePublished">Jan 9, 2017</time> •  Thomas Weise [<a href="https://twitter.com/thweise">@thweise</a>]
+</p>
+  </header>
+
+  <div class="post-content" itemprop="articleBody">
+    <p>The latest release 0.4.0 of <a href="https://beam.apache.org">Apache Beam</a> adds a new runner for <a href="http://apex.apache.org/">Apache Apex</a>. We are excited to reach this initial milestone and are looking forward to continued collaboration between the Beam and Apex communities to advance the runner.</p>
+
+<!--more-->
+
+<p>Beam evolved from the Google Dataflow SDK and as incubator project has quickly adapted the Apache way, grown the community and attracts increasing interest from users that hope to benefit from a conceptual strong unified programming model that is portable between different big data processing frameworks (see <a href="https://www.oreilly.com/ideas/the-world-beyond-batch-streaming-101">Streaming-101</a> and <a href="https://www.oreilly.com/ideas/the-world-beyond-batch-streaming-102">Streaming-102</a>). Multiple Apache projects already provide runners for Beam (see <a href="http://beam.apache.org/documentation/runners/capability-matrix/">runners and capabilities matrix</a>).</p>
+
+<p>Apex is a stream processing framework for low-latency, high-throughput, stateful and reliable processing of complex analytics pipelines on clusters. Apex was developed since 2012 and is used in production by large companies for real-time and batch processing at scale.</p>
+
+<p>The initial revision of the runner was focussed on broad coverage of the Beam model on a functional level. That means, there will be follow up work in several areas to take the runner from functional to scalable and high performance to match the capabilities of Apex and its native API. The runner capabilities matrix shows that the Apex capabilities are well aligned with the Beam model. Specifically, the ability to track computational state in a fault tolerant and efficient manner is needed to broadly support the windowing concepts, including event time based processing.</p>
+
+<h2 id="stateful-stream-processor">Stateful Stream Processor</h2>
+
+<p>Apex was built as stateful stream processor from the ground up. Operators <a href="https://www.datatorrent.com/blog/blog-introduction-to-checkpoint/">checkpoint</a> state in a distributed and asynchronous manner that produces a consistent snapshot for the entire processing graph, which can be used for recovery. Apex also supports such recovery in an incremental, or fine grained, manner. This means only the portion of the DAG that is actually affected by a failure will be recovered while the remaining pipeline continues processing (this can be leveraged to implement use cases with special needs, such as speculative execution to achieve SLA on the processing latency). The state checkpointing along with idempotent processing guarantee is the basis for <a href="https://www.datatorrent.com/blog/end-to-end-exactly-once-with-apache-apex/">exactly-once results</a> support in Apex.</p>
+
+<h2 id="translation-to-apex-dag">Translation to Apex DAG</h2>
+
+<p>A Beam runner needs to implement the translation from the Beam model to the underlying frameworks execution model. In the case of Apex, the runner will translate the pipeline into the <a href="https://www.datatorrent.com/blog/tracing-dags-from-specification-to-execution/">native (compositional, low level) DAG API</a> (which is also the base for a number of other API that are available to specify applications that run on Apex). The DAG consists of operators (functional building blocks that are connected with streams. The runner provides the execution layer. In the case of Apex it is distributed stream processing, operators process data event by event. The minimum set of operators covers Beam’s primitive transforms: <code class="highlighter-rouge">ParDo.Bound</code>,  <code class="highlighter-rouge">ParDo.BoundMulti</code>, <code class="highlighter-rouge">Read.Unbounded</code>, <code class="highlighter-rouge">Read.Bounded</code>, <code class="highlighter-rouge">GroupByKey</code>, <code class="highlighter-rouge">Flatten.FlattenPCollectionList</code> etc.</p>
+
+<h2 id="execution-and-testing">Execution and Testing</h2>
+
+<p>In this release, the Apex runner executes the pipelines in embedded mode, where, similar to the direct runner, everything is executed in a single JVM. See <a href="https://beam.apache.org/get-started/quickstart/">quickstart</a> on how to run the Beam examples with the Apex runner.</p>
+
+<p>Embedded mode is useful for development and debugging. Apex in production runs distributed on Apache Hadoop YARN clusters. An example how a Beam pipeline can be embedded into an Apex application package to run on YARN can be found <a href="https://github.com/tweise/apex-samples/tree/master/beam-apex-wordcount">here</a> and support for direct launch in the runner is currently being worked on.</p>
+
+<p>The Beam project has a strong focus on development process and tooling, including testing. For the runners, there is a comprehensive test suite with more than 200 integration tests that are executed against each runner to ensure they don’t break as changes are made. The tests cover the capabilities of the matrix and thus are a measure of completeness and correctness of the runner implementations. The suite was very helpful when developing the Apex runner.</p>
+
+<h2 id="outlook">Outlook</h2>
+
+<p>The next step is to take the Apex runner from functional to ready for real applications that run distributed, leveraging the scalability and performance features of Apex, similar to its native API. This includes chaining of ParDos, partitioning, optimizing combine operations etc. To get involved, please see <a href="https://issues.apache.org/jira/issues/?jql=project%20%3D%20BEAM%20and%20component%20%3D%20runner-apex%20and%20resolution%20%3D%20unresolved">JIRA</a> and join the Beam community.</p>
+
+  </div>
+
+</article>
+
+      </div>
+
+
+    <hr>
+  <div class="row">
+      <div class="col-xs-12">
+          <footer>
+              <p class="text-center">&copy; Copyright 2016
+                <a href="http://www.apache.org">The Apache Software Foundation.</a> All Rights Reserved.</p>
+                <p class="text-center"><a href="/privacy_policy">Privacy Policy</a> |
+                <a href="/feed.xml">RSS Feed</a></p>
+          </footer>
+      </div>
+  </div>
+  <!-- container div end -->
+</div>
+
+
+  </body>
+
+</html>

http://git-wip-us.apache.org/repos/asf/beam-site/blob/973bd73a/content/blog/index.html
----------------------------------------------------------------------
diff --git a/content/blog/index.html b/content/blog/index.html
index 7ac5976..552b31f 100644
--- a/content/blog/index.html
+++ b/content/blog/index.html
@@ -149,6 +149,22 @@
 <p>This is the blog for the Apache Beam project. This blog contains news and updates
 for the project.</p>
 
+<h3 id="a-classpost-link-hrefblog20170109added-apex-runnerhtmlrelease-040-adds-a-runner-for-apache-apexa"><a class="post-link" href="/blog/2017/01/09/added-apex-runner.html">Release 0.4.0 adds a runner for Apache Apex</a></h3>
+<p><i>Jan 9, 2017 •  Thomas Weise [<a href="https://twitter.com/thweise">@thweise</a>]
+</i></p>
+
+<p>The latest release 0.4.0 of <a href="https://beam.apache.org">Apache Beam</a> adds a new runner for <a href="http://apex.apache.org/">Apache Apex</a>. We are excited to reach this initial milestone and are looking forward to continued collaboration between the Beam and Apex communities to advance the runner.</p>
+
+<!-- Render a "read more" button if the post is longer than the excerpt -->
+
+<p>
+<a class="btn btn-default btn-sm" href="/blog/2017/01/09/added-apex-runner.html" role="button">
+Read more&nbsp;<span class="glyphicon glyphicon-menu-right" aria-hidden="true"></span>
+</a>
+</p>
+
+<hr />
+
 <h3 id="a-classpost-link-hrefblog20161020test-streamhtmltesting-unbounded-pipelines-in-apache-beama"><a class="post-link" href="/blog/2016/10/20/test-stream.html">Testing Unbounded Pipelines in Apache Beam</a></h3>
 <p><i>Oct 20, 2016 •  Thomas Groh 
 </i></p>
@@ -337,22 +353,6 @@ Read more&nbsp;<span class="glyphicon glyphicon-menu-right" aria-hidden="true"><
 
 <hr />
 
-<h3 id="a-classpost-link-hrefblog20160109added-apex-runnerhtmlrelease-040-adds-a-runner-for-apache-apexa"><a class="post-link" href="/blog/2016/01/09/added-apex-runner.html">Release 0.4.0 adds a runner for Apache Apex</a></h3>
-<p><i>Jan 9, 2016 •  Thomas Weise [<a href="https://twitter.com/thweise">@thweise</a>]
-</i></p>
-
-<p>The latest release 0.4.0 of <a href="https://beam.apache.org">Apache Beam</a> adds a new runner for <a href="http://apex.apache.org/">Apache Apex</a>. We are excited to reach this initial milestone and are looking forward to continued collaboration between the Beam and Apex communities to advance the runner.</p>
-
-<!-- Render a "read more" button if the post is longer than the excerpt -->
-
-<p>
-<a class="btn btn-default btn-sm" href="/blog/2016/01/09/added-apex-runner.html" role="button">
-Read more&nbsp;<span class="glyphicon glyphicon-menu-right" aria-hidden="true"></span>
-</a>
-</p>
-
-<hr />
-
 
       </div>
 

http://git-wip-us.apache.org/repos/asf/beam-site/blob/973bd73a/content/feed.xml
----------------------------------------------------------------------
diff --git a/content/feed.xml b/content/feed.xml
index a875df7..84eb2eb 100644
--- a/content/feed.xml
+++ b/content/feed.xml
@@ -9,6 +9,47 @@
     <generator>Jekyll v3.2.0</generator>
     
       <item>
+        <title>Release 0.4.0 adds a runner for Apache Apex</title>
+        <description>&lt;p&gt;The latest release 0.4.0 of &lt;a href=&quot;https://beam.apache.org&quot;&gt;Apache Beam&lt;/a&gt; adds a new runner for &lt;a href=&quot;http://apex.apache.org/&quot;&gt;Apache Apex&lt;/a&gt;. We are excited to reach this initial milestone and are looking forward to continued collaboration between the Beam and Apex communities to advance the runner.&lt;/p&gt;
+
+&lt;!--more--&gt;
+
+&lt;p&gt;Beam evolved from the Google Dataflow SDK and as incubator project has quickly adapted the Apache way, grown the community and attracts increasing interest from users that hope to benefit from a conceptual strong unified programming model that is portable between different big data processing frameworks (see &lt;a href=&quot;https://www.oreilly.com/ideas/the-world-beyond-batch-streaming-101&quot;&gt;Streaming-101&lt;/a&gt; and &lt;a href=&quot;https://www.oreilly.com/ideas/the-world-beyond-batch-streaming-102&quot;&gt;Streaming-102&lt;/a&gt;). Multiple Apache projects already provide runners for Beam (see &lt;a href=&quot;http://beam.apache.org/documentation/runners/capability-matrix/&quot;&gt;runners and capabilities matrix&lt;/a&gt;).&lt;/p&gt;
+
+&lt;p&gt;Apex is a stream processing framework for low-latency, high-throughput, stateful and reliable processing of complex analytics pipelines on clusters. Apex was developed since 2012 and is used in production by large companies for real-time and batch processing at scale.&lt;/p&gt;
+
+&lt;p&gt;The initial revision of the runner was focussed on broad coverage of the Beam model on a functional level. That means, there will be follow up work in several areas to take the runner from functional to scalable and high performance to match the capabilities of Apex and its native API. The runner capabilities matrix shows that the Apex capabilities are well aligned with the Beam model. Specifically, the ability to track computational state in a fault tolerant and efficient manner is needed to broadly support the windowing concepts, including event time based processing.&lt;/p&gt;
+
+&lt;h2 id=&quot;stateful-stream-processor&quot;&gt;Stateful Stream Processor&lt;/h2&gt;
+
+&lt;p&gt;Apex was built as stateful stream processor from the ground up. Operators &lt;a href=&quot;https://www.datatorrent.com/blog/blog-introduction-to-checkpoint/&quot;&gt;checkpoint&lt;/a&gt; state in a distributed and asynchronous manner that produces a consistent snapshot for the entire processing graph, which can be used for recovery. Apex also supports such recovery in an incremental, or fine grained, manner. This means only the portion of the DAG that is actually affected by a failure will be recovered while the remaining pipeline continues processing (this can be leveraged to implement use cases with special needs, such as speculative execution to achieve SLA on the processing latency). The state checkpointing along with idempotent processing guarantee is the basis for &lt;a href=&quot;https://www.datatorrent.com/blog/end-to-end-exactly-once-with-apache-apex/&quot;&gt;exactly-once results&lt;/a&gt; support in Apex.&lt;/p&gt;
+
+&lt;h2 id=&quot;translation-to-apex-dag&quot;&gt;Translation to Apex DAG&lt;/h2&gt;
+
+&lt;p&gt;A Beam runner needs to implement the translation from the Beam model to the underlying frameworks execution model. In the case of Apex, the runner will translate the pipeline into the &lt;a href=&quot;https://www.datatorrent.com/blog/tracing-dags-from-specification-to-execution/&quot;&gt;native (compositional, low level) DAG API&lt;/a&gt; (which is also the base for a number of other API that are available to specify applications that run on Apex). The DAG consists of operators (functional building blocks that are connected with streams. The runner provides the execution layer. In the case of Apex it is distributed stream processing, operators process data event by event. The minimum set of operators covers Beam’s primitive transforms: &lt;code class=&quot;highlighter-rouge&quot;&gt;ParDo.Bound&lt;/code&gt;,  &lt;code class=&quot;highlighter-rouge&quot;&gt;ParDo.BoundMulti&lt;/code&gt;, &lt;code class=&quot;highlighter-rouge&quot;&gt;Read.Unbounded&lt;/code&gt;, &lt;code 
 class=&quot;highlighter-rouge&quot;&gt;Read.Bounded&lt;/code&gt;, &lt;code class=&quot;highlighter-rouge&quot;&gt;GroupByKey&lt;/code&gt;, &lt;code class=&quot;highlighter-rouge&quot;&gt;Flatten.FlattenPCollectionList&lt;/code&gt; etc.&lt;/p&gt;
+
+&lt;h2 id=&quot;execution-and-testing&quot;&gt;Execution and Testing&lt;/h2&gt;
+
+&lt;p&gt;In this release, the Apex runner executes the pipelines in embedded mode, where, similar to the direct runner, everything is executed in a single JVM. See &lt;a href=&quot;https://beam.apache.org/get-started/quickstart/&quot;&gt;quickstart&lt;/a&gt; on how to run the Beam examples with the Apex runner.&lt;/p&gt;
+
+&lt;p&gt;Embedded mode is useful for development and debugging. Apex in production runs distributed on Apache Hadoop YARN clusters. An example how a Beam pipeline can be embedded into an Apex application package to run on YARN can be found &lt;a href=&quot;https://github.com/tweise/apex-samples/tree/master/beam-apex-wordcount&quot;&gt;here&lt;/a&gt; and support for direct launch in the runner is currently being worked on.&lt;/p&gt;
+
+&lt;p&gt;The Beam project has a strong focus on development process and tooling, including testing. For the runners, there is a comprehensive test suite with more than 200 integration tests that are executed against each runner to ensure they don’t break as changes are made. The tests cover the capabilities of the matrix and thus are a measure of completeness and correctness of the runner implementations. The suite was very helpful when developing the Apex runner.&lt;/p&gt;
+
+&lt;h2 id=&quot;outlook&quot;&gt;Outlook&lt;/h2&gt;
+
+&lt;p&gt;The next step is to take the Apex runner from functional to ready for real applications that run distributed, leveraging the scalability and performance features of Apex, similar to its native API. This includes chaining of ParDos, partitioning, optimizing combine operations etc. To get involved, please see &lt;a href=&quot;https://issues.apache.org/jira/issues/?jql=project%20%3D%20BEAM%20and%20component%20%3D%20runner-apex%20and%20resolution%20%3D%20unresolved&quot;&gt;JIRA&lt;/a&gt; and join the Beam community.&lt;/p&gt;
+</description>
+        <pubDate>Mon, 09 Jan 2017 09:00:01 -0800</pubDate>
+        <link>http://beam.apache.org/blog/2017/01/09/added-apex-runner.html</link>
+        <guid isPermaLink="true">http://beam.apache.org/blog/2017/01/09/added-apex-runner.html</guid>
+        
+        
+        <category>blog</category>
+        
+      </item>
+    
+      <item>
         <title>Testing Unbounded Pipelines in Apache Beam</title>
         <description>&lt;p&gt;The Beam Programming Model unifies writing pipelines for Batch and Streaming
 pipelines. We’ve recently introduced a new PTransform to write tests for
@@ -1429,33 +1470,5 @@ PCollection&amp;lt;O&amp;gt; output = input
         
       </item>
     
-      <item>
-        <title>Dataflow Python SDK is now public!</title>
-        <description>&lt;p&gt;When the Apache Beam project proposed entry into the &lt;a href=&quot;http://wiki.apache.org/incubator/BeamProposal&quot;&gt;Apache Incubator&lt;/a&gt; the proposal
-included the &lt;a href=&quot;https://github.com/GoogleCloudPlatform/DataflowJavaSDK&quot;&gt;Dataflow Java SDK&lt;/a&gt;. In the long term, however, Apache Beam aims to support SDKs implemented in multiple languages, such as Python.&lt;/p&gt;
-
-&lt;!--more--&gt;
-
-&lt;p&gt;Today, Google submitted the &lt;a href=&quot;http://github.com/GoogleCloudPlatform/DataflowPythonSDK&quot;&gt;Dataflow Python (2.x) SDK&lt;/a&gt; on GitHub. Google is committed to including the in progress python SDK in Apache Beam and, in that spirit, we’ve moved development of the Python SDK to a public repository. While this SDK will not be included with the initial (incubating) releases of Apache Beam, our we plan on incorporating the Python SDK into beam during incubation. We want to take the time to implement changes from the &lt;a href=&quot;https://goo.gl/nk5OM0&quot;&gt;technical vision&lt;/a&gt; into the Java SDK before we introduce a Python SDK for Apache Beam. We believe this will allow us to work on the model and SDKs in an ordered fashion.&lt;/p&gt;
-
-&lt;p&gt;You can look for the Apache Beam Python SDK in the coming months once we finish forking and refactoring the Java SDK.&lt;/p&gt;
-
-&lt;p&gt;Best,&lt;/p&gt;
-
-&lt;p&gt;Apache Beam Team&lt;/p&gt;
-</description>
-        <pubDate>Thu, 25 Feb 2016 13:00:00 -0800</pubDate>
-        <link>http://beam.apache.org/beam/python/sdk/2016/02/25/python-sdk-now-public.html</link>
-        <guid isPermaLink="true">http://beam.apache.org/beam/python/sdk/2016/02/25/python-sdk-now-public.html</guid>
-        
-        
-        <category>beam</category>
-        
-        <category>python</category>
-        
-        <category>sdk</category>
-        
-      </item>
-    
   </channel>
 </rss>

http://git-wip-us.apache.org/repos/asf/beam-site/blob/973bd73a/content/index.html
----------------------------------------------------------------------
diff --git a/content/index.html b/content/index.html
index f116c6a..54682f7 100644
--- a/content/index.html
+++ b/content/index.html
@@ -170,6 +170,8 @@
     <h2>Blog</h2>
     <div class="list-group">
     
+    <a class="list-group-item" href="/blog/2017/01/09/added-apex-runner.html">Jan 9, 2017 - Release 0.4.0 adds a runner for Apache Apex</a>
+    
     <a class="list-group-item" href="/blog/2016/10/20/test-stream.html">Oct 20, 2016 - Testing Unbounded Pipelines in Apache Beam</a>
     
     <a class="list-group-item" href="/beam/update/2016/10/11/strata-hadoop-world-and-beam.html">Oct 11, 2016 - Strata+Hadoop World and Beam</a>
@@ -192,8 +194,6 @@
     
     <a class="list-group-item" href="/beam/update/website/2016/02/22/beam-has-a-logo.html">Feb 22, 2016 - Apache Beam has a logo!</a>
     
-    <a class="list-group-item" href="/blog/2016/01/09/added-apex-runner.html">Jan 9, 2016 - Release 0.4.0 adds a runner for Apache Apex</a>
-    
     </div>
   </div>
   <div class="col-md-6">

http://git-wip-us.apache.org/repos/asf/beam-site/blob/973bd73a/src/_posts/2017-01-09-added-apex-runner.md
----------------------------------------------------------------------
diff --git a/src/_posts/2017-01-09-added-apex-runner.md b/src/_posts/2017-01-09-added-apex-runner.md
index fa02080..2e28fd6 100644
--- a/src/_posts/2017-01-09-added-apex-runner.md
+++ b/src/_posts/2017-01-09-added-apex-runner.md
@@ -1,7 +1,7 @@
 ---
 layout: post
 title:  "Release 0.4.0 adds a runner for Apache Apex"
-date:   2016-01-09 10:00:01 -0700
+date:   2017-01-09 10:00:01 -0700
 excerpt_separator: <!--more-->
 categories: blog
 authors: