You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@samza.apache.org by xi...@apache.org on 2018/01/04 00:31:43 UTC

svn commit: r1820026 [18/24] - in /samza/site: ./ img/latest/learn/documentation/hadoop/ img/latest/learn/documentation/rest/ learn/documentation/latest/ learn/documentation/latest/api/ learn/documentation/latest/api/javadocs/ learn/documentation/lates...

Added: samza/site/learn/documentation/latest/azure/eventhubs.html
URL: http://svn.apache.org/viewvc/samza/site/learn/documentation/latest/azure/eventhubs.html?rev=1820026&view=auto
==============================================================================
--- samza/site/learn/documentation/latest/azure/eventhubs.html (added)
+++ samza/site/learn/documentation/latest/azure/eventhubs.html Thu Jan  4 00:31:39 2018
@@ -0,0 +1,335 @@
+<!DOCTYPE html>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Samza - Connecting to Eventhubs</title>
+    <link href='/css/ropa-sans.css' rel='stylesheet' type='text/css'/>
+    <link href="/css/bootstrap.min.css" rel="stylesheet"/>
+    <link href="/css/font-awesome.min.css" rel="stylesheet"/>
+    <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
+    <link rel="icon" type="image/png" href="/img/samza-icon.png">
+    <script src="/js/jquery-1.11.1.min.js"></script>
+  </head>
+  <body>
+    <div class="wrapper">
+      <div class="wrapper-content">
+
+        <div class="masthead">
+          <div class="container">
+            <div class="masthead-logo">
+              <a href="/" class="logo">samza</a>
+            </div>
+            <div class="masthead-icons">
+              <div class="pull-right">
+                <a href="/startup/download"><i class="fa fa-arrow-circle-o-down masthead-icon"></i></a>
+                <a href="https://git-wip-us.apache.org/repos/asf?p=samza.git;a=tree" target="_blank"><i class="fa fa-code masthead-icon" style="font-weight: bold;"></i></a>
+                <a href="https://twitter.com/samzastream" target="_blank"><i class="fa fa-twitter masthead-icon"></i></a>
+                <!-- this icon only shows in versioned pages -->
+                
+                  
+                    
+                  
+                  <a href="http://samza.apache.org/learn/documentation/0.14/azure/eventhubs.html"><i id="switch-version-button"></i></a>
+                   <!-- links for the navigation bar -->
+                
+
+              </div>
+            </div>
+          </div><!-- /.container -->
+        </div>
+
+        <div class="container">
+          <div class="menu">
+            <h1><i class="fa fa-rocket"></i> Getting Started</h1>
+            <ul>
+              <li><a href="/startup/hello-samza/latest">Hello Samza</a></li>
+              <li><a href="/startup/download">Download</a></li>
+              <li><a href="/startup/preview">Feature Preview</a></li>
+            </ul>
+
+            <h1><i class="fa fa-book"></i> Learn</h1>
+            <ul>
+              <li><a href="/learn/documentation/latest">Documentation</a></li>
+              <li><a href="/learn/documentation/latest/jobs/configuration-table.html">Configuration</a></li>
+              <li><a href="/learn/documentation/latest/container/metrics-table.html">Metrics</a></li>
+              <li><a href="/learn/documentation/latest/api/javadocs/">Javadocs</a></li>
+              <li><a href="/learn/tutorials/latest">Tutorials</a></li>
+              <li><a href="https://cwiki.apache.org/confluence/display/SAMZA/FAQ">FAQ</a></li>
+              <li><a href="https://cwiki.apache.org/confluence/display/SAMZA/Apache+Samza">Wiki</a></li>
+              <li><a href="https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=51812876">Papers &amp; Talks</a></li>
+              <li><a href="http://blogs.apache.org/samza">Blog</a></li>
+            </ul>
+
+            <h1><i class="fa fa-comments"></i> Community</h1>
+            <ul>
+              <li><a href="/community/mailing-lists.html">Mailing Lists</a></li>
+              <li><a href="/community/irc.html">IRC</a></li>
+              <li><a href="https://issues.apache.org/jira/browse/SAMZA">Bugs</a></li>
+              <li><a href="https://cwiki.apache.org/confluence/display/SAMZA/Powered+By">Powered by</a></li>
+              <li><a href="https://cwiki.apache.org/confluence/display/SAMZA/Ecosystem">Ecosystem</a></li>
+              <li><a href="/community/committers.html">Committers</a></li>
+            </ul>
+
+            <h1><i class="fa fa-code"></i> Contribute</h1>
+            <ul>
+              <li><a href="/contribute/contributors-corner.html">Contributor's Corner</a></li>
+              <li><a href="/contribute/coding-guide.html">Coding Guide</a></li>
+              <li><a href="/contribute/design-documents.html">Design Documents</a></li>
+              <li><a href="/contribute/code.html">Code</a></li>
+              <li><a href="/contribute/tests.html">Tests</a></li>
+            </ul>
+
+            <h1><i class="fa fa-history"></i> Archive</h1>
+            <ul>
+              <li><a href="/archive/index.html#latest">latest</a></li>
+              <li><a href="/archive/index.html#14">0.14</a></li>
+              <li><a href="/archive/index.html#13">0.13</a></li>
+              <li><a href="/archive/index.html#12">0.12</a></li>
+              <li><a href="/archive/index.html#11">0.11</a></li>
+              <li><a href="/archive/index.html#10">0.10</a></li>
+              <li><a href="/archive/index.html#09">0.9</a></li>
+              <li><a href="/archive/index.html#08">0.8</a></li>
+              <li><a href="/archive/index.html#07">0.7</a></li>
+            </ul>
+          </div>
+
+          <div class="content">
+            <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<h2>Connecting to Eventhubs</h2>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<p>You can configure your Samza jobs to process data from <a href="https://docs.microsoft.com/en-us/azure/event-hubs/event-hubs-features">Azure Eventhubs</a>, Microsoft&rsquo;s data streaming service. An <code>event hub</code> is similar to a Kafka topic and can have multiple partitions with producers and consumers. Each message produced to or consumed from an event hub is an instance of <a href="https://docs.microsoft.com/en-us/java/api/com.microsoft.azure.eventhubs._event_data">EventData</a>. </p>
+
+<h3 id="consuming-from-eventhubs:">Consuming from EventHubs:</h3>
+
+<p>Samza&rsquo;s <a href="https://github.com/apache/samza/blob/master/samza-azure/src/main/java/org/apache/samza/system/eventhub/consumer/EventHubSystemConsumer.java">EventHubSystemConsumer</a> wraps the EventData into an <a href="https://github.com/apache/samza/blob/master/samza-azure/src/main/java/org/apache/samza/system/eventhub/consumer/EventHubIncomingMessageEnvelope.java">EventHubIncomingMessageEnvelope</a>. The key of the message is set to the partition key of the EventData. The message is obtained from the EventData body. </p>
+
+<p>To configure Samza to consume from EventHub streams: </p>
+<div class="highlight"><pre><code class="language-text" data-lang="text"># define an event hub system factory with your identifier. eg: eh-system
+systems.eh-system.samza.factory=org.apache.samza.system.eventhub.EventHubSystemFactory
+
+# define your streams
+systems.eh-system.stream.list=input0, output0
+
+# define required properties for your streams
+systems.eh-system.streams.input0.eventhubs.namespace=YOUR-STREAM-NAMESPACE
+systems.eh-system.streams.input0.eventhubs.entitypath=YOUR-ENTITY-NAME
+systems.eh-system.streams.input0.eventhubs.sas.keyname=YOUR-SAS-KEY-NAME
+systems.eh-system.streams.input0.eventhubs.sas.token=YOUR-SAS-KEY-TOKEN
+
+systems.eh-system.streams.output0.eventhubs.namespace=YOUR-STREAM-NAMESPACE
+systems.eh-system.streams.output0.eventhubs.entitypath=YOUR-ENTITY-NAME
+systems.eh-system.streams.output0.eventhubs.sas.keyname=YOUR-SAS-KEY-NAME
+systems.eh-system.streams.output0.eventhubs.sas.token=YOUR-SAS-KEY-TOKEN
+</code></pre></div>
+<p>The tuple required to access the Eventhubs entity per stream must be provided, namely the fields <code>YOUR-STREAM-NAMESPACE</code>, <code>YOUR-ENTITY-NAME</code>, <code>YOUR-SAS-KEY-NAME</code>, <code>YOUR-SAS-KEY-TOKEN</code>.</p>
+
+<h3 id="producing-to-eventhubs:">Producing to EventHubs:</h3>
+
+<p>Similarly, you can also configure your Samza job to write to EventHubs.</p>
+<div class="highlight"><pre><code class="language-text" data-lang="text">OutgoingMessageEnvelope envelope = new OutgoingMessageEnvelope(new SystemStream(&quot;eh-system&quot;, &quot;output0&quot;), key, message);
+collector.send(envelope);
+</code></pre></div>
+
+
+<p>Each <a href="https://samza.apache.org/learn/documentation/latest/api/javadocs/org/apache/samza/system/OutgoingMessageEnvelope.html">OutgoingMessageEnvelope</a> is converted into an <a href="https://docs.microsoft.com/en-us/java/api/com.microsoft.azure.eventhubs._event_data">EventData</a> instance whose body is set to the <code>message</code> in the envelope. Additionally, the <code>key</code> and the <code>produce timestamp</code> are set as properties in the EventData before sending it to EventHubs.</p>
+
+<h3 id="advanced-configuration:">Advanced configuration:</h3>
+
+<h5 id="producer-partitioning:">Producer partitioning:</h5>
+
+<p>The <code>partition.method</code> property determines how outgoing messages are partitioned. Valid values for this config are <code>EVENT_HUB_HASHING</code>, <code>PARTITION_KEY_AS_PARTITION</code> or <code>ROUND_ROBIN</code>. </p>
+
+<p><code>EVENT_HUB_HASHING</code>: By default, Samza computes the partition for an outgoing message based on the hash of its partition-key. This ensures that events with the same key are sent to the same partition. If this option is chosen, the partition key should be a string. If the partition key is not set, the key in the message is used for partitioning.</p>
+
+<p><code>PARTITION_KEY_AS_PARTITION</code>: In this method, each message is sent to the partition specified by its partition key. This requires the partition key to be an integer. If the key is greater than the number of partitions, a modulo operation will be performed on the key. Similar to <code>EVENT_HUB_HASHING</code>, the key in the message is used if the partition key is not specified.</p>
+
+<p><code>ROUND_ROBIN</code>: In this method, outgoing messages are distributed in a round-robin across all partitions. The key and the partition key in the message are ignored.</p>
+
+<p><img src="/img/latest/learn/documentation/azure/eventhub_send_methods.png" alt="diagram-medium"></p>
+<div class="highlight"><pre><code class="language-text" data-lang="text">systems.eh-system.partition.method = EVENT_HUB_HASHING
+</code></pre></div>
+<h5 id="consumer-groups:">Consumer groups:</h5>
+
+<p>Eventhub supports a notion of <a href="https://docs.microsoft.com/en-us/azure/event-hubs/event-hubs-features#consumer-groups">consumer groups</a> which enable multiple applications to have their own view of the event stream. Each event hub stream has a pre-defined consumer group named <code>$Default</code>. You can define your own consumer group for your job and configure it via <code>eventhubs.consumer.group</code>:</p>
+<div class="highlight"><pre><code class="language-text" data-lang="text">systems.eh-system.streams.eh-input0.eventhubs.consumer.group = my-group
+</code></pre></div>
+<h5 id="serde:">Serde:</h5>
+
+<p>By default, the messages from EventHubs are sent and received as byte arrays. You can configure a serializer and deserializer for your message by setting a value for <code>msg.serde</code> for your stream. </p>
+<div class="highlight"><pre><code class="language-text" data-lang="text">streams.input0.samza.msg.serde = json
+streams.output0.samza.msg.serde = json
+</code></pre></div>
+<h5 id="consumer-buffer-size:">Consumer buffer size:</h5>
+
+<p>When the consumer reads messages from event hubs, it appends them to a shared producer-consumer buffer corresponding to its partition. This config determines the per-partition queue size. Setting a higher value for this config typically achieves a higher throughput at the expense of increased on-heap memory.</p>
+<div class="highlight"><pre><code class="language-text" data-lang="text">systems.eh-system.eventhubs.receive.queue.size = 10
+</code></pre></div>
+<p>For the list of all configs, check out the <a href="../jobs/configuration-table.html">configuration table page</a>.</p>
+
+<h3 id="azure-eventhubs-hello-samza-example">Azure Eventhubs Hello-Samza Example</h3>
+
+<p>The <a href="https://github.com/apache/samza-hello-samza">hello-samza</a> project contains an example of a high level job that consumes from and produces to Eventhub using the Zookeeper deployment model.</p>
+
+<h4 id="get-the-code">Get the Code</h4>
+
+<p>Let&rsquo;s get started by cloning the hello-samza project</p>
+<div class="highlight"><pre><code class="language-text" data-lang="text">git clone https://git.apache.org/samza-hello-samza.git hello-samza
+cd hello-samza
+git checkout latest
+</code></pre></div>
+<p>The project comes with numerous examples and for this tutorial, we will pick the Azure application.</p>
+
+<h4 id="setting-up-the-deployment-environment">Setting up the Deployment Environment</h4>
+
+<p>For our Azure application, we require <a href="http://zookeeper.apache.org/">ZooKeeper</a>. The hello-samza project comes with a script called &ldquo;grid&rdquo; to help with the environment setup</p>
+<div class="highlight"><pre><code class="language-text" data-lang="text">./bin/grid standalone
+</code></pre></div>
+<p>This command will download, install, and start ZooKeeper and Kafka. It will also check out the latest version of Samza and build it. All package files will be put in a sub-directory called &ldquo;deploy&rdquo; inside hello-samza&rsquo;s root folder.</p>
+
+<p>If you get a complaint that JAVA_HOME is not set, then you&rsquo;ll need to set it to the path where Java is installed on your system.</p>
+
+<h4 id="configuring-the-azure-application">Configuring the Azure application</h4>
+
+<p>Here are the configs you must set before building the project. Configure these in the <code>src/main/config/azure-application-local-runner.properties</code> file.</p>
+<div class="highlight"><pre><code class="language-text" data-lang="text"># Add your EventHubs input stream credentials here
+systems.eventhubs.streams.input-stream.eventhubs.namespace=YOUR-STREAM-NAMESPACE
+systems.eventhubs.streams.input-stream.eventhubs.entitypath=YOUR-ENTITY-NAME
+systems.eventhubs.streams.input-stream.eventhubs.sas.keyname=YOUR-SAS-KEY-NAME
+systems.eventhubs.streams.input-stream.eventhubs.sas.token=YOUR-SAS-KEY-TOKEN
+
+# Add your EventHubs output stream credentials here
+systems.eventhubs.streams.output-stream.eventhubs.namespace=YOUR-STREAM-NAMESPACE
+systems.eventhubs.streams.output-stream.eventhubs.entitypath=YOUR-ENTITY-NAME
+systems.eventhubs.streams.output-stream.eventhubs.sas.keyname=YOUR-SAS-KEY-NAME
+systems.eventhubs.streams.output-stream.eventhubs.sas.token=YOUR-SAS-KEY-TOKEN
+</code></pre></div>
+<p>Optionally, you may also use the Azure Checkpoint Manager. Otherwise, comment out both these lines.</p>
+<div class="highlight"><pre><code class="language-text" data-lang="text"># Azure Table Checkpoint Manager
+task.checkpoint.factory=org.apache.samza.checkpoint.azure.AzureCheckpointManagerFactory
+azure.storage.connect=YOUR-STORAGE-ACCOUNT-CONNECTION-STRING
+</code></pre></div>
+<h4 id="building-the-hello-samza-project">Building the Hello Samza Project</h4>
+
+<p>With the environment setup complete, let us move on to building the hello-samza project. Execute the following commands:</p>
+<div class="highlight"><pre><code class="language-text" data-lang="text">mvn clean package
+mkdir -p deploy/samza
+tar -xvf ./target/hello-samza-0.14.1-SNAPSHOT-dist.tar.gz -C deploy/samza
+</code></pre></div>
+<p>We are now all set to deploy the application locally.</p>
+
+<h4 id="running-the-azure-application">Running the Azure application</h4>
+
+<p>In order to run the application, we will use the <em>run-azure-application</em> script.</p>
+<div class="highlight"><pre><code class="language-text" data-lang="text">./deploy/samza/bin/run-azure-application.sh
+</code></pre></div>
+<p>The above command executes the helper script which invokes the <em>AzureZKLocalApplication</em> main class, which starts the <em>AzureApplication</em>. This application filters out the messages consumed without keys, prints them out and sends them to the configured output stream.</p>
+
+<p>The messages consumed should be printed in the following format:</p>
+<div class="highlight"><pre><code class="language-text" data-lang="text">Sending: 
+Received Key: &lt;KEY&gt;
+Received Message: &lt;VALUE&gt;
+</code></pre></div>
+
+
+<h4 id="shutdown">Shutdown</h4>
+
+<p>This application can be shut down by terminating the <em>run-azure-application</em> script.
+We can use the <em>grid</em> script to tear down the local environment (<a href="http://kafka.apache.org/">Kafka</a> and <a href="http://zookeeper.apache.org/">Zookeeper</a>).</p>
+<div class="highlight"><pre><code class="language-text" data-lang="text">bin/grid stop all
+</code></pre></div>
+
+          </div>
+        </div>
+
+      </div><!-- /.wrapper-content -->
+    </div><!-- /.wrapper -->
+
+    <div class="footer">
+      <div class="container">
+        <!-- nothing for now. -->
+      </div>
+    </div>
+
+  
+    <script>
+      $( document ).ready(function() {
+        if ( $.fn.urlExists( "/learn/documentation/0.14/azure/eventhubs.html" ) ) {
+          $("#switch-version-button").addClass("fa fa-history masthead-icon");
+        }
+      });
+
+      /* a function to test whether the url exists or not */
+      (function( $ ) {
+        $.fn.urlExists = function(url) {
+          var http = new XMLHttpRequest();
+          http.open('HEAD', url, false);
+          http.send();
+          return http.status != 404;
+        };
+      }( jQuery ));
+    </script>
+  
+
+    <!-- Google Analytics -->
+    <script>
+      (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+      m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+      })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+      ga('create', 'UA-43122768-1', 'apache.org');
+      ga('send', 'pageview');
+
+    </script>
+  </body>
+</html>

Modified: samza/site/learn/documentation/latest/comparisons/introduction.html
URL: http://svn.apache.org/viewvc/samza/site/learn/documentation/latest/comparisons/introduction.html?rev=1820026&r1=1820025&r2=1820026&view=diff
==============================================================================
--- samza/site/learn/documentation/latest/comparisons/introduction.html (original)
+++ samza/site/learn/documentation/latest/comparisons/introduction.html Thu Jan  4 00:31:39 2018
@@ -46,7 +46,7 @@
                   
                     
                   
-                  <a href="http://samza.apache.org/learn/documentation/0.13/comparisons/introduction.html"><i id="switch-version-button"></i></a>
+                  <a href="http://samza.apache.org/learn/documentation/0.14/comparisons/introduction.html"><i id="switch-version-button"></i></a>
                    <!-- links for the navigation bar -->
                 
 
@@ -99,6 +99,7 @@
             <h1><i class="fa fa-history"></i> Archive</h1>
             <ul>
               <li><a href="/archive/index.html#latest">latest</a></li>
+              <li><a href="/archive/index.html#14">0.14</a></li>
               <li><a href="/archive/index.html#13">0.13</a></li>
               <li><a href="/archive/index.html#12">0.12</a></li>
               <li><a href="/archive/index.html#11">0.11</a></li>
@@ -224,7 +225,7 @@ example above, where you have a stream o
   
     <script>
       $( document ).ready(function() {
-        if ( $.fn.urlExists( "/learn/documentation/0.13/comparisons/introduction.html" ) ) {
+        if ( $.fn.urlExists( "/learn/documentation/0.14/comparisons/introduction.html" ) ) {
           $("#switch-version-button").addClass("fa fa-history masthead-icon");
         }
       });

Modified: samza/site/learn/documentation/latest/comparisons/mupd8.html
URL: http://svn.apache.org/viewvc/samza/site/learn/documentation/latest/comparisons/mupd8.html?rev=1820026&r1=1820025&r2=1820026&view=diff
==============================================================================
--- samza/site/learn/documentation/latest/comparisons/mupd8.html (original)
+++ samza/site/learn/documentation/latest/comparisons/mupd8.html Thu Jan  4 00:31:39 2018
@@ -46,7 +46,7 @@
                   
                     
                   
-                  <a href="http://samza.apache.org/learn/documentation/0.13/comparisons/mupd8.html"><i id="switch-version-button"></i></a>
+                  <a href="http://samza.apache.org/learn/documentation/0.14/comparisons/mupd8.html"><i id="switch-version-button"></i></a>
                    <!-- links for the navigation bar -->
                 
 
@@ -99,6 +99,7 @@
             <h1><i class="fa fa-history"></i> Archive</h1>
             <ul>
               <li><a href="/archive/index.html#latest">latest</a></li>
+              <li><a href="/archive/index.html#14">0.14</a></li>
               <li><a href="/archive/index.html#13">0.13</a></li>
               <li><a href="/archive/index.html#12">0.12</a></li>
               <li><a href="/archive/index.html#11">0.11</a></li>
@@ -230,7 +231,7 @@
   
     <script>
       $( document ).ready(function() {
-        if ( $.fn.urlExists( "/learn/documentation/0.13/comparisons/mupd8.html" ) ) {
+        if ( $.fn.urlExists( "/learn/documentation/0.14/comparisons/mupd8.html" ) ) {
           $("#switch-version-button").addClass("fa fa-history masthead-icon");
         }
       });

Modified: samza/site/learn/documentation/latest/comparisons/spark-streaming.html
URL: http://svn.apache.org/viewvc/samza/site/learn/documentation/latest/comparisons/spark-streaming.html?rev=1820026&r1=1820025&r2=1820026&view=diff
==============================================================================
--- samza/site/learn/documentation/latest/comparisons/spark-streaming.html (original)
+++ samza/site/learn/documentation/latest/comparisons/spark-streaming.html Thu Jan  4 00:31:39 2018
@@ -46,7 +46,7 @@
                   
                     
                   
-                  <a href="http://samza.apache.org/learn/documentation/0.13/comparisons/spark-streaming.html"><i id="switch-version-button"></i></a>
+                  <a href="http://samza.apache.org/learn/documentation/0.14/comparisons/spark-streaming.html"><i id="switch-version-button"></i></a>
                    <!-- links for the navigation bar -->
                 
 
@@ -99,6 +99,7 @@
             <h1><i class="fa fa-history"></i> Archive</h1>
             <ul>
               <li><a href="/archive/index.html#latest">latest</a></li>
+              <li><a href="/archive/index.html#14">0.14</a></li>
               <li><a href="/archive/index.html#13">0.13</a></li>
               <li><a href="/archive/index.html#12">0.12</a></li>
               <li><a href="/archive/index.html#11">0.11</a></li>
@@ -265,7 +266,7 @@
   
     <script>
       $( document ).ready(function() {
-        if ( $.fn.urlExists( "/learn/documentation/0.13/comparisons/spark-streaming.html" ) ) {
+        if ( $.fn.urlExists( "/learn/documentation/0.14/comparisons/spark-streaming.html" ) ) {
           $("#switch-version-button").addClass("fa fa-history masthead-icon");
         }
       });

Modified: samza/site/learn/documentation/latest/comparisons/storm.html
URL: http://svn.apache.org/viewvc/samza/site/learn/documentation/latest/comparisons/storm.html?rev=1820026&r1=1820025&r2=1820026&view=diff
==============================================================================
--- samza/site/learn/documentation/latest/comparisons/storm.html (original)
+++ samza/site/learn/documentation/latest/comparisons/storm.html Thu Jan  4 00:31:39 2018
@@ -46,7 +46,7 @@
                   
                     
                   
-                  <a href="http://samza.apache.org/learn/documentation/0.13/comparisons/storm.html"><i id="switch-version-button"></i></a>
+                  <a href="http://samza.apache.org/learn/documentation/0.14/comparisons/storm.html"><i id="switch-version-button"></i></a>
                    <!-- links for the navigation bar -->
                 
 
@@ -99,6 +99,7 @@
             <h1><i class="fa fa-history"></i> Archive</h1>
             <ul>
               <li><a href="/archive/index.html#latest">latest</a></li>
+              <li><a href="/archive/index.html#14">0.14</a></li>
               <li><a href="/archive/index.html#13">0.13</a></li>
               <li><a href="/archive/index.html#12">0.12</a></li>
               <li><a href="/archive/index.html#11">0.11</a></li>
@@ -268,7 +269,7 @@
   
     <script>
       $( document ).ready(function() {
-        if ( $.fn.urlExists( "/learn/documentation/0.13/comparisons/storm.html" ) ) {
+        if ( $.fn.urlExists( "/learn/documentation/0.14/comparisons/storm.html" ) ) {
           $("#switch-version-button").addClass("fa fa-history masthead-icon");
         }
       });

Modified: samza/site/learn/documentation/latest/container/checkpointing.html
URL: http://svn.apache.org/viewvc/samza/site/learn/documentation/latest/container/checkpointing.html?rev=1820026&r1=1820025&r2=1820026&view=diff
==============================================================================
--- samza/site/learn/documentation/latest/container/checkpointing.html (original)
+++ samza/site/learn/documentation/latest/container/checkpointing.html Thu Jan  4 00:31:39 2018
@@ -46,7 +46,7 @@
                   
                     
                   
-                  <a href="http://samza.apache.org/learn/documentation/0.13/container/checkpointing.html"><i id="switch-version-button"></i></a>
+                  <a href="http://samza.apache.org/learn/documentation/0.14/container/checkpointing.html"><i id="switch-version-button"></i></a>
                    <!-- links for the navigation bar -->
                 
 
@@ -99,6 +99,7 @@
             <h1><i class="fa fa-history"></i> Archive</h1>
             <ul>
               <li><a href="/archive/index.html#latest">latest</a></li>
+              <li><a href="/archive/index.html#14">0.14</a></li>
               <li><a href="/archive/index.html#13">0.13</a></li>
               <li><a href="/archive/index.html#12">0.12</a></li>
               <li><a href="/archive/index.html#11">0.11</a></li>
@@ -281,7 +282,7 @@ Note that the callback will happen after
   
     <script>
       $( document ).ready(function() {
-        if ( $.fn.urlExists( "/learn/documentation/0.13/container/checkpointing.html" ) ) {
+        if ( $.fn.urlExists( "/learn/documentation/0.14/container/checkpointing.html" ) ) {
           $("#switch-version-button").addClass("fa fa-history masthead-icon");
         }
       });

Modified: samza/site/learn/documentation/latest/container/coordinator-stream.html
URL: http://svn.apache.org/viewvc/samza/site/learn/documentation/latest/container/coordinator-stream.html?rev=1820026&r1=1820025&r2=1820026&view=diff
==============================================================================
--- samza/site/learn/documentation/latest/container/coordinator-stream.html (original)
+++ samza/site/learn/documentation/latest/container/coordinator-stream.html Thu Jan  4 00:31:39 2018
@@ -46,7 +46,7 @@
                   
                     
                   
-                  <a href="http://samza.apache.org/learn/documentation/0.13/container/coordinator-stream.html"><i id="switch-version-button"></i></a>
+                  <a href="http://samza.apache.org/learn/documentation/0.14/container/coordinator-stream.html"><i id="switch-version-button"></i></a>
                    <!-- links for the navigation bar -->
                 
 
@@ -99,6 +99,7 @@
             <h1><i class="fa fa-history"></i> Archive</h1>
             <ul>
               <li><a href="/archive/index.html#latest">latest</a></li>
+              <li><a href="/archive/index.html#14">0.14</a></li>
               <li><a href="/archive/index.html#13">0.13</a></li>
               <li><a href="/archive/index.html#12">0.12</a></li>
               <li><a href="/archive/index.html#11">0.11</a></li>
@@ -294,7 +295,7 @@
   
     <script>
       $( document ).ready(function() {
-        if ( $.fn.urlExists( "/learn/documentation/0.13/container/coordinator-stream.html" ) ) {
+        if ( $.fn.urlExists( "/learn/documentation/0.14/container/coordinator-stream.html" ) ) {
           $("#switch-version-button").addClass("fa fa-history masthead-icon");
         }
       });

Modified: samza/site/learn/documentation/latest/container/event-loop.html
URL: http://svn.apache.org/viewvc/samza/site/learn/documentation/latest/container/event-loop.html?rev=1820026&r1=1820025&r2=1820026&view=diff
==============================================================================
--- samza/site/learn/documentation/latest/container/event-loop.html (original)
+++ samza/site/learn/documentation/latest/container/event-loop.html Thu Jan  4 00:31:39 2018
@@ -46,7 +46,7 @@
                   
                     
                   
-                  <a href="http://samza.apache.org/learn/documentation/0.13/container/event-loop.html"><i id="switch-version-button"></i></a>
+                  <a href="http://samza.apache.org/learn/documentation/0.14/container/event-loop.html"><i id="switch-version-button"></i></a>
                    <!-- links for the navigation bar -->
                 
 
@@ -99,6 +99,7 @@
             <h1><i class="fa fa-history"></i> Archive</h1>
             <ul>
               <li><a href="/archive/index.html#latest">latest</a></li>
+              <li><a href="/archive/index.html#14">0.14</a></li>
               <li><a href="/archive/index.html#13">0.13</a></li>
               <li><a href="/archive/index.html#12">0.12</a></li>
               <li><a href="/archive/index.html#11">0.11</a></li>
@@ -216,7 +217,7 @@
   
     <script>
       $( document ).ready(function() {
-        if ( $.fn.urlExists( "/learn/documentation/0.13/container/event-loop.html" ) ) {
+        if ( $.fn.urlExists( "/learn/documentation/0.14/container/event-loop.html" ) ) {
           $("#switch-version-button").addClass("fa fa-history masthead-icon");
         }
       });

Modified: samza/site/learn/documentation/latest/container/jmx.html
URL: http://svn.apache.org/viewvc/samza/site/learn/documentation/latest/container/jmx.html?rev=1820026&r1=1820025&r2=1820026&view=diff
==============================================================================
--- samza/site/learn/documentation/latest/container/jmx.html (original)
+++ samza/site/learn/documentation/latest/container/jmx.html Thu Jan  4 00:31:39 2018
@@ -46,7 +46,7 @@
                   
                     
                   
-                  <a href="http://samza.apache.org/learn/documentation/0.13/container/jmx.html"><i id="switch-version-button"></i></a>
+                  <a href="http://samza.apache.org/learn/documentation/0.14/container/jmx.html"><i id="switch-version-button"></i></a>
                    <!-- links for the navigation bar -->
                 
 
@@ -99,6 +99,7 @@
             <h1><i class="fa fa-history"></i> Archive</h1>
             <ul>
               <li><a href="/archive/index.html#latest">latest</a></li>
+              <li><a href="/archive/index.html#14">0.14</a></li>
               <li><a href="/archive/index.html#13">0.13</a></li>
               <li><a href="/archive/index.html#12">0.12</a></li>
               <li><a href="/archive/index.html#11">0.11</a></li>
@@ -179,7 +180,7 @@
   
     <script>
       $( document ).ready(function() {
-        if ( $.fn.urlExists( "/learn/documentation/0.13/container/jmx.html" ) ) {
+        if ( $.fn.urlExists( "/learn/documentation/0.14/container/jmx.html" ) ) {
           $("#switch-version-button").addClass("fa fa-history masthead-icon");
         }
       });

Modified: samza/site/learn/documentation/latest/container/metrics-table.html
URL: http://svn.apache.org/viewvc/samza/site/learn/documentation/latest/container/metrics-table.html?rev=1820026&r1=1820025&r2=1820026&view=diff
==============================================================================
--- samza/site/learn/documentation/latest/container/metrics-table.html (original)
+++ samza/site/learn/documentation/latest/container/metrics-table.html Thu Jan  4 00:31:39 2018
@@ -142,6 +142,8 @@
     <li><a href="#bootstrapping-chooser-metrics">BootstrappingChooserMetrics</a></li>
     <li><a href="#hdfs-system-producer-metrics">HdfsSystemProducerMetrics</a></li>
     <li><a href="#elasticsearch-system-producer-metrics">ElasticsearchSystemProducerMetrics</a></li>
+    <li><a href="#zookeeper-client-metrics">ZookeeperClientMetrics</a></li>
+    <li><a href="#zookeeper-job-coordinator-metrics">ZkJobCoordinatorMetrics</a></li>
 </ul>
 <p>Words highlighted like <span class="system">this</span> are placeholders for your own variable names defined in configuration file or system variables defined while starting the job.</p>
 <p id="average-time" style="color: #00a">Note: Average time is calculated for the current time window (set to 300 seconds)</p>
@@ -215,6 +217,10 @@
         <td>physical-memory-mb</td>
         <td>The physical memory used by the Samza container process (native + on heap) (in megabytes)</td>
     </tr>
+    <tr>
+        <td>container-startup-time</td>
+        <td><a href="#average-time">Average time</a> taken for the container to start up</td>
+    </tr>
 
     <tr>
         <th colspan="2" class="section" id="job-coordinator">job-coordinator</th>
@@ -339,6 +345,18 @@
         <td>threads-terminated</td>
         <td>Current number of terminated threads</td>
     </tr>
+    <tr>
+        <td>process-cpu-usage</td>
+        <td>Current CPU usage of the JVM process as a percentage from 0 to 100. The percentage represents the proportion of executed ticks by the JVM process to the total ticks across all CPUs. A negative number indicates the value was not available from the operating system. For more detail, see the JavaDoc for com.sun.management.OperatingSystemMXBean.</td>
+    </tr>
+    <tr>
+        <td>system-cpu-usage</td>
+        <td>Current CPU usage of all processes in the whole system as a percentage from 0 to 100. The percentage represents the proportion of executed ticks by all processes to the total ticks across all CPUs. A negative number indicates the value was not available from the operating system. For more detail, see the JavaDoc for com.sun.management.OperatingSystemMXBean.</td>
+    </tr>
+    <tr>
+        <td>open-file-descriptor-count</td>
+        <td>Current number of open file descriptors</td>
+    </tr>
 
     <tr>
         <th colspan="2" class="section" id="system-consumers-metrics">org.apache.samza.system.SystemConsumersMetrics</th>
@@ -887,6 +905,49 @@
         <td><span class="system">system</span>-version-conflicts</td>
         <td>Number of times the request could not be completed due to a conflict with the current state of the document</td>
     </tr>
+
+    <tr>
+        <th colspan="2" class="section" id="zookeeper-client-metrics">org.apache.samza.zk.ZkUtilsMetrics</th>
+    </tr>
+    <tr>
+        <td>reads</td>
+        <td>Number of reads from Zookeeper</td>
+    </tr>
+    <tr>
+        <td>writes</td>
+        <td>Number of writes to Zookeeper</td>
+    </tr>
+    <tr>
+        <td>subscriptions</td>
+        <td>Number of subscriptions to znodes in Zookeeper</td>
+    </tr>
+    <tr>
+        <td>zk-connection-errors</td>
+        <td>Number of Zookeeper connection errors</td>
+    </tr>
+    <tr>
+        <th colspan="2" class="section" id="zookeeper-job-coordinator-metrics">org.apache.samza.zk.ZkJobCoordinatorMetrics</th>
+    </tr>
+    <tr>
+        <td>is-leader</td>
+        <td>Denotes if the processor is a leader or not</td>
+    </tr>
+    <tr>
+        <td>barrier-creation</td>
+        <td>Number of times a barrier was created by the leader</td>
+    </tr>
+    <tr>
+        <td>barrier-state-change</td>
+        <td>Number of times the barrier state changed</td>
+    </tr>
+    <tr>
+        <td>barrier-error</td>
+        <td>Number of times the barrier encountered an error while attaining consensus on the job model version</td>
+    </tr>
+    <tr>
+        <td>single-barrier-rebalancing-time</td>
+        <td><a href="#average-time">Average time</a> taken for all the processors to get the latest version of the job model after a single processor change (without the occurrence of a barrier timeout)</td>
+    </tr>
     </tbody>
 </table>
 </body>

Modified: samza/site/learn/documentation/latest/container/metrics.html
URL: http://svn.apache.org/viewvc/samza/site/learn/documentation/latest/container/metrics.html?rev=1820026&r1=1820025&r2=1820026&view=diff
==============================================================================
--- samza/site/learn/documentation/latest/container/metrics.html (original)
+++ samza/site/learn/documentation/latest/container/metrics.html Thu Jan  4 00:31:39 2018
@@ -46,7 +46,7 @@
                   
                     
                   
-                  <a href="http://samza.apache.org/learn/documentation/0.13/container/metrics.html"><i id="switch-version-button"></i></a>
+                  <a href="http://samza.apache.org/learn/documentation/0.14/container/metrics.html"><i id="switch-version-button"></i></a>
                    <!-- links for the navigation bar -->
                 
 
@@ -99,6 +99,7 @@
             <h1><i class="fa fa-history"></i> Archive</h1>
             <ul>
               <li><a href="/archive/index.html#latest">latest</a></li>
+              <li><a href="/archive/index.html#14">0.14</a></li>
               <li><a href="/archive/index.html#13">0.13</a></li>
               <li><a href="/archive/index.html#12">0.12</a></li>
               <li><a href="/archive/index.html#11">0.11</a></li>
@@ -238,7 +239,7 @@
   
     <script>
       $( document ).ready(function() {
-        if ( $.fn.urlExists( "/learn/documentation/0.13/container/metrics.html" ) ) {
+        if ( $.fn.urlExists( "/learn/documentation/0.14/container/metrics.html" ) ) {
           $("#switch-version-button").addClass("fa fa-history masthead-icon");
         }
       });

Modified: samza/site/learn/documentation/latest/container/samza-container.html
URL: http://svn.apache.org/viewvc/samza/site/learn/documentation/latest/container/samza-container.html?rev=1820026&r1=1820025&r2=1820026&view=diff
==============================================================================
--- samza/site/learn/documentation/latest/container/samza-container.html (original)
+++ samza/site/learn/documentation/latest/container/samza-container.html Thu Jan  4 00:31:39 2018
@@ -46,7 +46,7 @@
                   
                     
                   
-                  <a href="http://samza.apache.org/learn/documentation/0.13/container/samza-container.html"><i id="switch-version-button"></i></a>
+                  <a href="http://samza.apache.org/learn/documentation/0.14/container/samza-container.html"><i id="switch-version-button"></i></a>
                    <!-- links for the navigation bar -->
                 
 
@@ -99,6 +99,7 @@
             <h1><i class="fa fa-history"></i> Archive</h1>
             <ul>
               <li><a href="/archive/index.html#latest">latest</a></li>
+              <li><a href="/archive/index.html#14">0.14</a></li>
               <li><a href="/archive/index.html#13">0.13</a></li>
               <li><a href="/archive/index.html#12">0.12</a></li>
               <li><a href="/archive/index.html#11">0.11</a></li>
@@ -255,7 +256,7 @@
   
     <script>
       $( document ).ready(function() {
-        if ( $.fn.urlExists( "/learn/documentation/0.13/container/samza-container.html" ) ) {
+        if ( $.fn.urlExists( "/learn/documentation/0.14/container/samza-container.html" ) ) {
           $("#switch-version-button").addClass("fa fa-history masthead-icon");
         }
       });

Modified: samza/site/learn/documentation/latest/container/serialization.html
URL: http://svn.apache.org/viewvc/samza/site/learn/documentation/latest/container/serialization.html?rev=1820026&r1=1820025&r2=1820026&view=diff
==============================================================================
--- samza/site/learn/documentation/latest/container/serialization.html (original)
+++ samza/site/learn/documentation/latest/container/serialization.html Thu Jan  4 00:31:39 2018
@@ -46,7 +46,7 @@
                   
                     
                   
-                  <a href="http://samza.apache.org/learn/documentation/0.13/container/serialization.html"><i id="switch-version-button"></i></a>
+                  <a href="http://samza.apache.org/learn/documentation/0.14/container/serialization.html"><i id="switch-version-button"></i></a>
                    <!-- links for the navigation bar -->
                 
 
@@ -99,6 +99,7 @@
             <h1><i class="fa fa-history"></i> Archive</h1>
             <ul>
               <li><a href="/archive/index.html#latest">latest</a></li>
+              <li><a href="/archive/index.html#14">0.14</a></li>
               <li><a href="/archive/index.html#13">0.13</a></li>
               <li><a href="/archive/index.html#12">0.12</a></li>
               <li><a href="/archive/index.html#11">0.11</a></li>
@@ -261,7 +262,7 @@
   
     <script>
       $( document ).ready(function() {
-        if ( $.fn.urlExists( "/learn/documentation/0.13/container/serialization.html" ) ) {
+        if ( $.fn.urlExists( "/learn/documentation/0.14/container/serialization.html" ) ) {
           $("#switch-version-button").addClass("fa fa-history masthead-icon");
         }
       });

Modified: samza/site/learn/documentation/latest/container/state-management.html
URL: http://svn.apache.org/viewvc/samza/site/learn/documentation/latest/container/state-management.html?rev=1820026&r1=1820025&r2=1820026&view=diff
==============================================================================
--- samza/site/learn/documentation/latest/container/state-management.html (original)
+++ samza/site/learn/documentation/latest/container/state-management.html Thu Jan  4 00:31:39 2018
@@ -46,7 +46,7 @@
                   
                     
                   
-                  <a href="http://samza.apache.org/learn/documentation/0.13/container/state-management.html"><i id="switch-version-button"></i></a>
+                  <a href="http://samza.apache.org/learn/documentation/0.14/container/state-management.html"><i id="switch-version-button"></i></a>
                    <!-- links for the navigation bar -->
                 
 
@@ -99,6 +99,7 @@
             <h1><i class="fa fa-history"></i> Archive</h1>
             <ul>
               <li><a href="/archive/index.html#latest">latest</a></li>
+              <li><a href="/archive/index.html#14">0.14</a></li>
               <li><a href="/archive/index.html#13">0.13</a></li>
               <li><a href="/archive/index.html#12">0.12</a></li>
               <li><a href="/archive/index.html#11">0.11</a></li>
@@ -436,7 +437,7 @@ Object <span class="nv">value</span> <sp
   
     <script>
       $( document ).ready(function() {
-        if ( $.fn.urlExists( "/learn/documentation/0.13/container/state-management.html" ) ) {
+        if ( $.fn.urlExists( "/learn/documentation/0.14/container/state-management.html" ) ) {
           $("#switch-version-button").addClass("fa fa-history masthead-icon");
         }
       });

Modified: samza/site/learn/documentation/latest/container/streams.html
URL: http://svn.apache.org/viewvc/samza/site/learn/documentation/latest/container/streams.html?rev=1820026&r1=1820025&r2=1820026&view=diff
==============================================================================
--- samza/site/learn/documentation/latest/container/streams.html (original)
+++ samza/site/learn/documentation/latest/container/streams.html Thu Jan  4 00:31:39 2018
@@ -46,7 +46,7 @@
                   
                     
                   
-                  <a href="http://samza.apache.org/learn/documentation/0.13/container/streams.html"><i id="switch-version-button"></i></a>
+                  <a href="http://samza.apache.org/learn/documentation/0.14/container/streams.html"><i id="switch-version-button"></i></a>
                    <!-- links for the navigation bar -->
                 
 
@@ -99,6 +99,7 @@
             <h1><i class="fa fa-history"></i> Archive</h1>
             <ul>
               <li><a href="/archive/index.html#latest">latest</a></li>
+              <li><a href="/archive/index.html#14">0.14</a></li>
               <li><a href="/archive/index.html#13">0.13</a></li>
               <li><a href="/archive/index.html#12">0.12</a></li>
               <li><a href="/archive/index.html#11">0.11</a></li>
@@ -271,7 +272,7 @@
   
     <script>
       $( document ).ready(function() {
-        if ( $.fn.urlExists( "/learn/documentation/0.13/container/streams.html" ) ) {
+        if ( $.fn.urlExists( "/learn/documentation/0.14/container/streams.html" ) ) {
           $("#switch-version-button").addClass("fa fa-history masthead-icon");
         }
       });

Modified: samza/site/learn/documentation/latest/container/windowing.html
URL: http://svn.apache.org/viewvc/samza/site/learn/documentation/latest/container/windowing.html?rev=1820026&r1=1820025&r2=1820026&view=diff
==============================================================================
--- samza/site/learn/documentation/latest/container/windowing.html (original)
+++ samza/site/learn/documentation/latest/container/windowing.html Thu Jan  4 00:31:39 2018
@@ -46,7 +46,7 @@
                   
                     
                   
-                  <a href="http://samza.apache.org/learn/documentation/0.13/container/windowing.html"><i id="switch-version-button"></i></a>
+                  <a href="http://samza.apache.org/learn/documentation/0.14/container/windowing.html"><i id="switch-version-button"></i></a>
                    <!-- links for the navigation bar -->
                 
 
@@ -99,6 +99,7 @@
             <h1><i class="fa fa-history"></i> Archive</h1>
             <ul>
               <li><a href="/archive/index.html#latest">latest</a></li>
+              <li><a href="/archive/index.html#14">0.14</a></li>
               <li><a href="/archive/index.html#13">0.13</a></li>
               <li><a href="/archive/index.html#12">0.12</a></li>
               <li><a href="/archive/index.html#11">0.11</a></li>
@@ -199,7 +200,7 @@
   
     <script>
       $( document ).ready(function() {
-        if ( $.fn.urlExists( "/learn/documentation/0.13/container/windowing.html" ) ) {
+        if ( $.fn.urlExists( "/learn/documentation/0.14/container/windowing.html" ) ) {
           $("#switch-version-button").addClass("fa fa-history masthead-icon");
         }
       });

Added: samza/site/learn/documentation/latest/hadoop/consumer.html
URL: http://svn.apache.org/viewvc/samza/site/learn/documentation/latest/hadoop/consumer.html?rev=1820026&view=auto
==============================================================================
--- samza/site/learn/documentation/latest/hadoop/consumer.html (added)
+++ samza/site/learn/documentation/latest/hadoop/consumer.html Thu Jan  4 00:31:39 2018
@@ -0,0 +1,274 @@
+<!DOCTYPE html>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Samza - Reading from HDFS</title>
+    <link href='/css/ropa-sans.css' rel='stylesheet' type='text/css'/>
+    <link href="/css/bootstrap.min.css" rel="stylesheet"/>
+    <link href="/css/font-awesome.min.css" rel="stylesheet"/>
+    <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
+    <link rel="icon" type="image/png" href="/img/samza-icon.png">
+    <script src="/js/jquery-1.11.1.min.js"></script>
+  </head>
+  <body>
+    <div class="wrapper">
+      <div class="wrapper-content">
+
+        <div class="masthead">
+          <div class="container">
+            <div class="masthead-logo">
+              <a href="/" class="logo">samza</a>
+            </div>
+            <div class="masthead-icons">
+              <div class="pull-right">
+                <a href="/startup/download"><i class="fa fa-arrow-circle-o-down masthead-icon"></i></a>
+                <a href="https://git-wip-us.apache.org/repos/asf?p=samza.git;a=tree" target="_blank"><i class="fa fa-code masthead-icon" style="font-weight: bold;"></i></a>
+                <a href="https://twitter.com/samzastream" target="_blank"><i class="fa fa-twitter masthead-icon"></i></a>
+                <!-- this icon only shows in versioned pages -->
+                
+                  
+                    
+                  
+                  <a href="http://samza.apache.org/learn/documentation/0.14/hadoop/consumer.html"><i id="switch-version-button"></i></a>
+                   <!-- links for the navigation bar -->
+                
+
+              </div>
+            </div>
+          </div><!-- /.container -->
+        </div>
+
+        <div class="container">
+          <div class="menu">
+            <h1><i class="fa fa-rocket"></i> Getting Started</h1>
+            <ul>
+              <li><a href="/startup/hello-samza/latest">Hello Samza</a></li>
+              <li><a href="/startup/download">Download</a></li>
+              <li><a href="/startup/preview">Feature Preview</a></li>
+            </ul>
+
+            <h1><i class="fa fa-book"></i> Learn</h1>
+            <ul>
+              <li><a href="/learn/documentation/latest">Documentation</a></li>
+              <li><a href="/learn/documentation/latest/jobs/configuration-table.html">Configuration</a></li>
+              <li><a href="/learn/documentation/latest/container/metrics-table.html">Metrics</a></li>
+              <li><a href="/learn/documentation/latest/api/javadocs/">Javadocs</a></li>
+              <li><a href="/learn/tutorials/latest">Tutorials</a></li>
+              <li><a href="https://cwiki.apache.org/confluence/display/SAMZA/FAQ">FAQ</a></li>
+              <li><a href="https://cwiki.apache.org/confluence/display/SAMZA/Apache+Samza">Wiki</a></li>
+              <li><a href="https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=51812876">Papers &amp; Talks</a></li>
+              <li><a href="http://blogs.apache.org/samza">Blog</a></li>
+            </ul>
+
+            <h1><i class="fa fa-comments"></i> Community</h1>
+            <ul>
+              <li><a href="/community/mailing-lists.html">Mailing Lists</a></li>
+              <li><a href="/community/irc.html">IRC</a></li>
+              <li><a href="https://issues.apache.org/jira/browse/SAMZA">Bugs</a></li>
+              <li><a href="https://cwiki.apache.org/confluence/display/SAMZA/Powered+By">Powered by</a></li>
+              <li><a href="https://cwiki.apache.org/confluence/display/SAMZA/Ecosystem">Ecosystem</a></li>
+              <li><a href="/community/committers.html">Committers</a></li>
+            </ul>
+
+            <h1><i class="fa fa-code"></i> Contribute</h1>
+            <ul>
+              <li><a href="/contribute/contributors-corner.html">Contributor's Corner</a></li>
+              <li><a href="/contribute/coding-guide.html">Coding Guide</a></li>
+              <li><a href="/contribute/design-documents.html">Design Documents</a></li>
+              <li><a href="/contribute/code.html">Code</a></li>
+              <li><a href="/contribute/tests.html">Tests</a></li>
+            </ul>
+
+            <h1><i class="fa fa-history"></i> Archive</h1>
+            <ul>
+              <li><a href="/archive/index.html#latest">latest</a></li>
+              <li><a href="/archive/index.html#14">0.14</a></li>
+              <li><a href="/archive/index.html#13">0.13</a></li>
+              <li><a href="/archive/index.html#12">0.12</a></li>
+              <li><a href="/archive/index.html#11">0.11</a></li>
+              <li><a href="/archive/index.html#10">0.10</a></li>
+              <li><a href="/archive/index.html#09">0.9</a></li>
+              <li><a href="/archive/index.html#08">0.8</a></li>
+              <li><a href="/archive/index.html#07">0.7</a></li>
+            </ul>
+          </div>
+
+          <div class="content">
+            <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<h2>Reading from HDFS</h2>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<p>You can configure your Samza job to read from HDFS files. The <a href="https://github.com/apache/samza/blob/master/samza-hdfs/src/main/java/org/apache/samza/system/hdfs/HdfsSystemConsumer.java">HdfsSystemConsumer</a> can read from HDFS files. Avro encoded records are supported out of the box and it is easy to extend to support other formats (plain text, csv, json etc). See <code>Event format</code> section below.</p>
+
+<h3 id="environment">Environment</h3>
+
+<p>Your job needs to run on the same YARN cluster which hosts the HDFS you want to consume from.</p>
+
+<h3 id="partitioning">Partitioning</h3>
+
+<p>Partitioning works at the level of individual HDFS files. Each file is treated as a stream partition, while a directory that contains these files is a stream. For example, if you want to read from an HDFS path which contains 10 individual files, there will naturally be 10 partitions created. You can configure up to 10 Samza containers to process these partitions. If you want to read from a single HDFS file, there is currently no way to break down the consumption - you can only have one container to process the file.</p>
+
+<h3 id="event-format">Event format</h3>
+
+<p><a href="https://github.com/apache/samza/blob/master/samza-hdfs/src/main/java/org/apache/samza/system/hdfs/HdfsSystemConsumer.java">HdfsSystemConsumer</a> currently supports reading from avro files. The received <a href="../api/javadocs/org/apache/samza/system/IncomingMessageEnvelope.html">IncomingMessageEnvelope</a> contains three significant fields:</p>
+
+<ol>
+<li>The key which is empty</li>
+<li>The message which is set to the avro <a href="https://avro.apache.org/docs/1.7.6/api/java/org/apache/avro/generic/GenericRecord.html">GenericRecord</a></li>
+<li>The stream partition which is set to the name of the HDFS file</li>
+</ol>
+
+<p>To extend the support beyond avro files (e.g. json, csv, etc.), you can implement the interface <a href="https://github.com/apache/samza/blob/master/samza-hdfs/src/main/java/org/apache/samza/system/hdfs/reader/SingleFileHdfsReader.java">SingleFileHdfsReader</a> (take a look at the implementation of <a href="https://github.com/apache/samza/blob/master/samza-hdfs/src/main/java/org/apache/samza/system/hdfs/reader/AvroFileHdfsReader.java">AvroFileHdfsReader</a> as a sample).</p>
+
+<h3 id="end-of-stream-support">End of stream support</h3>
+
+<p>One major difference between HDFS data and Kafka data is that while a kafka topic has an unbounded stream of messages, HDFS files are bounded and have a notion of EOF.</p>
+
+<p>You can choose to implement <a href="../api/javadocs/org/apache/samza/task/EndOfStreamListenerTask.html">EndOfStreamListenerTask</a> to receive a callback when all partitions are at end of stream. When all partitions being processed by the task are at end of stream (i.e. EOF has been reached for all files), the Samza job exits automatically.</p>
+
+<h3 id="basic-configuration">Basic Configuration</h3>
+
+<p>Here are a few of the basic configs to set up HdfsSystemConsumer:</p>
+<div class="highlight"><pre><code class="language-text" data-lang="text"># The HDFS system consumer is implemented under the org.apache.samza.system.hdfs package,
+# so use HdfsSystemFactory as the system factory for your system
+systems.hdfs-clickstream.samza.factory=org.apache.samza.system.hdfs.HdfsSystemFactory
+
+# You need to specify the path of files you want to consume in task.inputs
+task.inputs=hdfs-clickstream.hdfs:/data/clickstream/2016/09/11
+
+# You can specify a white list of files you want your job to process (in Java Pattern style)
+systems.hdfs-clickstream.partitioner.defaultPartitioner.whitelist=.*avro
+
+# You can specify a black list of files you don&#39;t want your job to process (in Java Pattern style),
+# by default it&#39;s empty.
+# Note that you can have both white list and black list, in which case both will be applied.
+systems.hdfs-clickstream.partitioner.defaultPartitioner.blacklist=somefile.avro
+</code></pre></div>
+<h3 id="security-configuration">Security Configuration</h3>
+
+<p>The following additional configs are required when accessing HDFS clusters that have kerberos enabled:</p>
+<div class="highlight"><pre><code class="language-text" data-lang="text"># Use the SamzaYarnSecurityManagerFactory, which fetches and renews the Kerberos delegation tokens when the job is running in a secure environment.
+job.security.manager.factory=org.apache.samza.job.yarn.SamzaYarnSecurityManagerFactory
+
+# Kerberos principal
+yarn.kerberos.principal=your-principal-name
+
+# Path of the keytab file (local path)
+yarn.kerberos.keytab=/tmp/keytab
+</code></pre></div>
+<h3 id="advanced-configuration">Advanced Configuration</h3>
+
+<p>Some of the advanced configuration you might need to set up:</p>
+<div class="highlight"><pre><code class="language-text" data-lang="text"># Specify the group pattern for advanced partitioning.
+systems.hdfs-clickstream.partitioner.defaultPartitioner.groupPattern=part-[id]-.*
+</code></pre></div>
+<p>The advanced partitioning goes beyond the basic assumption that each file is a partition. With advanced partitioning you can group files into partitions arbitrarily. For example, if you have a set of files as [part-01-a.avro, part-01-b.avro, part-02-a.avro, part-02-b.avro, part-03-a.avro] that you want to organize into three partitions as (part-01-a.avro, part-01-b.avro), (part-02-a.avro, part-02-b.avro), (part-03-a.avro), where the numbers in the middle act as a &ldquo;group identifier&rdquo;, you can then set this property to be &ldquo;part-[id]-.*&rdquo; (note that <strong>[id]</strong> is a reserved term here, i.e. you have to literally put it as <strong>[id]</strong>). The partitioner will apply this pattern to all file names and extract the &ldquo;group identifier&rdquo; (&ldquo;[id]&rdquo; in the pattern), then use the &ldquo;group identifier&rdquo; to group files into partitions.</p>
+<div class="highlight"><pre><code class="language-text" data-lang="text"># Specify the type of files your job wants to process (only avro is supported for now)
+systems.hdfs-clickstream.consumer.reader=avro
+
+# Max number of retries (per-partition) before the container fails.
+systems.hdfs-clickstream.consumer.numMaxRetries=10
+</code></pre></div>
+<p>For the list of all configs, check out the <a href="../jobs/configuration-table.html">configuration table</a> page.</p>
+
+<h3 id="more-information">More Information</h3>
+
+<p><a href="https://issues.apache.org/jira/secure/attachment/12827670/HDFSSystemConsumer.pdf">HdfsSystemConsumer design doc</a></p>
+
+<h2 id="writing-to-hdfs-&raquo;"><a href="./producer.html">Writing to HDFS &raquo;</a></h2>
+
+
+          </div>
+        </div>
+
+      </div><!-- /.wrapper-content -->
+    </div><!-- /.wrapper -->
+
+    <div class="footer">
+      <div class="container">
+        <!-- nothing for now. -->
+      </div>
+    </div>
+
+  
+    <script>
+      $( document ).ready(function() {
+        if ( $.fn.urlExists( "/learn/documentation/0.14/hadoop/consumer.html" ) ) {
+          $("#switch-version-button").addClass("fa fa-history masthead-icon");
+        }
+      });
+
+      /* a function to test whether the url exists or not */
+      (function( $ ) {
+        $.fn.urlExists = function(url) {
+          var http = new XMLHttpRequest();
+          http.open('HEAD', url, false);
+          http.send();
+          return http.status != 404;
+        };
+      }( jQuery ));
+    </script>
+  
+
+    <!-- Google Analytics -->
+    <script>
+      (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+      m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+      })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+      ga('create', 'UA-43122768-1', 'apache.org');
+      ga('send', 'pageview');
+
+    </script>
+  </body>
+</html>

Added: samza/site/learn/documentation/latest/hadoop/overview.html
URL: http://svn.apache.org/viewvc/samza/site/learn/documentation/latest/hadoop/overview.html?rev=1820026&view=auto
==============================================================================
--- samza/site/learn/documentation/latest/hadoop/overview.html (added)
+++ samza/site/learn/documentation/latest/hadoop/overview.html Thu Jan  4 00:31:39 2018
@@ -0,0 +1,221 @@
+<!DOCTYPE html>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Samza - Batch Processing Overview</title>
+    <link href='/css/ropa-sans.css' rel='stylesheet' type='text/css'/>
+    <link href="/css/bootstrap.min.css" rel="stylesheet"/>
+    <link href="/css/font-awesome.min.css" rel="stylesheet"/>
+    <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
+    <link rel="icon" type="image/png" href="/img/samza-icon.png">
+    <script src="/js/jquery-1.11.1.min.js"></script>
+  </head>
+  <body>
+    <div class="wrapper">
+      <div class="wrapper-content">
+
+        <div class="masthead">
+          <div class="container">
+            <div class="masthead-logo">
+              <a href="/" class="logo">samza</a>
+            </div>
+            <div class="masthead-icons">
+              <div class="pull-right">
+                <a href="/startup/download"><i class="fa fa-arrow-circle-o-down masthead-icon"></i></a>
+                <a href="https://git-wip-us.apache.org/repos/asf?p=samza.git;a=tree" target="_blank"><i class="fa fa-code masthead-icon" style="font-weight: bold;"></i></a>
+                <a href="https://twitter.com/samzastream" target="_blank"><i class="fa fa-twitter masthead-icon"></i></a>
+                <!-- this icon only shows in versioned pages -->
+                
+                  
+                    
+                  
+                  <a href="http://samza.apache.org/learn/documentation/0.14/hadoop/overview.html"><i id="switch-version-button"></i></a>
+                   <!-- links for the navigation bar -->
+                
+
+              </div>
+            </div>
+          </div><!-- /.container -->
+        </div>
+
+        <div class="container">
+          <div class="menu">
+            <h1><i class="fa fa-rocket"></i> Getting Started</h1>
+            <ul>
+              <li><a href="/startup/hello-samza/latest">Hello Samza</a></li>
+              <li><a href="/startup/download">Download</a></li>
+              <li><a href="/startup/preview">Feature Preview</a></li>
+            </ul>
+
+            <h1><i class="fa fa-book"></i> Learn</h1>
+            <ul>
+              <li><a href="/learn/documentation/latest">Documentation</a></li>
+              <li><a href="/learn/documentation/latest/jobs/configuration-table.html">Configuration</a></li>
+              <li><a href="/learn/documentation/latest/container/metrics-table.html">Metrics</a></li>
+              <li><a href="/learn/documentation/latest/api/javadocs/">Javadocs</a></li>
+              <li><a href="/learn/tutorials/latest">Tutorials</a></li>
+              <li><a href="https://cwiki.apache.org/confluence/display/SAMZA/FAQ">FAQ</a></li>
+              <li><a href="https://cwiki.apache.org/confluence/display/SAMZA/Apache+Samza">Wiki</a></li>
+              <li><a href="https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=51812876">Papers &amp; Talks</a></li>
+              <li><a href="http://blogs.apache.org/samza">Blog</a></li>
+            </ul>
+
+            <h1><i class="fa fa-comments"></i> Community</h1>
+            <ul>
+              <li><a href="/community/mailing-lists.html">Mailing Lists</a></li>
+              <li><a href="/community/irc.html">IRC</a></li>
+              <li><a href="https://issues.apache.org/jira/browse/SAMZA">Bugs</a></li>
+              <li><a href="https://cwiki.apache.org/confluence/display/SAMZA/Powered+By">Powered by</a></li>
+              <li><a href="https://cwiki.apache.org/confluence/display/SAMZA/Ecosystem">Ecosystem</a></li>
+              <li><a href="/community/committers.html">Committers</a></li>
+            </ul>
+
+            <h1><i class="fa fa-code"></i> Contribute</h1>
+            <ul>
+              <li><a href="/contribute/contributors-corner.html">Contributor's Corner</a></li>
+              <li><a href="/contribute/coding-guide.html">Coding Guide</a></li>
+              <li><a href="/contribute/design-documents.html">Design Documents</a></li>
+              <li><a href="/contribute/code.html">Code</a></li>
+              <li><a href="/contribute/tests.html">Tests</a></li>
+            </ul>
+
+            <h1><i class="fa fa-history"></i> Archive</h1>
+            <ul>
+              <li><a href="/archive/index.html#latest">latest</a></li>
+              <li><a href="/archive/index.html#14">0.14</a></li>
+              <li><a href="/archive/index.html#13">0.13</a></li>
+              <li><a href="/archive/index.html#12">0.12</a></li>
+              <li><a href="/archive/index.html#11">0.11</a></li>
+              <li><a href="/archive/index.html#10">0.10</a></li>
+              <li><a href="/archive/index.html#09">0.9</a></li>
+              <li><a href="/archive/index.html#08">0.8</a></li>
+              <li><a href="/archive/index.html#07">0.7</a></li>
+            </ul>
+          </div>
+
+          <div class="content">
+            <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<h2>Batch Processing Overview</h2>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<p>Samza provides a unified data processing model for both stream and batch processing. The primary difference between batch and streaming is whether the input size is bounded or unbounded. Batch data sources are typically bounded (e.g. static files on HDFS), whereas streams are unbounded (e.g. a topic in Kafka). Under the hood, the same highly-efficient stream-processing engine handles both types.</p>
+
+<p><img src="/img/latest/learn/documentation/hadoop/unified_batch_streaming.png" alt="Unified Batch and Streaming" style="max-width: 100%; height: auto;" onclick="window.open(this.src)"></p>
+
+<h3 id="unified-api-for-batch-and-streaming">Unified API for Batch and Streaming</h3>
+
+<p>Samza provides a single set of APIs for both batch and stream processing. This unified programming API makes it convenient for you to focus on the processing logic, without treating bounded and unbounded sources differently. Switching between batch and streaming only requires a config change, e.g. <a href="../api/overview.html">Kafka</a> to <a href="./consumer.html">HDFS</a>, instead of any code change.</p>
+
+<h3 id="multi-stage-batch-pipeline">Multi-stage Batch Pipeline</h3>
+
+<p>Complex data pipelines usually consist of multiple stages, with data shuffled (repartitioned) between stages to enable key-based operations such as windowing, aggregation, and join. The Samza <a href="/startup/preview/index.html">high-level API</a> provides an operator named <code>partitionBy</code> to create such multi-stage pipelines. Internally, Samza creates a physical stream, called an “intermediate stream”, based on the system configured in <code>job.default.system</code>. Samza repartitions the output of the previous stage by sending it to the intermediate stream with the appropriate partition count and partition key. It then feeds it to the next stage of the pipeline. The lifecycle of intermediate streams is completely managed by Samza, so from the user perspective the data shuffling is automatic.</p>
+
+<p>For a single-stage pipeline, dealing with bounded data sets is straightforward: the system consumer “knows” the end of a particular partition, and it will emit an end-of-stream token once a partition is complete. Samza will shut down the container when all its input partitions are complete.</p>
+
+<p>For a multi-stage pipeline, however, things become tricky since intermediate streams are often physically unbounded data streams, e.g. Kafka, and the downstream stages don&rsquo;t know when to shut down since unbounded streams don&rsquo;t have an end. To solve this problem, Samza uses in-band end-of-stream control messages in the intermediate stream along with user data messages. The upstream stage broadcasts end-of-stream control messages to every partition of the intermediate stream, and the downstream stage will aggregate the end-of-stream messages for each partition. When one end-of-stream message has been received for every upstream task in a partition, the downstream stage will conclude that the partition has no more messages, and the task will shut down. For pipelines with more than 2 stages, the end-of-stream control messages will be propagated from the source to the last stage, and each stage will perform the end-of-stream aggregation and then shut down. The following diagram shows the flow:</p>
+
+<p><img src="/img/latest/learn/documentation/hadoop/multi_stage_batch.png" alt="Multi-stage Batch Processing" style="max-width: 100%; height: auto;" onclick="window.open(this.src)"></p>
+
+<h3 id="state-and-fault-tolerance">State and Fault-tolerance</h3>
+
+<p>Samza’s <a href="../container/state-management.html">state management</a> and <a href="../container/checkpointing.html">fault-tolerance</a> apply the same way to batch. You can use an in-memory store or RocksDB as your local state store, which can be persisted by changelog streams. In case of any container failures, Samza will restart the container by reseeding the local store from changelog streams, and resume processing from the previous checkpoints.</p>
+
+<p>During a job restart, batch processing behaves completely differently from streaming. In batch, it is expected to be a re-run and all the internal streams, including intermediate, checkpoint and changelog streams, need to be fresh. Since some systems only support retention-based stream cleanup, e.g. Kafka without deletion enabled, Samza creates a new set of internal streams for each job run. To achieve this, Samza internally generates a unique <strong>run.id</strong> for each job run. The <strong>run.id</strong> is appended to the physical names of the internal streams, which will be used in the job in each run. Samza also performs due diligence to delete/purge the streams from the previous run. The cleanup happens when the job is restarted.</p>
+
+<h2 id="reading-from-hdfs-&raquo;"><a href="./consumer.html">Reading from HDFS &raquo;</a></h2>
+
+
+          </div>
+        </div>
+
+      </div><!-- /.wrapper-content -->
+    </div><!-- /.wrapper -->
+
+    <div class="footer">
+      <div class="container">
+        <!-- nothing for now. -->
+      </div>
+    </div>
+
+  
+    <script>
+      $( document ).ready(function() { /* callback registered with jQuery's ready(); the probe below runs inside it */
+        if ( $.fn.urlExists( "/learn/documentation/0.14/hadoop/overview.html" ) ) { /* probe the 0.14 copy of overview.html, the same page the masthead version link points to */
+          $("#switch-version-button").addClass("fa fa-history masthead-icon"); /* the masthead i#switch-version-button element gets its icon classes only when the probe reports the page exists */
+        }
+      });
+
+      /* Defines $.fn.urlExists(url): issues a HEAD request for url and returns true unless the response status is 404. */
+      (function( $ ) {
+        $.fn.urlExists = function(url) {
+          var http = new XMLHttpRequest();
+          http.open('HEAD', url, false); /* third argument false = synchronous request; NOTE(review): this blocks the page until the response arrives */
+          http.send();
+          return http.status != 404; /* every status other than 404 (including server errors) is treated as "exists" */
+        };
+      }( jQuery ));
+    </script>
+  
+
+    <!-- Google Analytics -->
+    <script>
+      (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ /* i[r] (here window.ga) becomes a stub function unless it is already defined */
+      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), /* the stub appends each call's arguments to i[r].q; i[r].l stores the current time as a number */
+      m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) /* a new async script element with src g is inserted before the first existing script element */
+      })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); /* invoked with i=window, s=document, o='script', g=the analytics.js URL, r='ga' */
+
+      ga('create', 'UA-43122768-1', 'apache.org'); /* presumably tracking ID followed by cookie domain; confirm against the analytics.js docs */
+      ga('send', 'pageview'); /* passes a 'send' / 'pageview' command to ga() */
+
+    </script>
+  </body>
+</html>