You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@samza.apache.org by ma...@apache.org on 2014/06/25 00:01:59 UTC

svn commit: r1605209 [2/3] - in /incubator/samza/site: ./ community/ contribute/ css/ learn/documentation/0.7.0/ learn/documentation/0.7.0/api/ learn/documentation/0.7.0/comparisons/ learn/documentation/0.7.0/container/ learn/documentation/0.7.0/introd...

Modified: incubator/samza/site/learn/documentation/0.7.0/container/streams.html
URL: http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/0.7.0/container/streams.html?rev=1605209&r1=1605208&r2=1605209&view=diff
==============================================================================
--- incubator/samza/site/learn/documentation/0.7.0/container/streams.html (original)
+++ incubator/samza/site/learn/documentation/0.7.0/container/streams.html Tue Jun 24 22:01:57 2014
@@ -23,6 +23,7 @@
     <link href="/css/bootstrap.min.css" rel="stylesheet"/>
     <link href="/css/font-awesome.min.css" rel="stylesheet"/>
     <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
     <link rel="icon" type="image/png" href="/img/samza-icon.png">
   </head>
   <body>
@@ -123,48 +124,49 @@
 -->
 
 <p>The <a href="samza-container.html">samza container</a> reads and writes messages using the <a href="../api/javadocs/org/apache/samza/system/SystemConsumer.html">SystemConsumer</a> and <a href="../api/javadocs/org/apache/samza/system/SystemProducer.html">SystemProducer</a> interfaces. You can integrate any message broker with Samza by implementing these two interfaces.</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">public interface SystemConsumer {
-  void start();
 
-  void stop();
+<div class="highlight"><pre><code class="java"><span class="kd">public</span> <span class="kd">interface</span> <span class="nc">SystemConsumer</span> <span class="o">{</span>
+  <span class="kt">void</span> <span class="nf">start</span><span class="o">();</span>
 
-  void register(
-      SystemStreamPartition systemStreamPartition,
-      String lastReadOffset);
+  <span class="kt">void</span> <span class="nf">stop</span><span class="o">();</span>
 
-  List&lt;IncomingMessageEnvelope&gt; poll(
-      Map&lt;SystemStreamPartition, Integer&gt; systemStreamPartitions,
-      long timeout)
-    throws InterruptedException;
-}
+  <span class="kt">void</span> <span class="nf">register</span><span class="o">(</span>
+      <span class="n">SystemStreamPartition</span> <span class="n">systemStreamPartition</span><span class="o">,</span>
+      <span class="n">String</span> <span class="n">lastReadOffset</span><span class="o">);</span>
 
-public class IncomingMessageEnvelope {
-  public Object getMessage() { ... }
+  <span class="n">List</span><span class="o">&lt;</span><span class="n">IncomingMessageEnvelope</span><span class="o">&gt;</span> <span class="nf">poll</span><span class="o">(</span>
+      <span class="n">Map</span><span class="o">&lt;</span><span class="n">SystemStreamPartition</span><span class="o">,</span> <span class="n">Integer</span><span class="o">&gt;</span> <span class="n">systemStreamPartitions</span><span class="o">,</span>
+      <span class="kt">long</span> <span class="n">timeout</span><span class="o">)</span>
+    <span class="kd">throws</span> <span class="n">InterruptedException</span><span class="o">;</span>
+<span class="o">}</span>
 
-  public Object getKey() { ... }
+<span class="kd">public</span> <span class="kd">class</span> <span class="nc">IncomingMessageEnvelope</span> <span class="o">{</span>
+  <span class="kd">public</span> <span class="n">Object</span> <span class="nf">getMessage</span><span class="o">()</span> <span class="o">{</span> <span class="o">...</span> <span class="o">}</span>
 
-  public SystemStreamPartition getSystemStreamPartition() { ... }
-}
+  <span class="kd">public</span> <span class="n">Object</span> <span class="nf">getKey</span><span class="o">()</span> <span class="o">{</span> <span class="o">...</span> <span class="o">}</span>
 
-public interface SystemProducer {
-  void start();
+  <span class="kd">public</span> <span class="n">SystemStreamPartition</span> <span class="nf">getSystemStreamPartition</span><span class="o">()</span> <span class="o">{</span> <span class="o">...</span> <span class="o">}</span>
+<span class="o">}</span>
 
-  void stop();
+<span class="kd">public</span> <span class="kd">interface</span> <span class="nc">SystemProducer</span> <span class="o">{</span>
+  <span class="kt">void</span> <span class="nf">start</span><span class="o">();</span>
 
-  void register(String source);
+  <span class="kt">void</span> <span class="nf">stop</span><span class="o">();</span>
 
-  void send(String source, OutgoingMessageEnvelope envelope);
+  <span class="kt">void</span> <span class="nf">register</span><span class="o">(</span><span class="n">String</span> <span class="n">source</span><span class="o">);</span>
 
-  void flush(String source);
-}
+  <span class="kt">void</span> <span class="nf">send</span><span class="o">(</span><span class="n">String</span> <span class="n">source</span><span class="o">,</span> <span class="n">OutgoingMessageEnvelope</span> <span class="n">envelope</span><span class="o">);</span>
 
-public class OutgoingMessageEnvelope {
-  ...
-  public Object getKey() { ... }
+  <span class="kt">void</span> <span class="nf">flush</span><span class="o">(</span><span class="n">String</span> <span class="n">source</span><span class="o">);</span>
+<span class="o">}</span>
+
+<span class="kd">public</span> <span class="kd">class</span> <span class="nc">OutgoingMessageEnvelope</span> <span class="o">{</span>
+  <span class="o">...</span>
+  <span class="kd">public</span> <span class="n">Object</span> <span class="nf">getKey</span><span class="o">()</span> <span class="o">{</span> <span class="o">...</span> <span class="o">}</span>
+
+  <span class="kd">public</span> <span class="n">Object</span> <span class="nf">getMessage</span><span class="o">()</span> <span class="o">{</span> <span class="o">...</span> <span class="o">}</span>
+<span class="o">}</span></code></pre></div>
 
-  public Object getMessage() { ... }
-}
-</code></pre></div>
 <p>Out of the box, Samza supports Kafka (KafkaSystemConsumer and KafkaSystemProducer). However, any message bus system can be plugged in, as long as it can provide the semantics required by Samza, as described in the <a href="../api/javadocs/org/apache/samza/system/SystemConsumer.html">javadoc</a>.</p>
 
 <p>SystemConsumers and SystemProducers may read and write messages of any data type. It&rsquo;s ok if they only support byte arrays &mdash; Samza has a separate <a href="serialization.html">serialization layer</a> which converts to and from objects that application code can use. Samza does not prescribe any particular data model or serialization format.</p>
@@ -182,16 +184,18 @@ public class OutgoingMessageEnvelope {
 <p>When a Samza container has several incoming messages on different stream partitions, how does it decide which to process first? The behavior is determined by a <a href="../api/javadocs/org/apache/samza/system/chooser/MessageChooser.html">MessageChooser</a>. The default chooser is RoundRobinChooser, but you can override it by implementing a custom chooser.</p>
 
 <p>To plug in your own message chooser, you need to implement the <a href="../api/javadocs/org/apache/samza/system/chooser/MessageChooserFactory.html">MessageChooserFactory</a> interface, and set the &ldquo;task.chooser.class&rdquo; configuration to the fully-qualified class name of your implementation:</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">task.chooser.class=com.example.samza.YourMessageChooserFactory
-</code></pre></div>
+
+<div class="highlight"><pre><code class="jproperties"><span class="na">task.chooser.class</span><span class="o">=</span><span class="s">com.example.samza.YourMessageChooserFactory</span></code></pre></div>
+
 <h4 id="prioritizing-input-streams">Prioritizing input streams</h4>
 
 <p>There are certain times when messages from one stream should be processed with higher priority than messages from another stream. For example, some Samza jobs consume two streams: one stream is fed by a real-time system and the other stream is fed by a batch system. In this case, it&rsquo;s useful to prioritize the real-time stream over the batch stream, so that the real-time processing doesn&rsquo;t slow down if there is a sudden burst of data on the batch stream.</p>
 
 <p>Samza provides a mechanism to prioritize one stream over another by setting this configuration parameter: systems.&lt;system&gt;.streams.&lt;stream&gt;.samza.priority=&lt;number&gt;. For example:</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">systems.kafka.streams.my-real-time-stream.samza.priority=2
-systems.kafka.streams.my-batch-stream.samza.priority=1
-</code></pre></div>
+
+<div class="highlight"><pre><code class="jproperties"><span class="na">systems.kafka.streams.my-real-time-stream.samza.priority</span><span class="o">=</span><span class="s">2</span>
+<span class="na">systems.kafka.streams.my-batch-stream.samza.priority</span><span class="o">=</span><span class="s">1</span></code></pre></div>
+
 <p>This declares that my-real-time-stream&rsquo;s messages should be processed with higher priority than my-batch-stream&rsquo;s messages. If my-real-time-stream has any messages available, they are processed first. Only when there are no messages currently waiting on my-real-time-stream does the Samza job continue processing my-batch-stream.</p>
 
 <p>Each priority level gets its own MessageChooser. It is valid to define two streams with the same priority. If messages are available from two streams at the same priority level, it&rsquo;s up to the MessageChooser for that priority level to decide which message should be processed first.</p>
@@ -207,10 +211,11 @@ systems.kafka.streams.my-batch-stream.sa
 <p>Another difference between a bootstrap stream and a high-priority stream is that the bootstrap stream&rsquo;s special treatment is temporary: when it has been fully consumed (we say it has &ldquo;caught up&rdquo;), its priority drops to be the same as all the other input streams.</p>
 
 <p>To configure a stream called &ldquo;my-bootstrap-stream&rdquo; to be a fully-consumed bootstrap stream, use the following settings:</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">systems.kafka.streams.my-bootstrap-stream.samza.bootstrap=true
-systems.kafka.streams.my-bootstrap-stream.samza.reset.offset=true
-systems.kafka.streams.my-bootstrap-stream.samza.offset.default=oldest
-</code></pre></div>
+
+<div class="highlight"><pre><code class="jproperties"><span class="na">systems.kafka.streams.my-bootstrap-stream.samza.bootstrap</span><span class="o">=</span><span class="s">true</span>
+<span class="na">systems.kafka.streams.my-bootstrap-stream.samza.reset.offset</span><span class="o">=</span><span class="s">true</span>
+<span class="na">systems.kafka.streams.my-bootstrap-stream.samza.offset.default</span><span class="o">=</span><span class="s">oldest</span></code></pre></div>
+
 <p>The bootstrap=true parameter enables the bootstrap behavior (prioritization over other streams). The combination of reset.offset=true and offset.default=oldest tells Samza to always start reading the stream from the oldest offset, every time a container starts up (rather than starting to read from the most recent checkpoint).</p>
 
 <p>It is valid to define multiple bootstrap streams. In this case, the order in which they are bootstrapped is determined by the priority.</p>
@@ -220,8 +225,9 @@ systems.kafka.streams.my-bootstrap-strea
 <p>In some cases, you can improve performance by consuming several messages from the same stream partition in sequence. Samza supports this mode of operation, called <em>batching</em>.</p>
 
 <p>For example, if you want to read 100 messages in a row from each stream partition (regardless of the MessageChooser), you can use this configuration parameter:</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">task.consumer.batch.size=100
-</code></pre></div>
+
+<div class="highlight"><pre><code class="jproperties"><span class="na">task.consumer.batch.size</span><span class="o">=</span><span class="s">100</span></code></pre></div>
+
 <p>With this setting, Samza tries to read a message from the most recently used <a href="../api/javadocs/org/apache/samza/system/SystemStreamPartition.html">SystemStreamPartition</a>. This behavior continues either until no more messages are available for that SystemStreamPartition, or until the batch size has been reached. When that happens, Samza defers to the MessageChooser to determine the next message to process. It then again tries to continue consuming from the chosen message&rsquo;s SystemStreamPartition until the batch size is reached.</p>
 
 <h2 id="serialization-&raquo;"><a href="serialization.html">Serialization &raquo;</a></h2>

Modified: incubator/samza/site/learn/documentation/0.7.0/container/windowing.html
URL: http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/0.7.0/container/windowing.html?rev=1605209&r1=1605208&r2=1605209&view=diff
==============================================================================
--- incubator/samza/site/learn/documentation/0.7.0/container/windowing.html (original)
+++ incubator/samza/site/learn/documentation/0.7.0/container/windowing.html Tue Jun 24 22:01:57 2014
@@ -23,6 +23,7 @@
     <link href="/css/bootstrap.min.css" rel="stylesheet"/>
     <link href="/css/font-awesome.min.css" rel="stylesheet"/>
     <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
     <link rel="icon" type="image/png" href="/img/samza-icon.png">
   </head>
   <body>
@@ -125,32 +126,34 @@
 <p>Sometimes a stream processing job needs to do something in regular time intervals, regardless of how many incoming messages the job is processing. For example, say you want to report the number of page views per minute. To do this, you increment a counter every time you see a page view event. Once per minute, you send the current counter value to an output stream and reset the counter to zero.</p>
 
 <p>Samza&rsquo;s <em>windowing</em> feature provides a way for tasks to do something in regular time intervals, for example once per minute. To enable windowing, you just need to set one property in your job configuration:</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text"># Call the window() method every 60 seconds
-task.window.ms=60000
-</code></pre></div>
+
+<div class="highlight"><pre><code class="jproperties"><span class="c"># Call the window() method every 60 seconds</span>
+<span class="na">task.window.ms</span><span class="o">=</span><span class="s">60000</span></code></pre></div>
+
 <p>Next, your stream task needs to implement the <a href="../api/javadocs/org/apache/samza/task/WindowableTask.html">WindowableTask</a> interface. This interface defines a window() method which is called by Samza in the regular interval that you configured.</p>
 
 <p>For example, this is how you would implement a basic per-minute event counter:</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">public class EventCounterTask implements StreamTask, WindowableTask {
 
-  public static final SystemStream OUTPUT_STREAM =
-    new SystemStream(&quot;kafka&quot;, &quot;events-per-minute&quot;);
+<div class="highlight"><pre><code class="java"><span class="kd">public</span> <span class="kd">class</span> <span class="nc">EventCounterTask</span> <span class="kd">implements</span> <span class="n">StreamTask</span><span class="o">,</span> <span class="n">WindowableTask</span> <span class="o">{</span>
+
+  <span class="kd">public</span> <span class="kd">static</span> <span class="kd">final</span> <span class="n">SystemStream</span> <span class="n">OUTPUT_STREAM</span> <span class="o">=</span>
+    <span class="k">new</span> <span class="nf">SystemStream</span><span class="o">(</span><span class="s">&quot;kafka&quot;</span><span class="o">,</span> <span class="s">&quot;events-per-minute&quot;</span><span class="o">);</span>
+
+  <span class="kd">private</span> <span class="kt">int</span> <span class="n">eventsSeen</span> <span class="o">=</span> <span class="mi">0</span><span class="o">;</span>
 
-  private int eventsSeen = 0;
+  <span class="kd">public</span> <span class="kt">void</span> <span class="nf">process</span><span class="o">(</span><span class="n">IncomingMessageEnvelope</span> <span class="n">envelope</span><span class="o">,</span>
+                      <span class="n">MessageCollector</span> <span class="n">collector</span><span class="o">,</span>
+                      <span class="n">TaskCoordinator</span> <span class="n">coordinator</span><span class="o">)</span> <span class="o">{</span>
+    <span class="n">eventsSeen</span><span class="o">++;</span>
+  <span class="o">}</span>
+
+  <span class="kd">public</span> <span class="kt">void</span> <span class="nf">window</span><span class="o">(</span><span class="n">MessageCollector</span> <span class="n">collector</span><span class="o">,</span>
+                     <span class="n">TaskCoordinator</span> <span class="n">coordinator</span><span class="o">)</span> <span class="o">{</span>
+    <span class="n">collector</span><span class="o">.</span><span class="na">send</span><span class="o">(</span><span class="k">new</span> <span class="n">OutgoingMessageEnvelope</span><span class="o">(</span><span class="n">OUTPUT_STREAM</span><span class="o">,</span> <span class="n">eventsSeen</span><span class="o">));</span>
+    <span class="n">eventsSeen</span> <span class="o">=</span> <span class="mi">0</span><span class="o">;</span>
+  <span class="o">}</span>
+<span class="o">}</span></code></pre></div>
 
-  public void process(IncomingMessageEnvelope envelope,
-                      MessageCollector collector,
-                      TaskCoordinator coordinator) {
-    eventsSeen++;
-  }
-
-  public void window(MessageCollector collector,
-                     TaskCoordinator coordinator) {
-    collector.send(new OutgoingMessageEnvelope(OUTPUT_STREAM, eventsSeen));
-    eventsSeen = 0;
-  }
-}
-</code></pre></div>
 <p>If you need to send messages to output streams, you can use the <a href="../api/javadocs/org/apache/samza/task/MessageCollector.html">MessageCollector</a> object passed to the window() method. Please only use that MessageCollector object for sending messages, and don&rsquo;t use it outside of the call to window().</p>
 
 <p>Note that Samza uses <a href="event-loop.html">single-threaded execution</a>, so the window() call can never happen concurrently with a process() call. This has the advantage that you don&rsquo;t need to worry about thread safety in your code (no need to synchronize anything), but the downside that the window() call may be delayed if your process() method takes a long time to return.</p>

Modified: incubator/samza/site/learn/documentation/0.7.0/index.html
URL: http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/0.7.0/index.html?rev=1605209&r1=1605208&r2=1605209&view=diff
==============================================================================
--- incubator/samza/site/learn/documentation/0.7.0/index.html (original)
+++ incubator/samza/site/learn/documentation/0.7.0/index.html Tue Jun 24 22:01:57 2014
@@ -23,6 +23,7 @@
     <link href="/css/bootstrap.min.css" rel="stylesheet"/>
     <link href="/css/font-awesome.min.css" rel="stylesheet"/>
     <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
     <link rel="icon" type="image/png" href="/img/samza-icon.png">
   </head>
   <body>

Modified: incubator/samza/site/learn/documentation/0.7.0/introduction/architecture.html
URL: http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/0.7.0/introduction/architecture.html?rev=1605209&r1=1605208&r2=1605209&view=diff
==============================================================================
--- incubator/samza/site/learn/documentation/0.7.0/introduction/architecture.html (original)
+++ incubator/samza/site/learn/documentation/0.7.0/introduction/architecture.html Tue Jun 24 22:01:57 2014
@@ -23,6 +23,7 @@
     <link href="/css/bootstrap.min.css" rel="stylesheet"/>
     <link href="/css/font-awesome.min.css" rel="stylesheet"/>
     <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
     <link rel="icon" type="image/png" href="/img/samza-icon.png">
   </head>
   <body>
@@ -201,8 +202,9 @@
 <h3 id="example">Example</h3>
 
 <p>Let&rsquo;s take a look at a real example: suppose we want to count the number of page views. In SQL, you would write something like:</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">SELECT user_id, COUNT(*) FROM PageViewEvent GROUP BY user_id
-</code></pre></div>
+
+<div class="highlight"><pre><code class="sql"><span class="k">SELECT</span> <span class="n">user_id</span><span class="p">,</span> <span class="k">COUNT</span><span class="p">(</span><span class="o">*</span><span class="p">)</span> <span class="k">FROM</span> <span class="n">PageViewEvent</span> <span class="k">GROUP</span> <span class="k">BY</span> <span class="n">user_id</span></code></pre></div>
+
 <p>Although Samza doesn&rsquo;t support SQL right now, the idea is the same. Two jobs are required to calculate this query: one to group messages by user ID, and the other to do the counting.</p>
 
 <p>In the first job, the grouping is done by sending all messages with the same user ID to the same partition of an intermediate topic. You can do this by using the user ID as key of the messages that are emitted by the first job, and this key is mapped to one of the intermediate topic&rsquo;s partitions (usually by taking a hash of the key mod the number of partitions). The second job consumes the intermediate topic. Each task in the second job consumes one partition of the intermediate topic, i.e. all the messages for a subset of user IDs. The task has a counter for each user ID in its partition, and the appropriate counter is incremented every time the task receives a message with a particular user ID.</p>

Modified: incubator/samza/site/learn/documentation/0.7.0/introduction/background.html
URL: http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/0.7.0/introduction/background.html?rev=1605209&r1=1605208&r2=1605209&view=diff
==============================================================================
--- incubator/samza/site/learn/documentation/0.7.0/introduction/background.html (original)
+++ incubator/samza/site/learn/documentation/0.7.0/introduction/background.html Tue Jun 24 22:01:57 2014
@@ -23,6 +23,7 @@
     <link href="/css/bootstrap.min.css" rel="stylesheet"/>
     <link href="/css/font-awesome.min.css" rel="stylesheet"/>
     <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
     <link rel="icon" type="image/png" href="/img/samza-icon.png">
   </head>
   <body>

Modified: incubator/samza/site/learn/documentation/0.7.0/introduction/concepts.html
URL: http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/0.7.0/introduction/concepts.html?rev=1605209&r1=1605208&r2=1605209&view=diff
==============================================================================
--- incubator/samza/site/learn/documentation/0.7.0/introduction/concepts.html (original)
+++ incubator/samza/site/learn/documentation/0.7.0/introduction/concepts.html Tue Jun 24 22:01:57 2014
@@ -23,6 +23,7 @@
     <link href="/css/bootstrap.min.css" rel="stylesheet"/>
     <link href="/css/font-awesome.min.css" rel="stylesheet"/>
     <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
     <link rel="icon" type="image/png" href="/img/samza-icon.png">
   </head>
   <body>

Modified: incubator/samza/site/learn/documentation/0.7.0/jobs/configuration.html
URL: http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/0.7.0/jobs/configuration.html?rev=1605209&r1=1605208&r2=1605209&view=diff
==============================================================================
--- incubator/samza/site/learn/documentation/0.7.0/jobs/configuration.html (original)
+++ incubator/samza/site/learn/documentation/0.7.0/jobs/configuration.html Tue Jun 24 22:01:57 2014
@@ -23,6 +23,7 @@
     <link href="/css/bootstrap.min.css" rel="stylesheet"/>
     <link href="/css/font-awesome.min.css" rel="stylesheet"/>
     <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
     <link rel="icon" type="image/png" href="/img/samza-icon.png">
   </head>
   <body>
@@ -123,23 +124,24 @@
 -->
 
 <p>All Samza jobs have a configuration file that defines the job. A very basic configuration file looks like this:</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text"># Job
-job.factory.class=samza.job.local.LocalJobFactory
-job.name=hello-world
-
-# Task
-task.class=samza.task.example.MyJavaStreamerTask
-task.inputs=example-system.example-stream
-
-# Serializers
-serializers.registry.json.class=org.apache.samza.serializers.JsonSerdeFactory
-serializers.registry.string.class=org.apache.samza.serializers.StringSerdeFactory
-
-# Systems
-systems.example-system.samza.factory=samza.stream.example.ExampleConsumerFactory
-systems.example-system.samza.key.serde=string
-systems.example-system.samza.msg.serde=json
-</code></pre></div>
+
+<div class="highlight"><pre><code class="jproperties"><span class="c"># Job</span>
+<span class="na">job.factory.class</span><span class="o">=</span><span class="s">samza.job.local.LocalJobFactory</span>
+<span class="na">job.name</span><span class="o">=</span><span class="s">hello-world</span>
+
+<span class="c"># Task</span>
+<span class="na">task.class</span><span class="o">=</span><span class="s">samza.task.example.MyJavaStreamerTask</span>
+<span class="na">task.inputs</span><span class="o">=</span><span class="s">example-system.example-stream</span>
+
+<span class="c"># Serializers</span>
+<span class="na">serializers.registry.json.class</span><span class="o">=</span><span class="s">org.apache.samza.serializers.JsonSerdeFactory</span>
+<span class="na">serializers.registry.string.class</span><span class="o">=</span><span class="s">org.apache.samza.serializers.StringSerdeFactory</span>
+
+<span class="c"># Systems</span>
+<span class="na">systems.example-system.samza.factory</span><span class="o">=</span><span class="s">samza.stream.example.ExampleConsumerFactory</span>
+<span class="na">systems.example-system.samza.key.serde</span><span class="o">=</span><span class="s">string</span>
+<span class="na">systems.example-system.samza.msg.serde</span><span class="o">=</span><span class="s">json</span></code></pre></div>
+
 <p>There are four major sections to a configuration file:</p>
 
 <ol>
@@ -154,10 +156,10 @@ systems.example-system.samza.msg.serde=j
 <p>Configuration keys that absolutely must be defined for a Samza job are:</p>
 
 <ul>
-<li>job.factory.class</li>
-<li>job.name</li>
-<li>task.class</li>
-<li>task.inputs</li>
+<li><code>job.factory.class</code></li>
+<li><code>job.name</code></li>
+<li><code>task.class</code></li>
+<li><code>task.inputs</code></li>
 </ul>
 
 <h3 id="configuration-keys">Configuration Keys</h3>

Modified: incubator/samza/site/learn/documentation/0.7.0/jobs/job-runner.html
URL: http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/0.7.0/jobs/job-runner.html?rev=1605209&r1=1605208&r2=1605209&view=diff
==============================================================================
--- incubator/samza/site/learn/documentation/0.7.0/jobs/job-runner.html (original)
+++ incubator/samza/site/learn/documentation/0.7.0/jobs/job-runner.html Tue Jun 24 22:01:57 2014
@@ -23,6 +23,7 @@
     <link href="/css/bootstrap.min.css" rel="stylesheet"/>
     <link href="/css/font-awesome.min.css" rel="stylesheet"/>
     <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
     <link rel="icon" type="image/png" href="/img/samza-icon.png">
   </head>
   <body>
@@ -123,30 +124,33 @@
 -->
 
 <p>Samza jobs are started using a script called run-job.sh.</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">samza-example/target/bin/run-job.sh \
-  --config-factory=samza.config.factories.PropertiesConfigFactory \
-  --config-path=file://$PWD/config/hello-world.properties
-</code></pre></div>
+
+<div class="highlight"><pre><code class="bash">samza-example/target/bin/run-job.sh <span class="se">\</span>
+  --config-factory<span class="o">=</span>samza.config.factories.PropertiesConfigFactory <span class="se">\</span>
+  --config-path<span class="o">=</span>file://<span class="nv">$PWD</span>/config/hello-world.properties</code></pre></div>
+
 <p>You provide two parameters to the run-job.sh script. One is the config location, and the other is a factory class that is used to read your configuration file. The run-job.sh script is actually executing a Samza class called JobRunner. The JobRunner uses your ConfigFactory to get a Config object from the config path.</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">public interface ConfigFactory {
-  Config getConfig(URI configUri);
-}
-</code></pre></div>
+
+<div class="highlight"><pre><code class="java"><span class="kd">public</span> <span class="kd">interface</span> <span class="nc">ConfigFactory</span> <span class="o">{</span>
+  <span class="n">Config</span> <span class="nf">getConfig</span><span class="o">(</span><span class="n">URI</span> <span class="n">configUri</span><span class="o">);</span>
+<span class="o">}</span></code></pre></div>
+
 <p>The Config object is just a wrapper around Map&lt;String, String&gt;, with some nice helper methods. Out of the box, Samza ships with the PropertiesConfigFactory, but developers can implement any kind of ConfigFactory they wish.</p>
 
 <p>Once the JobRunner gets your configuration, it gives your configuration to the StreamJobFactory class defined by the &ldquo;job.factory.class&rdquo; property. Samza ships with two job factory implementations: LocalJobFactory and YarnJobFactory. The StreamJobFactory&rsquo;s responsibility is to give the JobRunner a job that it can run.</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">public interface StreamJob {
-  StreamJob submit();
 
-  StreamJob kill();
+<div class="highlight"><pre><code class="java"><span class="kd">public</span> <span class="kd">interface</span> <span class="nc">StreamJob</span> <span class="o">{</span>
+  <span class="n">StreamJob</span> <span class="nf">submit</span><span class="o">();</span>
+
+  <span class="n">StreamJob</span> <span class="nf">kill</span><span class="o">();</span>
+
+  <span class="n">ApplicationStatus</span> <span class="nf">waitForFinish</span><span class="o">(</span><span class="kt">long</span> <span class="n">timeoutMs</span><span class="o">);</span>
 
-  ApplicationStatus waitForFinish(long timeoutMs);
+  <span class="n">ApplicationStatus</span> <span class="nf">waitForStatus</span><span class="o">(</span><span class="n">ApplicationStatus</span> <span class="n">status</span><span class="o">,</span> <span class="kt">long</span> <span class="n">timeoutMs</span><span class="o">);</span>
 
-  ApplicationStatus waitForStatus(ApplicationStatus status, long timeoutMs);
+  <span class="n">ApplicationStatus</span> <span class="nf">getStatus</span><span class="o">();</span>
+<span class="o">}</span></code></pre></div>
 
-  ApplicationStatus getStatus();
-}
-</code></pre></div>
 <p>Once the JobRunner gets a job, it calls submit() on the job. This method is what tells the StreamJob implementation to start the SamzaContainer. In the case of LocalJobRunner, it uses a run-container.sh script to execute the SamzaContainer in a separate process, which will start one SamzaContainer locally on the machine that you ran run-job.sh on.</p>
 
 <p>This flow differs slightly when you use YARN, but we&rsquo;ll get to that later.</p>

Modified: incubator/samza/site/learn/documentation/0.7.0/jobs/logging.html
URL: http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/0.7.0/jobs/logging.html?rev=1605209&r1=1605208&r2=1605209&view=diff
==============================================================================
--- incubator/samza/site/learn/documentation/0.7.0/jobs/logging.html (original)
+++ incubator/samza/site/learn/documentation/0.7.0/jobs/logging.html Tue Jun 24 22:01:57 2014
@@ -23,6 +23,7 @@
     <link href="/css/bootstrap.min.css" rel="stylesheet"/>
     <link href="/css/font-awesome.min.css" rel="stylesheet"/>
     <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
     <link rel="icon" type="image/png" href="/img/samza-icon.png">
   </head>
   <body>
@@ -127,52 +128,57 @@
 <h3 id="log4j">Log4j</h3>
 
 <p>The <a href="/startup/hello-samza/0.7.0">hello-samza</a> project shows how to use <a href="http://logging.apache.org/log4j/1.2/">log4j</a> with Samza. To turn on log4j logging, you just need to make sure slf4j-log4j12 is in your SamzaContainer&rsquo;s classpath. In Maven, this can be done by adding the following dependency to your Samza package project.</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">&lt;dependency&gt;
-  &lt;groupId&gt;org.slf4j&lt;/groupId&gt;
-  &lt;artifactId&gt;slf4j-log4j12&lt;/artifactId&gt;
-  &lt;scope&gt;runtime&lt;/scope&gt;
-  &lt;version&gt;1.6.2&lt;/version&gt;
-&lt;/dependency&gt;
-</code></pre></div>
+
+<div class="highlight"><pre><code class="xml"><span class="nt">&lt;dependency&gt;</span>
+  <span class="nt">&lt;groupId&gt;</span>org.slf4j<span class="nt">&lt;/groupId&gt;</span>
+  <span class="nt">&lt;artifactId&gt;</span>slf4j-log4j12<span class="nt">&lt;/artifactId&gt;</span>
+  <span class="nt">&lt;scope&gt;</span>runtime<span class="nt">&lt;/scope&gt;</span>
+  <span class="nt">&lt;version&gt;</span>1.6.2<span class="nt">&lt;/version&gt;</span>
+<span class="nt">&lt;/dependency&gt;</span></code></pre></div>
+
 <p>If you&rsquo;re not using Maven, just make sure that slf4j-log4j12 ends up in your Samza package&rsquo;s lib directory.</p>
 
 <h4 id="log4j-configuration">Log4j configuration</h4>
 
 <p>Samza&rsquo;s <a href="packaging.html">run-class.sh</a> script will automatically set the following setting if log4j.xml exists in your <a href="packaging.html">Samza package&rsquo;s</a> lib directory.</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">-Dlog4j.configuration=file:$base_dir/lib/log4j.xml
-</code></pre></div>
+
+<div class="highlight"><pre><code class="bash">-Dlog4j.configuration<span class="o">=</span>file:<span class="nv">$base_dir</span>/lib/log4j.xml</code></pre></div>
+
 <p>The <a href="packaging.html">run-class.sh</a> script will also set the following Java system properties:</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">-Dsamza.log.dir=$SAMZA_LOG_DIR -Dsamza.container.name=$SAMZA_CONTAINER_NAME
-</code></pre></div>
+
+<div class="highlight"><pre><code class="bash">-Dsamza.log.dir<span class="o">=</span><span class="nv">$SAMZA_LOG_DIR</span> -Dsamza.container.name<span class="o">=</span><span class="nv">$SAMZA_CONTAINER_NAME</span></code></pre></div>
+
 <p>These settings are very useful if you&rsquo;re using a file-based appender. For example, you can use a daily rolling appender by configuring log4j.xml like this:</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">&lt;appender name=&quot;RollingAppender&quot; class=&quot;org.apache.log4j.DailyRollingFileAppender&quot;&gt;
-   &lt;param name=&quot;File&quot; value=&quot;${samza.log.dir}/${samza.container.name}.log&quot; /&gt;
-   &lt;param name=&quot;DatePattern&quot; value=&quot;&#39;.&#39;yyyy-MM-dd&quot; /&gt;
-   &lt;layout class=&quot;org.apache.log4j.PatternLayout&quot;&gt;
-    &lt;param name=&quot;ConversionPattern&quot; value=&quot;%d{yyyy-MM-dd HH:mm:ss} %c{1} [%p] %m%n&quot; /&gt;
-   &lt;/layout&gt;
-&lt;/appender&gt;
-</code></pre></div>
+
+<div class="highlight"><pre><code class="xml"><span class="nt">&lt;appender</span> <span class="na">name=</span><span class="s">&quot;RollingAppender&quot;</span> <span class="na">class=</span><span class="s">&quot;org.apache.log4j.DailyRollingFileAppender&quot;</span><span class="nt">&gt;</span>
+   <span class="nt">&lt;param</span> <span class="na">name=</span><span class="s">&quot;File&quot;</span> <span class="na">value=</span><span class="s">&quot;${samza.log.dir}/${samza.container.name}.log&quot;</span> <span class="nt">/&gt;</span>
+   <span class="nt">&lt;param</span> <span class="na">name=</span><span class="s">&quot;DatePattern&quot;</span> <span class="na">value=</span><span class="s">&quot;&#39;.&#39;yyyy-MM-dd&quot;</span> <span class="nt">/&gt;</span>
+   <span class="nt">&lt;layout</span> <span class="na">class=</span><span class="s">&quot;org.apache.log4j.PatternLayout&quot;</span><span class="nt">&gt;</span>
+    <span class="nt">&lt;param</span> <span class="na">name=</span><span class="s">&quot;ConversionPattern&quot;</span> <span class="na">value=</span><span class="s">&quot;%d{yyyy-MM-dd HH:mm:ss} %c{1} [%p] %m%n&quot;</span> <span class="nt">/&gt;</span>
+   <span class="nt">&lt;/layout&gt;</span>
+<span class="nt">&lt;/appender&gt;</span></code></pre></div>
+
 <p>Setting up a file-based appender is recommended as a better alternative to using standard out. Standard out log files (see below) don&rsquo;t roll, and can get quite large if used for logging.</p>
 
-<p><strong>NOTE:</strong> If you use the task.opts configuration property, the log configuration is disrupted. This is a known bug; please see <a href="https://issues.apache.org/jira/browse/SAMZA-109">SAMZA-109</a> for a workaround.</p>
+<p><strong>NOTE:</strong> If you use the <code>task.opts</code> configuration property, the log configuration is disrupted. This is a known bug; please see <a href="https://issues.apache.org/jira/browse/SAMZA-109">SAMZA-109</a> for a workaround.</p>
 
 <h3 id="log-directory">Log Directory</h3>
 
-<p>Samza will look for the <em>SAMZA</em>_<em>LOG</em>_<em>DIR</em> environment variable when it executes. If this variable is defined, all logs will be written to this directory. If the environment variable is empty, or not defined, then Samza will use /tmp. This environment variable can also be referenced inside log4j.xml files (see above).</p>
+<p>Samza will look for the <code>SAMZA_LOG_DIR</code> environment variable when it executes. If this variable is defined, all logs will be written to this directory. If the environment variable is empty, or not defined, then Samza will use /tmp. This environment variable can also be referenced inside log4j.xml files (see above).</p>
 
 <h3 id="garbage-collection-logging">Garbage Collection Logging</h3>
 
-<p>Samza&rsquo;s will automatically set the following garbage collection logging setting, and will output it to <em>$SAMZA</em>_<em>LOG</em>_<em>DIR</em>/gc.log.</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">-XX:+PrintGCDateStamps -Xloggc:$SAMZA_LOG_DIR/gc.log
-</code></pre></div>
+<p>Samza will automatically set the following garbage collection logging setting, and will output it to <code>$SAMZA_LOG_DIR/gc.log</code>.</p>
+
+<div class="highlight"><pre><code class="bash">-XX:+PrintGCDateStamps -Xloggc:<span class="nv">$SAMZA_LOG_DIR</span>/gc.log</code></pre></div>
+
 <h4 id="rotation">Rotation</h4>
 
 <p>In older versions of Java, it is impossible to have GC logs roll over based on time or size without the use of a secondary tool. This means that your GC logs will never be deleted until a Samza job ceases to run. As of <a href="http://www.oracle.com/technetwork/java/javase/2col/6u34-bugfixes-1733379.html">Java 6 Update 34</a>, and <a href="http://www.oracle.com/technetwork/java/javase/7u2-relnotes-1394228.html">Java 7 Update 2</a>, <a href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6941923">new GC command line switches</a> have been added to support this functionality. If you are using a version of Java that supports GC log rotation, it&rsquo;s highly recommended that you turn it on.</p>
 
 <h3 id="yarn">YARN</h3>
 
-<p>When a Samza job executes on a YARN grid, the <em>$SAMZA</em>_<em>LOG</em>_<em>DIR</em> environment variable will point to a directory that is secured such that only the user executing the Samza job can read and write to it, if YARN is <a href="http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/ClusterSetup.html">securely configured</a>.</p>
+<p>When a Samza job executes on a YARN grid, the <code>$SAMZA_LOG_DIR</code> environment variable will point to a directory that is secured such that only the user executing the Samza job can read and write to it, if YARN is <a href="http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/ClusterSetup.html">securely configured</a>.</p>
 
 <h4 id="stdout">STDOUT</h4>
 

Modified: incubator/samza/site/learn/documentation/0.7.0/jobs/packaging.html
URL: http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/0.7.0/jobs/packaging.html?rev=1605209&r1=1605208&r2=1605209&view=diff
==============================================================================
--- incubator/samza/site/learn/documentation/0.7.0/jobs/packaging.html (original)
+++ incubator/samza/site/learn/documentation/0.7.0/jobs/packaging.html Tue Jun 24 22:01:57 2014
@@ -23,6 +23,7 @@
     <link href="/css/bootstrap.min.css" rel="stylesheet"/>
     <link href="/css/font-awesome.min.css" rel="stylesheet"/>
     <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
     <link rel="icon" type="image/png" href="/img/samza-icon.png">
   </head>
   <body>

Modified: incubator/samza/site/learn/documentation/0.7.0/jobs/reprocessing.html
URL: http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/0.7.0/jobs/reprocessing.html?rev=1605209&r1=1605208&r2=1605209&view=diff
==============================================================================
--- incubator/samza/site/learn/documentation/0.7.0/jobs/reprocessing.html (original)
+++ incubator/samza/site/learn/documentation/0.7.0/jobs/reprocessing.html Tue Jun 24 22:01:57 2014
@@ -23,6 +23,7 @@
     <link href="/css/bootstrap.min.css" rel="stylesheet"/>
     <link href="/css/font-awesome.min.css" rel="stylesheet"/>
     <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
     <link rel="icon" type="image/png" href="/img/samza-icon.png">
   </head>
   <body>

Modified: incubator/samza/site/learn/documentation/0.7.0/jobs/yarn-jobs.html
URL: http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/0.7.0/jobs/yarn-jobs.html?rev=1605209&r1=1605208&r2=1605209&view=diff
==============================================================================
--- incubator/samza/site/learn/documentation/0.7.0/jobs/yarn-jobs.html (original)
+++ incubator/samza/site/learn/documentation/0.7.0/jobs/yarn-jobs.html Tue Jun 24 22:01:57 2014
@@ -23,6 +23,7 @@
     <link href="/css/bootstrap.min.css" rel="stylesheet"/>
     <link href="/css/font-awesome.min.css" rel="stylesheet"/>
     <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
     <link rel="icon" type="image/png" href="/img/samza-icon.png">
   </head>
   <body>
@@ -122,11 +123,12 @@
    limitations under the License.
 -->
 
-<p>When you define job.factory.class=org.apache.samza.job.yarn.YarnJobFactory in your job&rsquo;s configuration, Samza will use YARN to execute your job. The YarnJobFactory will use the YARN_HOME environment variable on the machine that run-job.sh is executed on to get the appropriate YARN configuration, which will define where the YARN resource manager is. The YarnJob will work with the resource manager to get your job started on the YARN cluster.</p>
+<p>When you define <code>job.factory.class=org.apache.samza.job.yarn.YarnJobFactory</code> in your job&rsquo;s configuration, Samza will use YARN to execute your job. The YarnJobFactory will use the YARN_HOME environment variable on the machine that run-job.sh is executed on to get the appropriate YARN configuration, which will define where the YARN resource manager is. The YarnJob will work with the resource manager to get your job started on the YARN cluster.</p>
 
 <p>If you want to use YARN to run your Samza job, you&rsquo;ll also need to define the location of your Samza job&rsquo;s package. For example, you might say:</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">yarn.package.path=http://my.http.server/jobs/ingraphs-package-0.0.55.tgz
-</code></pre></div>
+
+<div class="highlight"><pre><code class="jproperties"><span class="na">yarn.package.path</span><span class="o">=</span><span class="s">http://my.http.server/jobs/ingraphs-package-0.0.55.tgz</span></code></pre></div>
+
 <p>This .tgz file follows the conventions outlined on the <a href="packaging.html">Packaging</a> page (it has bin/run-am.sh and bin/run-container.sh). YARN NodeManagers will take responsibility for downloading this .tgz file on the appropriate machines, and untar&#39;ing them. From there, YARN will execute run-am.sh or run-container.sh for the Samza Application Master, and SamzaContainer, respectively.</p>
 
 <!-- TODO document yarn.container.count and other key configs -->

Modified: incubator/samza/site/learn/documentation/0.7.0/operations/kafka.html
URL: http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/0.7.0/operations/kafka.html?rev=1605209&r1=1605208&r2=1605209&view=diff
==============================================================================
--- incubator/samza/site/learn/documentation/0.7.0/operations/kafka.html (original)
+++ incubator/samza/site/learn/documentation/0.7.0/operations/kafka.html Tue Jun 24 22:01:57 2014
@@ -23,6 +23,7 @@
     <link href="/css/bootstrap.min.css" rel="stylesheet"/>
     <link href="/css/font-awesome.min.css" rel="stylesheet"/>
     <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
     <link rel="icon" type="image/png" href="/img/samza-icon.png">
   </head>
   <body>
@@ -131,8 +132,9 @@
 <h3 id="auto-create-topics">Auto-Create Topics</h3>
 
 <p>Kafka brokers should be configured to automatically create topics. Without this, it&rsquo;s going to be very cumbersome to run Samza jobs, since jobs will write to arbitrary (and sometimes new) topics.</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">auto.create.topics.enable=true
-</code></pre></div>
+
+<div class="highlight"><pre><code class="jproperties"><span class="na">auto.create.topics.enable</span><span class="o">=</span><span class="s">true</span></code></pre></div>
+
 
           </div>
         </div>

Modified: incubator/samza/site/learn/documentation/0.7.0/operations/security.html
URL: http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/0.7.0/operations/security.html?rev=1605209&r1=1605208&r2=1605209&view=diff
==============================================================================
--- incubator/samza/site/learn/documentation/0.7.0/operations/security.html (original)
+++ incubator/samza/site/learn/documentation/0.7.0/operations/security.html Tue Jun 24 22:01:57 2014
@@ -23,6 +23,7 @@
     <link href="/css/bootstrap.min.css" rel="stylesheet"/>
     <link href="/css/font-awesome.min.css" rel="stylesheet"/>
     <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
     <link rel="icon" type="image/png" href="/img/samza-icon.png">
   </head>
   <body>

Modified: incubator/samza/site/learn/documentation/0.7.0/yarn/application-master.html
URL: http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/0.7.0/yarn/application-master.html?rev=1605209&r1=1605208&r2=1605209&view=diff
==============================================================================
--- incubator/samza/site/learn/documentation/0.7.0/yarn/application-master.html (original)
+++ incubator/samza/site/learn/documentation/0.7.0/yarn/application-master.html Tue Jun 24 22:01:57 2014
@@ -23,6 +23,7 @@
     <link href="/css/bootstrap.min.css" rel="stylesheet"/>
     <link href="/css/font-awesome.min.css" rel="stylesheet"/>
     <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
     <link rel="icon" type="image/png" href="/img/samza-icon.png">
   </head>
   <body>

Modified: incubator/samza/site/learn/documentation/0.7.0/yarn/isolation.html
URL: http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/0.7.0/yarn/isolation.html?rev=1605209&r1=1605208&r2=1605209&view=diff
==============================================================================
--- incubator/samza/site/learn/documentation/0.7.0/yarn/isolation.html (original)
+++ incubator/samza/site/learn/documentation/0.7.0/yarn/isolation.html Tue Jun 24 22:01:57 2014
@@ -23,6 +23,7 @@
     <link href="/css/bootstrap.min.css" rel="stylesheet"/>
     <link href="/css/font-awesome.min.css" rel="stylesheet"/>
     <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
     <link rel="icon" type="image/png" href="/img/samza-icon.png">
   </head>
   <body>

Modified: incubator/samza/site/learn/tutorials/0.7.0/deploy-samza-job-from-hdfs.html
URL: http://svn.apache.org/viewvc/incubator/samza/site/learn/tutorials/0.7.0/deploy-samza-job-from-hdfs.html?rev=1605209&r1=1605208&r2=1605209&view=diff
==============================================================================
--- incubator/samza/site/learn/tutorials/0.7.0/deploy-samza-job-from-hdfs.html (original)
+++ incubator/samza/site/learn/tutorials/0.7.0/deploy-samza-job-from-hdfs.html Tue Jun 24 22:01:57 2014
@@ -23,6 +23,7 @@
     <link href="/css/bootstrap.min.css" rel="stylesheet"/>
     <link href="/css/font-awesome.min.css" rel="stylesheet"/>
     <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
     <link rel="icon" type="image/png" href="/img/samza-icon.png">
   </head>
   <body>
@@ -131,29 +132,33 @@
 <ul>
 <li>Add dependency statement in pom.xml of samza-job-package</li>
 </ul>
-<div class="highlight"><pre><code class="language-text" data-lang="text">&lt;dependency&gt;
-  &lt;groupId&gt;org.apache.hadoop&lt;/groupId&gt;
-  &lt;artifactId&gt;hadoop-hdfs&lt;/artifactId&gt;
-  &lt;version&gt;2.2.0&lt;/version&gt;
-&lt;/dependency&gt;
-</code></pre></div>
+
+<div class="highlight"><pre><code class="xml"><span class="nt">&lt;dependency&gt;</span>
+  <span class="nt">&lt;groupId&gt;</span>org.apache.hadoop<span class="nt">&lt;/groupId&gt;</span>
+  <span class="nt">&lt;artifactId&gt;</span>hadoop-hdfs<span class="nt">&lt;/artifactId&gt;</span>
+  <span class="nt">&lt;version&gt;</span>2.2.0<span class="nt">&lt;/version&gt;</span>
+<span class="nt">&lt;/dependency&gt;</span></code></pre></div>
+
 <ul>
 <li>Add the following code to src/main/assembly/src.xml in samza-job-package.</li>
 </ul>
-<div class="highlight"><pre><code class="language-text" data-lang="text">&lt;include&gt;org.apache.hadoop:hadoop-hdfs&lt;/include&gt;
-</code></pre></div>
+
+<div class="highlight"><pre><code class="xml"><span class="nt">&lt;include&gt;</span>org.apache.hadoop:hadoop-hdfs<span class="nt">&lt;/include&gt;</span></code></pre></div>
+
 <ul>
 <li>Create .tar.gz package</li>
 </ul>
-<div class="highlight"><pre><code class="language-text" data-lang="text">mvn clean pacakge
-</code></pre></div>
+
+<div class="highlight"><pre><code class="bash">mvn clean package</code></pre></div>
+
 <ul>
 <li>Make sure hadoop-common-version.jar has the same version as your hadoop-hdfs-version.jar. Otherwise, you may still have errors.</li>
 </ul>
 
 <h3 id="upload-the-package">Upload the package</h3>
-<div class="highlight"><pre><code class="language-text" data-lang="text">hadoop fs -put ./samza-job-package/target/samza-job-package-0.7.0-dist.tar.gz /path/for/tgz
-</code></pre></div>
+
+<div class="highlight"><pre><code class="bash">hadoop fs -put ./samza-job-package/target/samza-job-package-0.7.0-dist.tar.gz /path/for/tgz</code></pre></div>
+
 <h3 id="add-hdfs-configuration">Add HDFS configuration</h3>
 
 <p>Put the hdfs-site.xml file of your cluster into ~/.samza/conf directory. (The same place as the yarn-site.xml)</p>
@@ -161,8 +166,9 @@
 <h3 id="change-properties-file">Change properties file</h3>
 
 <p>Change the yarn.package.path in the properties file to your HDFS location.</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">yarn.package.path=hdfs://&lt;hdfs name node ip&gt;:&lt;hdfs name node port&gt;/path/to/tgz
-</code></pre></div>
+
+<div class="highlight"><pre><code class="jproperties"><span class="na">yarn.package.path</span><span class="o">=</span><span class="s">hdfs://&lt;hdfs name node ip&gt;:&lt;hdfs name node port&gt;/path/to/tgz</span></code></pre></div>
+
 <p>Then you should be able to run the Samza job as described in <a href="../../../startup/hello-samza/0.7.0/">hello-samza</a>.</p>
 
 

Modified: incubator/samza/site/learn/tutorials/0.7.0/index.html
URL: http://svn.apache.org/viewvc/incubator/samza/site/learn/tutorials/0.7.0/index.html?rev=1605209&r1=1605208&r2=1605209&view=diff
==============================================================================
--- incubator/samza/site/learn/tutorials/0.7.0/index.html (original)
+++ incubator/samza/site/learn/tutorials/0.7.0/index.html Tue Jun 24 22:01:57 2014
@@ -23,6 +23,7 @@
     <link href="/css/bootstrap.min.css" rel="stylesheet"/>
     <link href="/css/font-awesome.min.css" rel="stylesheet"/>
     <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
     <link rel="icon" type="image/png" href="/img/samza-icon.png">
   </head>
   <body>

Modified: incubator/samza/site/learn/tutorials/0.7.0/remote-debugging-samza.html
URL: http://svn.apache.org/viewvc/incubator/samza/site/learn/tutorials/0.7.0/remote-debugging-samza.html?rev=1605209&r1=1605208&r2=1605209&view=diff
==============================================================================
--- incubator/samza/site/learn/tutorials/0.7.0/remote-debugging-samza.html (original)
+++ incubator/samza/site/learn/tutorials/0.7.0/remote-debugging-samza.html Tue Jun 24 22:01:57 2014
@@ -23,6 +23,7 @@
     <link href="/css/bootstrap.min.css" rel="stylesheet"/>
     <link href="/css/font-awesome.min.css" rel="stylesheet"/>
     <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
     <link rel="icon" type="image/png" href="/img/samza-icon.png">
   </head>
   <body>
@@ -127,27 +128,32 @@
 <h3 id="get-the-code">Get the Code</h3>
 
 <p>Start by checking out Samza, so we have access to the source.</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">git clone http://git-wip-us.apache.org/repos/asf/incubator-samza.git
-</code></pre></div>
+
+<div class="highlight"><pre><code class="bash">git clone http://git-wip-us.apache.org/repos/asf/incubator-samza.git</code></pre></div>
+
 <p>Next, grab hello-samza.</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">git clone git://git.apache.org/incubator-samza-hello-samza.git
-</code></pre></div>
+
+<div class="highlight"><pre><code class="bash">git clone git://git.apache.org/incubator-samza-hello-samza.git</code></pre></div>
+
 <h3 id="setup-the-environment">Setup the Environment</h3>
 
 <p>Now, let&rsquo;s set up the Eclipse project files.</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">cd incubator-samza
-./gradlew eclipse
-</code></pre></div>
+
+<div class="highlight"><pre><code class="bash"><span class="nb">cd </span>incubator-samza
+./gradlew eclipse</code></pre></div>
+
 <p>Let&rsquo;s also release Samza to Maven&rsquo;s local repository, so hello-samza has access to the JARs that it needs.</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">./gradlew -PscalaVersion=2.9.2 clean publishToMavenLocal
-</code></pre></div>
+
+<div class="highlight"><pre><code class="bash">./gradlew -PscalaVersion<span class="o">=</span>2.9.2 clean publishToMavenLocal</code></pre></div>
+
 <p>Next, open Eclipse, and import the Samza source code into your workspace: &ldquo;File&rdquo; &gt; &ldquo;Import&rdquo; &gt; &ldquo;Existing Projects into Workspace&rdquo; &gt; &ldquo;Browse&rdquo;. Select &lsquo;incubator-samza&rsquo; folder, and hit &lsquo;finish&rsquo;.</p>
 
 <h3 id="enable-remote-debugging">Enable Remote Debugging</h3>
 
 <p>Now, go back to the hello-samza project, and edit ./samza-job-package/src/main/config/wikipedia-feed.properties to add the following line:</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">task.opts=-agentlib:jdwp=transport=dt_socket,address=localhost:9009,server=y,suspend=y
-</code></pre></div>
+
+<div class="highlight"><pre><code class="jproperties"><span class="na">task.opts</span><span class="o">=</span><span class="s">-agentlib:jdwp=transport=dt_socket,address=localhost:9009,server=y,suspend=y</span></code></pre></div>
+
 <p>The <a href="../../documentation/0.7.0/jobs/configuration-table.html">task.opts</a> configuration parameter is a way to override Java parameters at runtime for your Samza containers. In this example, we&rsquo;re setting the agentlib parameter to enable remote debugging on localhost, port 9009. In a more realistic environment, you might also set Java heap settings (-Xmx, -Xms, etc), as well as garbage collection and logging settings.</p>
 
 <p><em>NOTE: If you&rsquo;re running multiple Samza containers on the same machine, there is a potential for port collisions. You must configure your task.opts to assign different ports for different Samza jobs. If a Samza job has more than one container (e.g. if you&rsquo;re using YARN with yarn.container.count=2), those containers must be run on different machines.</em></p>
@@ -155,17 +161,20 @@
 <h3 id="start-the-grid">Start the Grid</h3>
 
 <p>Now that the Samza job has been set up to enable remote debugging when a Samza container starts, let&rsquo;s start ZooKeeper, Kafka, and YARN.</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">bin/grid
-</code></pre></div>
+
+<div class="highlight"><pre><code class="bash">bin/grid</code></pre></div>
+
 <p>If you get a complaint that JAVA_HOME is not set, then you&rsquo;ll need to set it. This can be done on OSX by running:</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">export JAVA_HOME=$(/usr/libexec/java_home)
-</code></pre></div>
+
+<div class="highlight"><pre><code class="bash"><span class="nb">export </span><span class="nv">JAVA_HOME</span><span class="o">=</span><span class="k">$(</span>/usr/libexec/java_home<span class="k">)</span></code></pre></div>
+
 <p>Once the grid starts, you can start the wikipedia-feed Samza job.</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">mvn clean package
+
+<div class="highlight"><pre><code class="bash">mvn clean package
 mkdir -p deploy/samza
 tar -xvf ./samza-job-package/target/samza-job-package-0.7.0-dist.tar.gz -C deploy/samza
-deploy/samza/bin/run-job.sh --config-factory=org.apache.samza.config.factories.PropertiesConfigFactory --config-path=file://$PWD/deploy/samza/config/wikipedia-feed.properties
-</code></pre></div>
+deploy/samza/bin/run-job.sh --config-factory<span class="o">=</span>org.apache.samza.config.factories.PropertiesConfigFactory --config-path<span class="o">=</span>file://<span class="nv">$PWD</span>/deploy/samza/config/wikipedia-feed.properties</code></pre></div>
+
 <p>When the wikipedia-feed job starts up, a single Samza container will be created to process all incoming messages. This is the container that we&rsquo;ll want to connect to from the remote debugger.</p>
 
 <h3 id="connect-the-remote-debugger">Connect the Remote Debugger</h3>

Modified: incubator/samza/site/learn/tutorials/0.7.0/run-hello-samza-without-internet.html
URL: http://svn.apache.org/viewvc/incubator/samza/site/learn/tutorials/0.7.0/run-hello-samza-without-internet.html?rev=1605209&r1=1605208&r2=1605209&view=diff
==============================================================================
--- incubator/samza/site/learn/tutorials/0.7.0/run-hello-samza-without-internet.html (original)
+++ incubator/samza/site/learn/tutorials/0.7.0/run-hello-samza-without-internet.html Tue Jun 24 22:01:57 2014
@@ -23,6 +23,7 @@
     <link href="/css/bootstrap.min.css" rel="stylesheet"/>
     <link href="/css/font-awesome.min.css" rel="stylesheet"/>
     <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
     <link rel="icon" type="image/png" href="/img/samza-icon.png">
   </head>
   <body>
@@ -127,8 +128,9 @@
 <h3 id="test-your-connection">Test Your Connection</h3>
 
 <p>Ping irc.wikimedia.org. Sometimes the firewall in your company blocks this service.</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">telnet irc.wikimedia.org 6667
-</code></pre></div>
+
+<div class="highlight"><pre><code class="bash">telnet irc.wikimedia.org 6667</code></pre></div>
+
 <p>You should see something like this:</p>
 <div class="highlight"><pre><code class="language-text" data-lang="text">Trying 208.80.152.178...
 Connected to ekrem.wikimedia.org.
@@ -143,23 +145,25 @@ NOTICE AUTH :*** Found your hostname
 <h3 id="use-local-data-to-run-hello-samza">Use Local Data to Run Hello Samza</h3>
 
 <p>We provide an alternative to get wikipedia feed data. Instead of running</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">deploy/samza/bin/run-job.sh --config-factory=org.apache.samza.config.factories.PropertiesConfigFactory --config-path=file://$PWD/deploy/samza/config/wikipedia-feed.properties
-</code></pre></div>
-<p>You will run
-<code>
-bin/produce-wikipedia-raw-data.sh
-</code> </p>
+
+<div class="highlight"><pre><code class="bash">deploy/samza/bin/run-job.sh --config-factory<span class="o">=</span>org.apache.samza.config.factories.PropertiesConfigFactory --config-path<span class="o">=</span>file://<span class="nv">$PWD</span>/deploy/samza/config/wikipedia-feed.properties</code></pre></div>
+
+<p>You will run</p>
+
+<div class="highlight"><pre><code class="bash">bin/produce-wikipedia-raw-data.sh</code></pre></div>
 
 <p>This script will read wikipedia feed data from a local file and produce it to the Kafka broker. By default, it produces to localhost:9092 as the Kafka broker and uses localhost:2181 as zookeeper. You can override them:</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">bin/produce-wikipedia-raw-data.sh -b yourKafkaBrokerAddress -z yourZookeeperAddress
-</code></pre></div>
+
+<div class="highlight"><pre><code class="bash">bin/produce-wikipedia-raw-data.sh -b yourKafkaBrokerAddress -z yourZookeeperAddress</code></pre></div>
+
 <p>Now you can go back to Generate Wikipedia Statistics section in <a href="../../../startup/hello-samza/0.7.0/">Hello Samza</a> and follow the remaining steps.</p>
 
 <h3 id="a-little-explanation">A Little Explanation</h3>
 
-<p>The goal of </p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">deploy/samza/bin/run-job.sh --config-factory=org.apache.samza.config.factories.PropertiesConfigFactory --config-path=file://$PWD/deploy/samza/config/wikipedia-feed.properties
-</code></pre></div>
+<p>The goal of</p>
+
+<div class="highlight"><pre><code class="bash">deploy/samza/bin/run-job.sh --config-factory<span class="o">=</span>org.apache.samza.config.factories.PropertiesConfigFactory --config-path<span class="o">=</span>file://<span class="nv">$PWD</span>/deploy/samza/config/wikipedia-feed.properties</code></pre></div>
+
 <p>is to deploy a Samza job which listens to wikipedia API, receives the feed in realtime and produces the feed to the Kafka topic wikipedia-raw. The alternative in this tutorial is reading local wikipedia feed in an infinite loop and producing the data to Kafka wikipedia-raw. The follow-up job, wikipedia-parser is getting data from Kafka topic wikipedia-raw, so as long as we have correct data in Kafka topic wikipedia-raw, we are fine. All Samza jobs are connected by the Kafka and do not depend on each other.</p>
 
 

Modified: incubator/samza/site/learn/tutorials/0.7.0/run-in-multi-node-yarn.html
URL: http://svn.apache.org/viewvc/incubator/samza/site/learn/tutorials/0.7.0/run-in-multi-node-yarn.html?rev=1605209&r1=1605208&r2=1605209&view=diff
==============================================================================
--- incubator/samza/site/learn/tutorials/0.7.0/run-in-multi-node-yarn.html (original)
+++ incubator/samza/site/learn/tutorials/0.7.0/run-in-multi-node-yarn.html Tue Jun 24 22:01:57 2014
@@ -23,6 +23,7 @@
     <link href="/css/bootstrap.min.css" rel="stylesheet"/>
     <link href="/css/font-awesome.min.css" rel="stylesheet"/>
     <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
     <link rel="icon" type="image/png" href="/img/samza-icon.png">
   </head>
   <body>
@@ -131,26 +132,30 @@
 <h3 id="basic-yarn-setting">Basic YARN Setting</h3>
 
 <p>1. Download <a href="http://mirror.symnds.com/software/Apache/hadoop/common/hadoop-2.3.0/hadoop-2.3.0.tar.gz">YARN 2.3</a> to /tmp and untar it.</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">cd /tmp
+
+<div class="highlight"><pre><code class="bash"><span class="nb">cd</span> /tmp
 tar -xvf hadoop-2.3.0.tar.gz
-cd hadoop-2.3.0
-</code></pre></div>
+<span class="nb">cd </span>hadoop-2.3.0</code></pre></div>
+
 <p>2. Set up environment variables.</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">export HADOOP_YARN_HOME=$(pwd)
+
+<div class="highlight"><pre><code class="bash"><span class="nb">export </span><span class="nv">HADOOP_YARN_HOME</span><span class="o">=</span><span class="k">$(</span><span class="nb">pwd</span><span class="k">)</span>
 mkdir conf
-export HADOOP_CONF_DIR=$HADOOP_YARN_HOME/conf
-</code></pre></div>
+<span class="nb">export </span><span class="nv">HADOOP_CONF_DIR</span><span class="o">=</span><span class="nv">$HADOOP_YARN_HOME</span>/conf</code></pre></div>
+
 <p>3. Configure YARN setting file.</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">cp ./etc/hadoop/yarn-site.xml conf
-vi conf/yarn-site.xml
-</code></pre></div>
+
+<div class="highlight"><pre><code class="bash">cp ./etc/hadoop/yarn-site.xml conf
+vi conf/yarn-site.xml</code></pre></div>
+
 <p>Add the following property to yarn-site.xml:</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">&lt;property&gt;
-    &lt;name&gt;yarn.resourcemanager.hostname&lt;/name&gt;
-    &lt;!-- hostname that is accessible from all NMs --&gt;
-    &lt;value&gt;yourHostname&lt;/value&gt;
-&lt;/property&gt;
-</code></pre></div>
+
+<div class="highlight"><pre><code class="xml"><span class="nt">&lt;property&gt;</span>
+    <span class="nt">&lt;name&gt;</span>yarn.resourcemanager.hostname<span class="nt">&lt;/name&gt;</span>
+    <span class="c">&lt;!-- hostname that is accessible from all NMs --&gt;</span>
+    <span class="nt">&lt;value&gt;</span>yourHostname<span class="nt">&lt;/value&gt;</span>
+<span class="nt">&lt;/property&gt;</span></code></pre></div>
+
 <p>Download and add capacity-scheduler.xml.</p>
 <div class="highlight"><pre><code class="language-text" data-lang="text">curl http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/resources/capacity-scheduler.xml?view=co &gt; conf/capacity-scheduler.xml
 </code></pre></div>
@@ -159,34 +164,39 @@ vi conf/yarn-site.xml
 <p>The goal of these steps is to configure YARN to read from an HTTP filesystem, because we will use an HTTP server to deploy the Samza job package. If you want to use HDFS to deploy the Samza job package, you can skip steps 4 to 6 and follow <a href="deploy-samza-job-from-hdfs.html">Deploying a Samza Job from HDFS</a>.</p>
 
 <p>4. Download Scala package and untar it.</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">cd /tmp
+
+<div class="highlight"><pre><code class="bash"><span class="nb">cd</span> /tmp
 curl http://www.scala-lang.org/files/archive/scala-2.10.3.tgz &gt; scala-2.10.3.tgz
-tar -xvf scala-2.10.3.tgz
-</code></pre></div>
+tar -xvf scala-2.10.3.tgz</code></pre></div>
+
 <p>5. Add Scala and its log jars.</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">cp /tmp/scala-2.10.3/lib/scala-compiler.jar $HADOOP_YARN_HOME/share/hadoop/hdfs/lib
-cp /tmp/scala-2.10.3/lib/scala-library.jar $HADOOP_YARN_HOME/share/hadoop/hdfs/lib
-curl http://search.maven.org/remotecontent?filepath=org/clapper/grizzled-slf4j_2.10/1.0.1/grizzled-slf4j_2.10-1.0.1.jar &gt; $HADOOP_YARN_HOME/share/hadoop/hdfs/lib/grizzled-slf4j_2.10-1.0.1.jar
-</code></pre></div>
+
+<div class="highlight"><pre><code class="bash">cp /tmp/scala-2.10.3/lib/scala-compiler.jar <span class="nv">$HADOOP_YARN_HOME</span>/share/hadoop/hdfs/lib
+cp /tmp/scala-2.10.3/lib/scala-library.jar <span class="nv">$HADOOP_YARN_HOME</span>/share/hadoop/hdfs/lib
+curl http://search.maven.org/remotecontent?filepath<span class="o">=</span>org/clapper/grizzled-slf4j_2.10/1.0.1/grizzled-slf4j_2.10-1.0.1.jar &gt; <span class="nv">$HADOOP_YARN_HOME</span>/share/hadoop/hdfs/lib/grizzled-slf4j_2.10-1.0.1.jar</code></pre></div>
+
 <p>6. Add http configuration in core-site.xml (create the core-site.xml file and add content).</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">vi $HADOOP_YARN_HOME/conf/core-site.xml
-</code></pre></div>
+
+<div class="highlight"><pre><code class="bash">vi $HADOOP_YARN_HOME/conf/core-site.xml</code></pre></div>
+
 <p>Add the following code:</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">&lt;?xml-stylesheet type=&quot;text/xsl&quot; href=&quot;configuration.xsl&quot;?&gt;
-&lt;configuration&gt;
-    &lt;property&gt;
-      &lt;name&gt;fs.http.impl&lt;/name&gt;
-      &lt;value&gt;org.apache.samza.util.hadoop.HttpFileSystem&lt;/value&gt;
-    &lt;/property&gt;
-&lt;/configuration&gt;
-</code></pre></div>
+
+<div class="highlight"><pre><code class="xml"><span class="cp">&lt;?xml-stylesheet type=&quot;text/xsl&quot; href=&quot;configuration.xsl&quot;?&gt;</span>
+<span class="nt">&lt;configuration&gt;</span>
+    <span class="nt">&lt;property&gt;</span>
+      <span class="nt">&lt;name&gt;</span>fs.http.impl<span class="nt">&lt;/name&gt;</span>
+      <span class="nt">&lt;value&gt;</span>org.apache.samza.util.hadoop.HttpFileSystem<span class="nt">&lt;/value&gt;</span>
+    <span class="nt">&lt;/property&gt;</span>
+<span class="nt">&lt;/configuration&gt;</span></code></pre></div>
+
 <h3 id="distribute-hadoop-file-to-slaves">Distribute Hadoop File to Slaves</h3>
 
 <p>7. Basically, you copy the hadoop file in your host machine to slave machines. (172.21.100.35, in my case):</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">scp -r . 172.21.100.35:/tmp/hadoop-2.3.0
-echo 172.21.100.35 &gt; conf/slaves
-sbin/start-yarn.sh
-</code></pre></div>
+
+<div class="highlight"><pre><code class="bash">scp -r . 172.21.100.35:/tmp/hadoop-2.3.0
+<span class="nb">echo </span>172.21.100.35 &gt; conf/slaves
+sbin/start-yarn.sh</code></pre></div>
+
 <ul>
 <li>If you get &ldquo;172.21.100.35: Error: JAVA_HOME is not set and could not be found.&rdquo;, you&rsquo;ll need to add a conf/hadoop-env.sh file to the machine with the failure (172.21.100.35, in this case), which has &ldquo;export JAVA_HOME=/export/apps/jdk/JDK-1_6_0_27&rdquo; (or wherever your JAVA_HOME actually is).</li>
 </ul>
@@ -198,33 +208,39 @@ sbin/start-yarn.sh
 <p>Some of the following steps are identical to what you have seen in <a href="../../../startup/hello-samza/0.7.0/">hello-samza</a>. You may skip them if you have already done so.</p>
 
 <p>1. Download Samza and publish it to Maven local repository.</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">cd /tmp
+
+<div class="highlight"><pre><code class="bash"><span class="nb">cd</span> /tmp
 git clone http://git-wip-us.apache.org/repos/asf/incubator-samza.git
-cd incubator-samza
+<span class="nb">cd </span>incubator-samza
 ./gradlew clean publishToMavenLocal
-cd ..
-</code></pre></div>
+<span class="nb">cd</span> ..</code></pre></div>
+
 <p>2. Download hello-samza project and change the job properties file.</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">git clone git://github.com/linkedin/hello-samza.git
-cd hello-samza
-vi samza-job-package/src/main/config/wikipedia-feed.properties
-</code></pre></div>
+
+<div class="highlight"><pre><code class="bash">git clone git://github.com/linkedin/hello-samza.git
+<span class="nb">cd </span>hello-samza
+vi samza-job-package/src/main/config/wikipedia-feed.properties</code></pre></div>
+
 <p>Change the yarn.package.path property to be:</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">yarn.package.path=http://yourHostname:8000/samza-job-package/target/samza-job-package-0.7.0-dist.tar.gz
-</code></pre></div>
+
+<div class="highlight"><pre><code class="jproperties"><span class="na">yarn.package.path</span><span class="o">=</span><span class="s">http://yourHostname:8000/samza-job-package/target/samza-job-package-0.7.0-dist.tar.gz</span></code></pre></div>
+
 <p>3. Compile hello-samza.</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">mvn clean package
+
+<div class="highlight"><pre><code class="bash">mvn clean package
 mkdir -p deploy/samza
-tar -xvf ./samza-job-package/target/samza-job-package-0.7.0-dist.tar.gz -C deploy/samza
-</code></pre></div>
+tar -xvf ./samza-job-package/target/samza-job-package-0.7.0-dist.tar.gz -C deploy/samza</code></pre></div>
+
 <p>4. Deploy the Samza job package to the HTTP server.</p>
 
 <p>Open a new terminal, and run:</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">cd /tmp/hello-samza &amp;&amp; python -m SimpleHTTPServer
-</code></pre></div>
+
+<div class="highlight"><pre><code class="bash"><span class="nb">cd</span> /tmp/hello-samza <span class="o">&amp;&amp;</span> python -m SimpleHTTPServer</code></pre></div>
+
 <p>Go back to the original terminal (not the one running the HTTP server):</p>
-<div class="highlight"><pre><code class="language-text" data-lang="text">deploy/samza/bin/run-job.sh --config-factory=org.apache.samza.config.factories.PropertiesConfigFactory --config-path=file://$PWD/deploy/samza/config/wikipedia-feed.properties
-</code></pre></div>
+
+<div class="highlight"><pre><code class="bash">deploy/samza/bin/run-job.sh --config-factory<span class="o">=</span>org.apache.samza.config.factories.PropertiesConfigFactory --config-path<span class="o">=</span>file://<span class="nv">$PWD</span>/deploy/samza/config/wikipedia-feed.properties</code></pre></div>
+
 <p>Go to http://yourHostname:8088 and find the wikipedia-feed job. Click on the ApplicationMaster link to see that it&rsquo;s running.</p>
 
 <p>Congratulations! You now run the Samza job in a &ldquo;real&rdquo; YARN grid!</p>

Modified: incubator/samza/site/less/main.less
URL: http://svn.apache.org/viewvc/incubator/samza/site/less/main.less?rev=1605209&r1=1605208&r2=1605209&view=diff
==============================================================================
--- incubator/samza/site/less/main.less (original)
+++ incubator/samza/site/less/main.less Tue Jun 24 22:01:57 2014
@@ -1,19 +1,21 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
 
 @import "variables.less";
 @import "non-responsive.less";
@@ -90,6 +92,8 @@ pre {
   border: 0px !important;
   border-radius: 0px !important;
   overflow-x: auto;
+  background-color: #f7f7f7;
+  font-size: 12px;
   code {
     overflow-wrap: normal;
     white-space: pre;
@@ -162,6 +166,10 @@ td.key {
   font-size: 16px;
 }
 
+img.diagram-large {
+  width: 100%;
+}
+
 ul.documentation-list {
   list-style: none;
   padding-left: 20px;

Modified: incubator/samza/site/sitemap.xml
URL: http://svn.apache.org/viewvc/incubator/samza/site/sitemap.xml?rev=1605209&r1=1605208&r2=1605209&view=diff
==============================================================================
--- incubator/samza/site/sitemap.xml (original)
+++ incubator/samza/site/sitemap.xml Tue Jun 24 22:01:57 2014
@@ -20,7 +20,7 @@
 
   <url>
     <loc>http://samza.incubator.apache.org/</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     <changefreq>daily</changefreq>
     <priority>1.0</priority>
   </url>
@@ -30,308 +30,308 @@
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/yarn/application-master.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/introduction/architecture.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/introduction/background.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/container/checkpointing.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/contribute/code.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/contribute/coding-guide.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/community/committers.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/introduction/concepts.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/jobs/configuration.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/tutorials/0.7.0/deploy-samza-job-from-hdfs.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/contribute/disclaimer.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/container/event-loop.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/index.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/tutorials/0.7.0/index.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/index.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/startup/download/index.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/startup/hello-samza/0.7.0/index.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/comparisons/introduction.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/community/irc.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/yarn/isolation.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/container/jmx.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/jobs/job-runner.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/operations/kafka.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/jobs/logging.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/community/mailing-lists.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/container/metrics.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/comparisons/mupd8.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/api/overview.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/jobs/packaging.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/contribute/projects.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/tutorials/0.7.0/remote-debugging-samza.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/jobs/reprocessing.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/contribute/rules.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/tutorials/0.7.0/run-hello-samza-without-internet.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/tutorials/0.7.0/run-in-multi-node-yarn.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/container/samza-container.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/operations/security.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/contribute/seps.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/container/serialization.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/container/state-management.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/comparisons/storm.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/container/streams.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/container/windowing.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/jobs/yarn-jobs.html</loc>
-    <lastmod>2014-06-18</lastmod>
+    <lastmod>2014-06-24</lastmod>
     
     
   </url>