You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@distributedlog.apache.org by si...@apache.org on 2017/04/26 18:56:54 UTC

[31/51] [partial] incubator-distributedlog git commit: Release 0.4.0-incubating

http://git-wip-us.apache.org/repos/asf/incubator-distributedlog/blob/ef7245e8/content/docs/0.4.0-incubating/user_guide/design/main.html
----------------------------------------------------------------------
diff --git a/content/docs/0.4.0-incubating/user_guide/design/main.html b/content/docs/0.4.0-incubating/user_guide/design/main.html
new file mode 100644
index 0000000..cc66267
--- /dev/null
+++ b/content/docs/0.4.0-incubating/user_guide/design/main.html
@@ -0,0 +1,808 @@
+<!DOCTYPE html>
+<html lang="en">
+
+  <head>
+  <meta charset="utf-8">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+
+  <title>Apache DistributedLog (incubating)</title>
+  <meta name="description" content="Apache DistributedLog is an high performance replicated log.
+">
+
+  <link rel="stylesheet" href="/docs/0.4.0-incubating/styles/site.css">
+  <link rel="stylesheet" href="/docs/0.4.0-incubating/css/theme.css">
+  <!-- JQuery -->
+  <script src="https://ajax.googleapis.com/ajax/libs/jquery/2.2.0/jquery.min.js"></script>
+  <script src="/docs/0.4.0-incubating/js/bootstrap.min.js"></script>
+  <link rel="canonical" href="http://distributedlog.incubator.apache.org/docs/0.4.0-incubating/user_guide/design/main.html" data-proofer-ignore>
+  <link rel="alternate" type="application/rss+xml" title="Apache DistributedLog (incubating)" href="http://distributedlog.incubator.apache.org/docs/0.4.0-incubating/feed.xml">
+  <!-- Font Awesome -->
+  <script src="//cdnjs.cloudflare.com/ajax/libs/anchor-js/3.2.0/anchor.min.js"></script>
+  <!-- Google Analytics -->
+  <script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+    (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+    m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
+
+  ga('create', 'UA-83870961-1', 'auto');
+  ga('send', 'pageview');
+  </script>
+  <!-- End Google Analytics -->
+  <link rel="shortcut icon" type="image/x-icon" href="/images/favicon.ico">
+</head>
+
+
+  <body role="document">
+
+    
+<nav class="navbar navbar-default navbar-fixed-top">
+  <div class="container">
+    <div class="navbar-header">
+      <a href="/" class="navbar-brand" >
+        <img alt="Brand" style="height: 28px" src="/docs/0.4.0-incubating/images/distributedlog_logo_navbar.png">
+      </a>
+      <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false" aria-controls="navbar">
+        <span class="sr-only">Toggle navigation</span>
+        <span class="icon-bar"></span>
+        <span class="icon-bar"></span>
+        <span class="icon-bar"></span>
+      </button>
+    </div>
+    <div id="navbar" class="navbar-collapse collapse">
+      <ul class="nav navbar-nav">
+        <!-- Overview -->
+        <li><a href="/docs/0.4.0-incubating/">V0.4.0</a></li>
+        <!-- Concepts -->
+        <li><a href="/docs/0.4.0-incubating/basics/introduction">Concepts</a></li>
+        <!-- Quick Start -->
+        <li>
+          <a href="/docs/0.4.0-incubating/start" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Start<span class="caret"></span></a>
+          <ul class="dropdown-menu" role="menu">
+            
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/start/building.html">
+                Build DistributedLog from Source
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/start/download.html">
+                Download Releases
+              </a>
+            </li>
+            
+            <li role="separator" class="divider"></li>
+            <li class="dropdown-header"><strong>Quickstart</strong></li>
+            
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/start/quickstart.html">
+                Setup & Run Example
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/tutorials/basic-1.html">
+                API - Write Records (via core library)
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/tutorials/basic-2.html">
+                API - Write Records (via write proxy)
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/tutorials/basic-5.html">
+                API - Read Records
+              </a>
+            </li>
+            
+            <li role="separator" class="divider"></li>
+            <li class="dropdown-header"><strong>Deployment</strong></li>
+            
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/deployment/cluster.html">
+                Cluster Setup
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/deployment/global-cluster.html">
+                Global Cluster Setup
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/deployment/docker.html">
+                Docker
+              </a>
+            </li>
+            
+          </ul>
+        </li>
+        <!-- API -->
+        <li>
+          <a href="/docs/0.4.0-incubating/start" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">API<span class="caret"></span></a>
+          <ul class="dropdown-menu" role="menu">
+            <li><a href="/docs/0.4.0-incubating/api/java">Java</a></li>
+          </ul>
+        </li>
+        <!-- User Guide -->
+        <li class="dropdown">
+		      <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">User Guide<span class="caret"></span></a>
+          <ul class="dropdown-menu">
+            
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/basics/introduction.html">
+                Introduction
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/user_guide/considerations/main.html">
+                Considerations
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/user_guide/architecture/main.html">
+                Architecture
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/user_guide/api/main.html">
+                API
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/user_guide/configuration/main.html">
+                Configuration
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/user_guide/design/main.html">
+                Detail Design
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/user_guide/globalreplicatedlog/main.html">
+                Global Replicated Log
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/user_guide/implementation/main.html">
+                Implementation
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/user_guide/references/main.html">
+                References
+              </a>
+            </li>
+            
+          </ul>
+        </li>
+        <!-- Admin Guide -->
+        <li class="dropdown">
+		      <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Admin Guide<span class="caret"></span></a>
+          <ul class="dropdown-menu">
+            <li><a href="/docs/0.4.0-incubating/deployment/cluster">Cluster Setup</a></li>
+            
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/admin_guide/operations.html">
+                Operations
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/admin_guide/performance.html">
+                Performance Tuning
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/admin_guide/loadtest.html">
+                Load Test
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/admin_guide/hardware.html">
+                Hardware
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/admin_guide/monitoring.html">
+                Monitoring
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/admin_guide/zookeeper.html">
+                ZooKeeper
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/admin_guide/bookkeeper.html">
+                BookKeeper
+              </a>
+            </li>
+            
+          </ul>
+        </li>
+        <!-- Tutorials -->
+        <li class="dropdown">
+		      <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Tutorials<span class="caret"></span></a>
+          <ul class="dropdown-menu">
+            <li class="dropdown-header"><strong>Basic</strong></li>
+            <li><a href="/docs/0.4.0-incubating/tutorials/basic-1">Write Records (via Core Library)</a></li>
+            <li><a href="/docs/0.4.0-incubating/tutorials/basic-2">Write Records (via Write Proxy)</a></li>
+            <li><a href="/docs/0.4.0-incubating/tutorials/basic-3">Write Records to multiple streams</a></li>
+            <li><a href="/docs/0.4.0-incubating/tutorials/basic-4">Atomic Write Records</a></li>
+            <li><a href="/docs/0.4.0-incubating/tutorials/basic-5">Tailing Read Records</a></li>
+            <li><a href="/docs/0.4.0-incubating/tutorials/basic-6">Rewind Read Records</a></li>
+            <li role="separator" class="divider"></li>
+            <li class="dropdown-header"><strong>Messaging</strong></li>
+            
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/tutorials/messaging-1.html">
+                Write records to partitioned streams
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/tutorials/messaging-2.html">
+                Write records to multiple streams (load balancer)
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/tutorials/messaging-3.html">
+                At-least-once Processing
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/tutorials/messaging-4.html">
+                Exact-Once Processing
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/tutorials/messaging-5.html">
+                Implement a kafka-like pub/sub system
+              </a>
+            </li>
+            
+            <li role="separator" class="divider"></li>
+            <li class="dropdown-header"><strong>Replicated State Machines</strong></li>
+            
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/tutorials/replicatedstatemachines.html">
+                Build replicated state machines
+              </a>
+            </li>
+            
+            <li role="separator" class="divider"></li>
+            <li class="dropdown-header"><strong>Analytics</strong></li>
+            <li><a href="/docs/0.4.0-incubating/tutorials/analytics-mapreduce">Process log streams using MapReduce</a></li>
+          </ul>
+        </li>
+      </ul>
+    </div><!--/.nav-collapse -->
+  </div>
+</nav>
+
+
+<link rel="stylesheet" href="">
+
+
+    <div class="container" role="main">
+
+      <div class="row">
+        
+        <!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+
+<div class="row">
+  <!-- Sub Navigation -->
+  <div class="col-sm-3">
+    <ul id="sub-nav">
+      
+      
+      
+        
+        <li><a href="/docs/0.4.0-incubating/user_guide/main.html" class="">User Guide</a>
+          
+          <ul>
+            
+              
+              <li>
+                <a href="/docs/0.4.0-incubating/basics/introduction.html" class="">
+                  Introduction
+                </a>
+                
+                <ul>
+                  
+                </ul>
+                
+              </li>
+            
+              
+              <li>
+                <a href="/docs/0.4.0-incubating/user_guide/considerations/main.html" class="">
+                  Considerations
+                </a>
+                
+                <ul>
+                  
+                </ul>
+                
+              </li>
+            
+              
+              <li>
+                <a href="/docs/0.4.0-incubating/user_guide/architecture/main.html" class="">
+                  Architecture
+                </a>
+                
+                <ul>
+                  
+                </ul>
+                
+              </li>
+            
+              
+              <li>
+                <a href="/docs/0.4.0-incubating/user_guide/api/main.html" class="">
+                  API
+                </a>
+                
+                <ul>
+                  
+                    
+                    <li>
+                      <a href="/docs/0.4.0-incubating/user_guide/api/core.html" class="active">
+                        Core Library API
+                      </a>
+                    </li>
+                  
+                    
+                    <li>
+                      <a href="/docs/0.4.0-incubating/user_guide/api/proxy.html" class="active">
+                        Proxy Client API
+                      </a>
+                    </li>
+                  
+                    
+                    <li>
+                      <a href="/docs/0.4.0-incubating/user_guide/api/practice.html" class="active">
+                        Best Practise
+                      </a>
+                    </li>
+                  
+                </ul>
+                
+              </li>
+            
+              
+              <li>
+                <a href="/docs/0.4.0-incubating/user_guide/configuration/main.html" class="">
+                  Configuration
+                </a>
+                
+                <ul>
+                  
+                    
+                    <li>
+                      <a href="/docs/0.4.0-incubating/user_guide/configuration/core.html" class="active">
+                        Core Library Configuration
+                      </a>
+                    </li>
+                  
+                    
+                    <li>
+                      <a href="/docs/0.4.0-incubating/user_guide/configuration/proxy.html" class="active">
+                        Write Proxy Configuration
+                      </a>
+                    </li>
+                  
+                    
+                    <li>
+                      <a href="/docs/0.4.0-incubating/user_guide/configuration/client.html" class="active">
+                        Client Configuration
+                      </a>
+                    </li>
+                  
+                    
+                    <li>
+                      <a href="/docs/0.4.0-incubating/user_guide/configuration/perlog.html" class="active">
+                        Per Stream Configuration
+                      </a>
+                    </li>
+                  
+                </ul>
+                
+              </li>
+            
+              
+              <li>
+                <a href="/docs/0.4.0-incubating/user_guide/design/main.html" class="active">
+                  Detail Design
+                </a>
+                
+                <ul>
+                  
+                </ul>
+                
+              </li>
+            
+              
+              <li>
+                <a href="/docs/0.4.0-incubating/user_guide/globalreplicatedlog/main.html" class="">
+                  Global Replicated Log
+                </a>
+                
+                <ul>
+                  
+                </ul>
+                
+              </li>
+            
+              
+              <li>
+                <a href="/docs/0.4.0-incubating/user_guide/implementation/main.html" class="">
+                  Implementation
+                </a>
+                
+                <ul>
+                  
+                    
+                    <li>
+                      <a href="/docs/0.4.0-incubating/user_guide/implementation/storage.html" class="active">
+                        Storage
+                      </a>
+                    </li>
+                  
+                </ul>
+                
+              </li>
+            
+              
+              <li>
+                <a href="/docs/0.4.0-incubating/user_guide/references/main.html" class="">
+                  References
+                </a>
+                
+                <ul>
+                  
+                    
+                    <li>
+                      <a href="/docs/0.4.0-incubating/user_guide/references/metrics.html" class="active">
+                        Metrics
+                      </a>
+                    </li>
+                  
+                    
+                    <li>
+                      <a href="/docs/0.4.0-incubating/user_guide/references/features.html" class="active">
+                        Available Features
+                      </a>
+                    </li>
+                  
+                </ul>
+                
+              </li>
+            
+          </ul>
+          
+        </li>
+      
+    </ul>
+  </div>
+  <!-- Main -->
+  <div class="col-sm-9">
+    <!-- Top anchor -->
+    <a href="#top"></a>
+
+    <!-- Breadcrumbs above the main heading -->
+    <ol class="breadcrumb">
+
+      
+      
+      
+
+      
+      
+
+      
+
+      
+
+      <li><a href="/docs/0.4.0-incubating/user_guide/main.html">User Guide</a></li>
+      
+      
+      <li class="active">Detail Design</li>
+    </ol>
+
+    <div class="text">
+      <!-- Content -->
+      <div class="contents topic" id="detail-design">
+<p class="topic-title first">Detail Design</p>
+<ul class="simple">
+<li><a class="reference internal" href="#id1" id="id2">Detail Design</a><ul>
+<li><a class="reference internal" href="#consistency" id="id3">Consistency</a><ul>
+<li><a class="reference internal" href="#lastaddconfirmed" id="id4">LastAddConfirmed</a></li>
+<li><a class="reference internal" href="#fencing" id="id5">Fencing</a></li>
+<li><a class="reference internal" href="#ownership-tracking" id="id6">Ownership Tracking</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#streaming-reads" id="id7">Streaming Reads</a><ul>
+<li><a class="reference internal" href="#positioning" id="id8">Positioning</a></li>
+<li><a class="reference internal" href="#reading" id="id9">Reading</a></li>
+<li><a class="reference internal" href="#notifications" id="id10">Notifications</a></li>
+<li><a class="reference internal" href="#readahead" id="id11">ReadAhead</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#logsegment-lifecycle" id="id12">LogSegment Lifecycle</a><ul>
+<li><a class="reference internal" href="#distribution" id="id13">Distribution</a></li>
+<li><a class="reference internal" href="#truncation" id="id14">Truncation</a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+<div class="section" id="id1">
+<h2><a class="toc-backref" href="#id2">Detail Design</a></h2>
+<p>We will describe the design choices that we made while implementing DistributedLog and why we built such layered architecture.</p>
+<div class="section" id="consistency">
+<h3><a class="toc-backref" href="#id3">Consistency</a></h3>
+<p>DistributedLog achieves strong consistency, using the <cite>fencing</cite> mechanism provided in the log segment store to guarantee data consistency
+and <cite>versioned updates</cite> in the metadata store to guarantee metadata consistency.</p>
+<div class="section" id="lastaddconfirmed">
+<h4><a class="toc-backref" href="#id4">LastAddConfirmed</a></h4>
+<p>DistributedLog leverages bookkeeper's <cite>LAC</cite> (LastAddConfirmed) protocol - a variation of <cite>two-phase-commit</cite> algorithm to build its data pipeline
+and achieve consistency around it. Figure 1 illustrates the basic concepts of this protocol.</p>
+<div class="figure align-center">
+<img alt="../../images/lacprotocol.png" src="../../images/lacprotocol.png" />
+<p class="caption">Figure 1. Consistency in Log Segment Store</p>
+</div>
+<p>Each batched entry appended to a log segment will be assigned a monotonically increasing entry id by the log segment writer. All the entries are
+written asynchronously in a pipeline. The log segment writer therefore updates an in-memory pointer, called <cite>LAP</cite> (LastAddPushed), which is the
+entry id of the last batched entry pushed to log segment store by the writer. The entries could be written out of order but only be acknowledged
+in entry id order. Along with the successful acknowledges, the log segment writer also updates an in-memory pointer, called <cite>LAC</cite> (LastAddConfirmed).
+LAC is the entry id of the last entry that already acknowledged by the writer. All the entries written between LAC and LAP are unacknowledged data,
+which they are not visible to readers.</p>
+<p>The readers can read entries up to LAC as those entries are known to be durably replicated - thereby can be safely read without the risk of violating
+read ordering. The writer includes the current LAC in each entry that it sends to BookKeeper. Therefore each subsequent entry makes the records in
+the previous entry visible to the readers. LAC updates can be piggybacked on the next entry that are written by the writer. Since readers are strictly
+followers, they can leverage LAC to read durable data from any of the replicas without need for any communication or coordination with the writer.</p>
+<p>DL introduces one type of system record, which is called <cite>control record</cite> - it acts as the <cite>commit</cite> request in <cite>two-phases-commit</cite> algorithm.
+If no application records arrive within the specified SLA, the writer will generate a control record. With writing the control record, it would advance
+the LAC of the log stream. The control record is added either immediately after receiving acknowledges from writing a user record or periodically if
+no application records are added. It is configured as part of writer's flushing policy. While control log records are present in the physical log stream,
+they are not delivered by the log readers to the application.</p>
+</div>
+<div class="section" id="fencing">
+<h4><a class="toc-backref" href="#id5">Fencing</a></h4>
+<p>LAC is a very simple and useful mechanism to guarantee consistency across readers. But it is not enough to guarantee correctness when the ownership
+of a log stream is changed - there might be multiple writers exist at the same time when network partition happens. DistributedLog addresses this by <cite>fencing</cite>
+data in log segment store and conditionally (via versioned set) updating log segment metadata in metadata store. Fencing is a built-in mechanism in bookkeeper - when
+a client wants to fence a ledger, it would send a special fence request to all the replicas of that ledger; the bookies that host that ledger will change the state of
+that ledger to fenced. once a ledger's state is changed to fenced, all the write attempts to it would be failed immediately. Client claims a success fence when
+it receives successful fence responses from majorities of the replicas.</p>
+<p>Figure 2 illustrates how does DistributedLog work when ownership is changed for a log stream.</p>
+<div class="figure align-center">
+<img alt="../../images/fencing.png" src="../../images/fencing.png" />
+<p class="caption">Figure 2. Fencing &amp; Consistency</p>
+</div>
+<p>Whenever the ownership is changed from one writer to the other writer (step 0), the new owner of the log stream will first retrieve the list of log segments of
+that log stream along with their versions (the versions will used for versioned set on updating log segments' metadata). The new owner will find current inprogress
+log segment and recover the log segment in following sequence:</p>
+<ol class="arabic simple">
+<li>It would first fence the log segment (step 2.1). Fencing successfully means no writes will succeed any more after that.</li>
+<li>If the old owner is just network partitioned, it might still think itself is the owner and keep adding records to that log segment.  But because the log segment has been fenced, so all writes by the old owner will be rejected and failed (step 2.2). The old owner will realize that it already lost the ownership and gave up.</li>
+<li>Once the log segment is fenced, the new owner will proceed a recovery process to recover the log segment. Once the log segment is recovered, it would issue a versioned set operation to metadata store to convert the log segment status from inprogress to completed (step 2.3).</li>
+<li>A new inprogress log segment will be created by the new writer to continue writing to this log stream (step 3).</li>
+</ol>
+<p>Completing an inprogress log segment and creating a new log segment could be executed in parallel to achieve fast log stream recovery. It will reduce the latency
+penalty for writes during ownership changed.</p>
+<p>Creating a new log segment during ownership change is known as '<em>obtaining an epoch during leader election</em>' in distributed consensus algorithms. It makes clean
+implementation for a replicated log service, as the client that lost the ownership (aka mastership, lock) doesn't even know the identity of the new epoch (in DL,
+it is the new log segment id) so it can't accidentally write to the new log segment. We leverage zookeeper's sequential znode on generating new log segment id.</p>
+</div>
+<div class="section" id="ownership-tracking">
+<h4><a class="toc-backref" href="#id6">Ownership Tracking</a></h4>
+<p>With the built-in fencing mechanism in storage layer and metadata updates, DistributedLog doesn't require strict leader election
+to guarantee correctness. Therefore we use '<cite>ownership tracking</cite>' as opposed to '<cite>leader election</cite>' for the log stream ownership management.</p>
+<p>DistributedLog uses ZooKeeper ephemeral znodes for tracking the ownerships of log streams. Since ZooKeeper already provides <cite>sessions</cite> that
+can be used to track leases for failure detection. In production environment, we tuned the zookeeper settings to ensure failures could be
+detected within one second. An aggressive bound on failure detection increases the possibility of false positives. If ownerships flap between
+write proxies, delays will result from writes blocking for log stream recovery. <cite>Deterministic routing</cite> allows multiple clients to choose the
+same write proxy to fail over when the current owner proxy is unavailable. The details are described in Figure 3.</p>
+<div class="figure align-center">
+<img alt="../../images/requestrouting.png" src="../../images/requestrouting.png" />
+<p class="caption">Figure 3. Request Routing</p>
+</div>
+<p>Applications write the log records by the write client. Write client will first look up the <cite>ownership cache</cite>, a local cache that caches mapping
+between log stream name and its corresponding log stream owner. If the stream is not cached yet, the client will use consistent hashing based
+<cite>routing service</cite> to compute a candidate write proxy (step 1.1) and then send the write request to this candidate write proxy (step 1.2). If it
+already owns the log stream or it could successfully claim the ownership, it would satisfy the write request and respond back to the client (step 1.3).
+If it can't claim the ownership, it then send the response back to the client to ask it redirect to the right owner (1.4). All succeed write requests
+will keep the local ownership cache up-to-date, which help avoiding the subsequent requests being redirected.</p>
+</div>
+</div>
+<div class="section" id="streaming-reads">
+<h3><a class="toc-backref" href="#id7">Streaming Reads</a></h3>
+<p>After the readers have caught up to the current tail of the log, DistributedLog provides readers the ability to read new log records as they are
+published - a mechanism commonly known as <cite>tailing</cite> the log. Readers start out by <strong>positioning</strong> to a record in the log stream based on either DLSN or
+Transaction ID. The reader starts <strong>reading</strong> records until it reaches the tail of the log stream. Once it has caught up with the writer, the reader waits
+for <strong>notifications</strong> about new log records or new log segments.</p>
+<div class="section" id="positioning">
+<h4><a class="toc-backref" href="#id8">Positioning</a></h4>
+<p>As mentioned above, there are 3 types of sequence numbers are associated with a log record. Except sequence id is computed at reading time, both DLSN (implicit)
+and Transaction ID (explicit) are attached to log records in writing time. Applications could use either of them for positioning. DLSN is the best sequence number
+on positioning, as it already tells which log segment, which entry and which slot of the record in the log stream. No additional search operations are required.
+While Transaction ID is assigned by applications, positioning a reader by transaction id will first look up the list of log segments to find which log segment
+contains the given transaction id and then look up the records in the found log segment to figure out the actual position within that log segment.
+Both looking up in the log segment list and the found log segment use binary search to speed up the searching. Although positioning by transaction id could be a
+bit slower than positioning by DLSN, it is useful for analytics workloads to rewind to analyze old data in hours if the transaction id is timestamp.</p>
+</div>
+<div class="section" id="reading">
+<h4><a class="toc-backref" href="#id9">Reading</a></h4>
+<p>Figure 4 illustrates reading batched entries from log segment store. The are two basic read operations: read a given entry by entry id (a) and read LAC (b).</p>
+<div class="figure align-center">
+<img alt="../../images/readrequests.png" src="../../images/readrequests.png" />
+<p class="caption">Figure 4. Read entries from log segment store</p>
+</div>
+<p>Since an entry is immutable after it is appended to a log segment, reading a given entry by entry id could go to any replicas of that log segment and retry others
+if encountered failures. In order to achieve low predictable 99.9 percentile latency even during bookie failures, a <strong>speculative</strong> read mechanism is deployed:
+a read request will be sent to first replica; if client doesn't receive the response with a speculative timeout, it would send another request to second replica;
+then wait for the responses of both first replica and second replica; and so forth until receiving a valid response to complete the read request or timeout.</p>
+<p>Reading LAC is an operation for readers to catch up with the writer. It is typically a quorum-read operation to guarantee freshness: the client sends the read requests
+to all replicas in the log segment and waits for the responses from the majority of them. It could be optimized to be a best-effort quorum-read operation for tailing reads,
+which it doesn't have to wait for quorum responses from the replicas and could return whenever it sees an advanced LAC.</p>
+<p><cite>Figure 4(c)</cite> illustrates the third type of read request, which is called <cite>&quot;Long Poll Read&quot;</cite>. It is a combination of (a) and (b), serving the purpose of
+reading next available entry in the log segment. The client sends a long poll read request along with next read entry id to the log segment store.
+If the log segment store already saw the entry and it is committed (entry id is not greater than LAC), it responds the request immediately with latest LAC
+and requested entry. Otherwise, it would wait for LAC being advanced to given entry id and respond back requested entry. Similar speculative mechanism is
+deployed in long polling to achieve predictable low 99.9 percentile latency.</p>
+</div>
+<div class="section" id="notifications">
+<h4><a class="toc-backref" href="#id10">Notifications</a></h4>
+<p>Once the reader is caught up with the writer, it would turn itself into <cite>'notification'</cite> mode. In this mode, it would wait notifications of new records
+by <cite>long polling</cite> reads (described above) and <cite>notification</cite> of state changes of log segments. The notification mechanism for state changes of log segments
+is provided by Metadata Store. Currently it is ZooKeeper watcher. The notifications are triggered when an inprogress log segment is completed or a new inprogress
+log segment is created.</p>
+</div>
+<div class="section" id="readahead">
+<h4><a class="toc-backref" href="#id11">ReadAhead</a></h4>
+<p>The reader will read ahead to proactively bring new data into cache, for applications to consume. It helps reducing the read latency as it proactively brings newer
+data into cache while applications consuming them. DistributedLog uses LAC as an indicator to detect if a reader is still catching up or already caught up and
+adjusting the readahead pace based on the reader state and its consuming rate.</p>
+</div>
+</div>
+<div class="section" id="logsegment-lifecycle">
+<h3><a class="toc-backref" href="#id12">LogSegment Lifecycle</a></h3>
+<p>DistributedLog breaks a log stream down into multiple log segments based configured rolling policy. The current inprogress log segment will be completed
+and a new log segment will be created when either the log segment has been written for more than a configured rolling interval (aka time-based rolling),
+the size of the log segment has reached a configured threshold (aka size-based rolling), or whenever the ownership of a log stream is changed.</p>
+<p>A new log segment is created in <cite>Inprogress</cite> state. It is completed as a <cite>Completed</cite> log segment when either the writer rolls into a new log segment or
+recovered when ownership changed. Once the log segment is completed, it will be truncated later either by <cite>explicit truncation</cite> or <cite>expired due to TTL timeout</cite>.
+The log segment will be marked as <cite>Partial Truncated</cite> along with a <cite>Min-Active-DLSN</cite> pointer when only portion of its data is truncated, and <cite>Truncated</cite> when
+the <cite>Min-Active-DLSN</cite> pointer reaches the end of the log segment. The truncated log segments will be moved to Cold Storage for longer retention or backup for
+disaster recovery, and eventually be deleted after TTL expiration. Figure 5 illustrates a log stream that contains 5 log segments which each of them are in
+different states. The dot line describes the transition between states.</p>
+<div class="figure align-center">
+<img alt="../../images/logsegments.png" src="../../images/logsegments.png" />
+<p class="caption">Figure 5. The lifecycle of log segments</p>
+</div>
+<div class="section" id="distribution">
+<h4><a class="toc-backref" href="#id13">Distribution</a></h4>
+<p>A log segment is placed on multiple log segment storage nodes according configured placement policy. DistributedLog uses a <cite>rack-aware</cite> placement policy on
+placing log segments in a local datacenter setup, which the rack-aware placement policy will guarantee all the replicas of same log segment placed in
+different racks for network fault-tolerance. It uses a <cite>region-aware</cite> placement policy on placing log segments among multiple datacenters for a global setup
+(see more in section <cite>&quot;Global Replicated Log&quot;</cite>), which guarantees all the replicas of same log segment placed in multiple datacenters and ensures receiving
+acknowledges from majority of the data centers.</p>
+<p>As DistributedLog breaks down the streams into multiple log segments, the log segments could be evenly distributed across multiple log segment storage nodes
+for load balancing. It helps the data distribution balancing and read workload balancing. Figure 6 shows an example how the data of 2 streams (<em>x</em>, <em>y</em>) is
+stored as 3 replicas in a <em>5-nodes</em> cluster in a balanced way.</p>
+<div class="figure align-center">
+<img alt="../../images/distribution.png" src="../../images/distribution.png" />
+<p class="caption">Figure 6. Log Segment Distribution Example</p>
+</div>
+</div>
+<div class="section" id="truncation">
+<h4><a class="toc-backref" href="#id14">Truncation</a></h4>
+<p>As the writers keep writing records into the log streams, the data will be accumulated. In DistributedLog,
+there are two ways to delete old data, one is <cite>Explicit Truncation</cite> while the other is <cite>TTL Expiration</cite>.</p>
+<p>Applications are allowed to explicitly truncate a log stream to a given DLSN. Once the truncation request is
+received by the writer, the writer will mark all the log segments whose log segment sequence number is less than
+the sequence number of that DLSN as <cite>Truncated</cite>. The log segment segment whose sequence number is same as that
+DLSN will be marked as <cite>Partially Truncated</cite> along and the DLSN as the last active DLSN. So positioning the reader
+will be advanced to last active DLSN if the provided position is already truncated. All the truncated log segments
+will be still kept for a configured time period for disaster recovery and the actual log segments will be deleted
+and garbage collected via <cite>TTL Expiration</cite>.</p>
+<p>When a log segment is completed, the completion time will be recorded as part of the log segment metadata.
+DistributedLog uses <cite>completion time</cite> for TTL Expiration: all the log segments whose completion time already
+passed the configured TTL period will be deleted from metadata store. After the log segments are deleted from
+metadata store, the log segments will be garbage collected from log segment store and their disk spaces will be
+reclaimed.</p>
+</div>
+</div>
+</div>
+
+
+    </div>
+  </div>
+</div>
+
+
+
+      </div>
+
+
+    <hr>
+  <div class="row">
+      <div class="col-xs-12">
+          <footer>
+              <p class="text-center">&copy; Copyright 2016
+                  <a href="http://www.apache.org">The Apache Software Foundation.</a> All Rights Reserved.
+              </p>
+              <p class="text-center">
+                  <a href="/docs/0.4.0-incubating/feed.xml">RSS Feed</a>
+              </p>
+          </footer>
+      </div>
+  </div>
+  <!-- container div end -->
+</div>
+
+
+    <script>
+  (function () {
+    'use strict';
+    anchors.options.placement = 'right';
+    anchors.add();
+  })();
+</script>
+
+  </body>
+
+</html>

http://git-wip-us.apache.org/repos/asf/incubator-distributedlog/blob/ef7245e8/content/docs/0.4.0-incubating/user_guide/globalreplicatedlog/main.html
----------------------------------------------------------------------
diff --git a/content/docs/0.4.0-incubating/user_guide/globalreplicatedlog/main.html b/content/docs/0.4.0-incubating/user_guide/globalreplicatedlog/main.html
new file mode 100644
index 0000000..316a2af
--- /dev/null
+++ b/content/docs/0.4.0-incubating/user_guide/globalreplicatedlog/main.html
@@ -0,0 +1,696 @@
+<!DOCTYPE html>
+<html lang="en">
+
+  <head>
+  <meta charset="utf-8">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+
+  <title>Apache DistributedLog (incubating)</title>
+  <meta name="description" content="Apache DistributedLog is an high performance replicated log.
+">
+
+  <link rel="stylesheet" href="/docs/0.4.0-incubating/styles/site.css">
+  <link rel="stylesheet" href="/docs/0.4.0-incubating/css/theme.css">
+  <!-- JQuery -->
+  <script src="https://ajax.googleapis.com/ajax/libs/jquery/2.2.0/jquery.min.js"></script>
+  <script src="/docs/0.4.0-incubating/js/bootstrap.min.js"></script>
+  <link rel="canonical" href="http://distributedlog.incubator.apache.org/docs/0.4.0-incubating/user_guide/globalreplicatedlog/main.html" data-proofer-ignore>
+  <link rel="alternate" type="application/rss+xml" title="Apache DistributedLog (incubating)" href="http://distributedlog.incubator.apache.org/docs/0.4.0-incubating/feed.xml">
+  <!-- Font Awesome -->
+  <script src="//cdnjs.cloudflare.com/ajax/libs/anchor-js/3.2.0/anchor.min.js"></script>
+  <!-- Google Analytics -->
+  <script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+    (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+    m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
+
+  ga('create', 'UA-83870961-1', 'auto');
+  ga('send', 'pageview');
+  </script>
+  <!-- End Google Analytics -->
+  <link rel="shortcut icon" type="image/x-icon" href="/images/favicon.ico">
+</head>
+
+
+  <body role="document">
+
+    
+<nav class="navbar navbar-default navbar-fixed-top">
+  <div class="container">
+    <div class="navbar-header">
+      <a href="/" class="navbar-brand" >
+        <img alt="Brand" style="height: 28px" src="/docs/0.4.0-incubating/images/distributedlog_logo_navbar.png">
+      </a>
+      <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false" aria-controls="navbar">
+        <span class="sr-only">Toggle navigation</span>
+        <span class="icon-bar"></span>
+        <span class="icon-bar"></span>
+        <span class="icon-bar"></span>
+      </button>
+    </div>
+    <div id="navbar" class="navbar-collapse collapse">
+      <ul class="nav navbar-nav">
+        <!-- Overview -->
+        <li><a href="/docs/0.4.0-incubating/">V0.4.0</a></li>
+        <!-- Concepts -->
+        <li><a href="/docs/0.4.0-incubating/basics/introduction">Concepts</a></li>
+        <!-- Quick Start -->
+        <li>
+          <a href="/docs/0.4.0-incubating/start" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Start<span class="caret"></span></a>
+          <ul class="dropdown-menu" role="menu">
+            
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/start/building.html">
+                Build DistributedLog from Source
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/start/download.html">
+                Download Releases
+              </a>
+            </li>
+            
+            <li role="separator" class="divider"></li>
+            <li class="dropdown-header"><strong>Quickstart</strong></li>
+            
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/start/quickstart.html">
+                Setup & Run Example
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/tutorials/basic-1.html">
+                API - Write Records (via core library)
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/tutorials/basic-2.html">
+                API - Write Records (via write proxy)
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/tutorials/basic-5.html">
+                API - Read Records
+              </a>
+            </li>
+            
+            <li role="separator" class="divider"></li>
+            <li class="dropdown-header"><strong>Deployment</strong></li>
+            
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/deployment/cluster.html">
+                Cluster Setup
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/deployment/global-cluster.html">
+                Global Cluster Setup
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/deployment/docker.html">
+                Docker
+              </a>
+            </li>
+            
+          </ul>
+        </li>
+        <!-- API -->
+        <li>
+          <a href="/docs/0.4.0-incubating/start" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">API<span class="caret"></span></a>
+          <ul class="dropdown-menu" role="menu">
+            <li><a href="/docs/0.4.0-incubating/api/java">Java</a></li>
+          </ul>
+        </li>
+        <!-- User Guide -->
+        <li class="dropdown">
+		      <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">User Guide<span class="caret"></span></a>
+          <ul class="dropdown-menu">
+            
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/basics/introduction.html">
+                Introduction
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/user_guide/considerations/main.html">
+                Considerations
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/user_guide/architecture/main.html">
+                Architecture
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/user_guide/api/main.html">
+                API
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/user_guide/configuration/main.html">
+                Configuration
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/user_guide/design/main.html">
+                Detail Design
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/user_guide/globalreplicatedlog/main.html">
+                Global Replicated Log
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/user_guide/implementation/main.html">
+                Implementation
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/user_guide/references/main.html">
+                References
+              </a>
+            </li>
+            
+          </ul>
+        </li>
+        <!-- Admin Guide -->
+        <li class="dropdown">
+		      <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Admin Guide<span class="caret"></span></a>
+          <ul class="dropdown-menu">
+            <li><a href="/docs/0.4.0-incubating/deployment/cluster">Cluster Setup</a></li>
+            
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/admin_guide/operations.html">
+                Operations
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/admin_guide/performance.html">
+                Performance Tuning
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/admin_guide/loadtest.html">
+                Load Test
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/admin_guide/hardware.html">
+                Hardware
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/admin_guide/monitoring.html">
+                Monitoring
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/admin_guide/zookeeper.html">
+                ZooKeeper
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/admin_guide/bookkeeper.html">
+                BookKeeper
+              </a>
+            </li>
+            
+          </ul>
+        </li>
+        <!-- Tutorials -->
+        <li class="dropdown">
+		      <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Tutorials<span class="caret"></span></a>
+          <ul class="dropdown-menu">
+            <li class="dropdown-header"><strong>Basic</strong></li>
+            <li><a href="/docs/0.4.0-incubating/tutorials/basic-1">Write Records (via Core Library)</a></li>
+            <li><a href="/docs/0.4.0-incubating/tutorials/basic-2">Write Records (via Write Proxy)</a></li>
+            <li><a href="/docs/0.4.0-incubating/tutorials/basic-3">Write Records to multiple streams</a></li>
+            <li><a href="/docs/0.4.0-incubating/tutorials/basic-4">Atomic Write Records</a></li>
+            <li><a href="/docs/0.4.0-incubating/tutorials/basic-5">Tailing Read Records</a></li>
+            <li><a href="/docs/0.4.0-incubating/tutorials/basic-6">Rewind Read Records</a></li>
+            <li role="separator" class="divider"></li>
+            <li class="dropdown-header"><strong>Messaging</strong></li>
+            
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/tutorials/messaging-1.html">
+                Write records to partitioned streams
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/tutorials/messaging-2.html">
+                Write records to multiple streams (load balancer)
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/tutorials/messaging-3.html">
+                At-least-once Processing
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/tutorials/messaging-4.html">
+                Exact-Once Processing
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/tutorials/messaging-5.html">
+                Implement a kafka-like pub/sub system
+              </a>
+            </li>
+            
+            <li role="separator" class="divider"></li>
+            <li class="dropdown-header"><strong>Replicated State Machines</strong></li>
+            
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/tutorials/replicatedstatemachines.html">
+                Build replicated state machines
+              </a>
+            </li>
+            
+            <li role="separator" class="divider"></li>
+            <li class="dropdown-header"><strong>Analytics</strong></li>
+            <li><a href="/docs/0.4.0-incubating/tutorials/analytics-mapreduce">Process log streams using MapReduce</a></li>
+          </ul>
+        </li>
+      </ul>
+    </div><!--/.nav-collapse -->
+  </div>
+</nav>
+
+
+<link rel="stylesheet" href="">
+
+
+    <div class="container" role="main">
+
+      <div class="row">
+        
+        <!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+
+<div class="row">
+  <!-- Sub Navigation -->
+  <div class="col-sm-3">
+    <ul id="sub-nav">
+      
+      
+      
+        
+        <li><a href="/docs/0.4.0-incubating/user_guide/main.html" class="">User Guide</a>
+          
+          <ul>
+            
+              
+              <li>
+                <a href="/docs/0.4.0-incubating/basics/introduction.html" class="">
+                  Introduction
+                </a>
+                
+                <ul>
+                  
+                </ul>
+                
+              </li>
+            
+              
+              <li>
+                <a href="/docs/0.4.0-incubating/user_guide/considerations/main.html" class="">
+                  Considerations
+                </a>
+                
+                <ul>
+                  
+                </ul>
+                
+              </li>
+            
+              
+              <li>
+                <a href="/docs/0.4.0-incubating/user_guide/architecture/main.html" class="">
+                  Architecture
+                </a>
+                
+                <ul>
+                  
+                </ul>
+                
+              </li>
+            
+              
+              <li>
+                <a href="/docs/0.4.0-incubating/user_guide/api/main.html" class="">
+                  API
+                </a>
+                
+                <ul>
+                  
+                    
+                    <li>
+                      <a href="/docs/0.4.0-incubating/user_guide/api/core.html" class="active">
+                        Core Library API
+                      </a>
+                    </li>
+                  
+                    
+                    <li>
+                      <a href="/docs/0.4.0-incubating/user_guide/api/proxy.html" class="active">
+                        Proxy Client API
+                      </a>
+                    </li>
+                  
+                    
+                    <li>
+                      <a href="/docs/0.4.0-incubating/user_guide/api/practice.html" class="active">
+                        Best Practise
+                      </a>
+                    </li>
+                  
+                </ul>
+                
+              </li>
+            
+              
+              <li>
+                <a href="/docs/0.4.0-incubating/user_guide/configuration/main.html" class="">
+                  Configuration
+                </a>
+                
+                <ul>
+                  
+                    
+                    <li>
+                      <a href="/docs/0.4.0-incubating/user_guide/configuration/core.html" class="active">
+                        Core Library Configuration
+                      </a>
+                    </li>
+                  
+                    
+                    <li>
+                      <a href="/docs/0.4.0-incubating/user_guide/configuration/proxy.html" class="active">
+                        Write Proxy Configuration
+                      </a>
+                    </li>
+                  
+                    
+                    <li>
+                      <a href="/docs/0.4.0-incubating/user_guide/configuration/client.html" class="active">
+                        Client Configuration
+                      </a>
+                    </li>
+                  
+                    
+                    <li>
+                      <a href="/docs/0.4.0-incubating/user_guide/configuration/perlog.html" class="active">
+                        Per Stream Configuration
+                      </a>
+                    </li>
+                  
+                </ul>
+                
+              </li>
+            
+              
+              <li>
+                <a href="/docs/0.4.0-incubating/user_guide/design/main.html" class="">
+                  Detail Design
+                </a>
+                
+                <ul>
+                  
+                </ul>
+                
+              </li>
+            
+              
+              <li>
+                <a href="/docs/0.4.0-incubating/user_guide/globalreplicatedlog/main.html" class="active">
+                  Global Replicated Log
+                </a>
+                
+                <ul>
+                  
+                </ul>
+                
+              </li>
+            
+              
+              <li>
+                <a href="/docs/0.4.0-incubating/user_guide/implementation/main.html" class="">
+                  Implementation
+                </a>
+                
+                <ul>
+                  
+                    
+                    <li>
+                      <a href="/docs/0.4.0-incubating/user_guide/implementation/storage.html" class="active">
+                        Storage
+                      </a>
+                    </li>
+                  
+                </ul>
+                
+              </li>
+            
+              
+              <li>
+                <a href="/docs/0.4.0-incubating/user_guide/references/main.html" class="">
+                  References
+                </a>
+                
+                <ul>
+                  
+                    
+                    <li>
+                      <a href="/docs/0.4.0-incubating/user_guide/references/metrics.html" class="active">
+                        Metrics
+                      </a>
+                    </li>
+                  
+                    
+                    <li>
+                      <a href="/docs/0.4.0-incubating/user_guide/references/features.html" class="active">
+                        Available Features
+                      </a>
+                    </li>
+                  
+                </ul>
+                
+              </li>
+            
+          </ul>
+          
+        </li>
+      
+    </ul>
+  </div>
+  <!-- Main -->
+  <div class="col-sm-9">
+    <!-- Top anchor -->
+    <a href="#top"></a>
+
+    <!-- Breadcrumbs above the main heading -->
+    <ol class="breadcrumb">
+
+      
+      
+      
+
+      
+      
+
+      
+
+      
+
+      <li><a href="/docs/0.4.0-incubating/user_guide/main.html">User Guide</a></li>
+      
+      
+      <li class="active">Global Replicated Log</li>
+    </ol>
+
+    <div class="text">
+      <!-- Content -->
+      <div class="contents topic" id="global-replicated-log">
+<p class="topic-title first">Global Replicated Log</p>
+<ul class="simple">
+<li><a class="reference internal" href="#id1" id="id2">Global Replicated Log</a><ul>
+<li><a class="reference internal" href="#region-aware-data-placement-policy" id="id3">Region Aware Data Placement Policy</a></li>
+<li><a class="reference internal" href="#cross-region-speculative-reads" id="id4">Cross Region Speculative Reads</a></li>
+</ul>
+</li>
+</ul>
+</div>
+<div class="section" id="id1">
+<h2><a class="toc-backref" href="#id2">Global Replicated Log</a></h2>
+<p>A typical setup for DistributedLog is within a datacenter. But a global setup is required for
+providing global replicated logs for distributed key/value store to achieve strong consistency
+across multiple datacenters. <cite>Global</cite> here means across datacenters, which is different from
+<cite>Local</cite> meaning within a datacenter.</p>
+<p>A global setup of DistributedLog is organized as a set of <cite>regions</cite>, where each region is the
+rough analog of a local setup. Regions are the unit of administrative deployment. The set of
+regions is also the set of locations across which data can be replicated. Regions can be added
+to or removed from a running system as new datacenters are brought into service and old ones
+are turned off, respectively. Regions are also the unit of physical isolation: there may be one
+or more regions in a datacenter if they have isolated power or network supplies.</p>
+<div class="figure align-center">
+<img alt="../../images/globalreplicatedlog.png" src="../../images/globalreplicatedlog.png" style="width: 600px;" />
+<p class="caption">Figure 1. Global Replicated Log</p>
+</div>
+<p>Figure 1 illustrates the servers in a <cite>Global Replicated Log</cite> setup. There is no inter datacenter
+communication between write proxies or log segment storage nodes. The only component that does
+inter datacenters communications within its hosts is the 'Global' metadata store, which is a global
+setup of ZooKeeper. Write clients will talk to the write proxies in its local region to bootstrap
+the ownership cache and redirect to correct write proxies in other regions through direct TCP
+connections. While readers will identify the regions of the log segment storage nodes according to
+the <cite>region aware</cite> placement policy, and try reading from local region at most of the time and
+speculatively try on remote regions.</p>
+<div class="section" id="region-aware-data-placement-policy">
+<h3><a class="toc-backref" href="#id3">Region Aware Data Placement Policy</a></h3>
+<p>Region aware placement policy uses hierarchical allocation where-in nodes are allocated so that data
+is spread uniformly across the available regions and within each region it uses the <cite>rack-aware</cite>
+placement policy to spread the data uniformly across the available racks.</p>
+<p>Region aware placement policy is governed by a parameter ensures that the ack quorum covers at least
+<em>minRegionsForDurability</em> distinct regions. This ensures that the system can survive the failure of
+<cite>(totalRegions - minRegionsForDurability)</cite> regions without loss of availability. For example if we
+have bookie nodes in <em>5</em> regions and if the <em>minRegionsForDurability</em> is <em>3</em> then we can survive the
+failure of <cite>(5 - 3) = 2</cite> regions.</p>
+<p>The placement algorithm follows the following simple invariant:</p>
+<pre class="literal-block">
+There is no combination of nodes that would satisfy the ack quorum with
+less than &quot;minRegionsForDurability&quot; responses.
+</pre>
+<p>This invariant ensures that enforcing ack quorum is sufficient to enforce that the entry has been made durable
+in <em>minRegionsForDurability</em> regions.</p>
+<p>The <em>minRegionsForDurability</em> requirement enforces constraints on the minimum ack quorum as we want to ensure
+that when we run in degraded mode - <em>i.e. when only a subset of the regions are available</em> - we would still not
+be able to allocate nodes in such a way that the ack quorum would be satisfied by fewer than <em>minRegionsForDurability</em>
+regions.</p>
+<p>For instance consider the following scenario with three regions each containing 20 bookie nodes:</p>
+<pre class="literal-block">
+minRegionsForDurability = 2
+ensemble size = write quorum = 15
+ack quorum =  8
+</pre>
+<p>Let's say that one of the regions is currently unavailable and we want to still ensure that writes can continue.
+The ensemble placement may then have to choose bookies from the two available regions. Given that <em>15</em> bookies have
+to be allocated, we will have to allocate at least <em>8</em> bookies from one of the remaining regions - but with ack quorum
+of <em>8</em> we run the risk of satisfying ack quorum with bookies from a single region. Therefore we must require that
+the ack quorum is greater than <em>8</em>.</p>
+</div>
+<div class="section" id="cross-region-speculative-reads">
+<h3><a class="toc-backref" href="#id4">Cross Region Speculative Reads</a></h3>
+<p>As discussed before, read requests can be satisfied by any replica of the data, however for high availability
+speculative requests are sent to multiple copies to ensure that at least one of the requests returns within
+the time specified by the <em>SLA</em>. The reader consults the data placement policy to get the list of replicas that
+can satisfy the request in the order of preference. This order is decided as follows:</p>
+<ul class="simple">
+<li>The first node in the list is always the bookie node that is closest to the client - if more than one such nodes exist, one is chosen at random.</li>
+<li>The second node is usually the closest node in a different failure domain. In the case of a two level hierarchy that would be a node in a different rack.</li>
+<li>The third node is chosen from a different region</li>
+</ul>
+<p>The delay between successive speculative read requests ensures that the probability of sending the <em>nth</em>
+speculative read request decays exponentially with <em>n</em>. This ensures that the number of requests that go to
+farther nodes is still kept at a minimum. However by making sure that we cross failure domains in the first
+few speculative requests improves fault-tolerance of the reader. Transient node failures are transparently
+handled by the reader by this simple and generalizable speculative read policy. This can be thought of as
+the most granular form of failover where each request essentially fails-over to an alternate node if the
+primary node it attempted to access is unavailable. In practice we have found this to also better handle
+network congestion where routes between specific pairs of nodes may become unavailable without necessarily
+making the nodes completely inaccessible.</p>
+<p>In addition to static decisions based on the location of the bookie nodes, we can also make dynamic decisions
+based on observed latency or failure rates from specific bookies. These statistics are tracked by the bookie
+client and are used to influence the order in which speculative read requests are scheduled. This again is
+able to capture partial network outages that affect specific routes within the network.</p>
+</div>
+</div>
+
+
+    </div>
+  </div>
+</div>
+
+
+
+      </div>
+
+
+    <hr>
+  <div class="row">
+      <div class="col-xs-12">
+          <footer>
+              <p class="text-center">&copy; Copyright 2016
+                  <a href="http://www.apache.org">The Apache Software Foundation.</a> All Rights Reserved.
+              </p>
+              <p class="text-center">
+                  <a href="/docs/0.4.0-incubating/feed.xml">RSS Feed</a>
+              </p>
+          </footer>
+      </div>
+  </div>
+  <!-- container div end -->
+</div>
+
+
+    <script>
+  (function () {
+    'use strict';
+    anchors.options.placement = 'right';
+    anchors.add();
+  })();
+</script>
+
+  </body>
+
+</html>

http://git-wip-us.apache.org/repos/asf/incubator-distributedlog/blob/ef7245e8/content/docs/0.4.0-incubating/user_guide/implementation/core.html
----------------------------------------------------------------------
diff --git a/content/docs/0.4.0-incubating/user_guide/implementation/core.html b/content/docs/0.4.0-incubating/user_guide/implementation/core.html
new file mode 100644
index 0000000..b8027af
--- /dev/null
+++ b/content/docs/0.4.0-incubating/user_guide/implementation/core.html
@@ -0,0 +1,378 @@
+<!DOCTYPE html>
+<html lang="en">
+
+  <head>
+  <meta charset="utf-8">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+
+  <title>Apache DistributedLog (incubating)</title>
+  <meta name="description" content="Apache DistributedLog is an high performance replicated log.
+">
+
+  <link rel="stylesheet" href="/docs/0.4.0-incubating/styles/site.css">
+  <link rel="stylesheet" href="/docs/0.4.0-incubating/css/theme.css">
+  <!-- JQuery -->
+  <script src="https://ajax.googleapis.com/ajax/libs/jquery/2.2.0/jquery.min.js"></script>
+  <script src="/docs/0.4.0-incubating/js/bootstrap.min.js"></script>
+  <link rel="canonical" href="http://distributedlog.incubator.apache.org/docs/0.4.0-incubating/user_guide/implementation/core.html" data-proofer-ignore>
+  <link rel="alternate" type="application/rss+xml" title="Apache DistributedLog (incubating)" href="http://distributedlog.incubator.apache.org/docs/0.4.0-incubating/feed.xml">
+  <!-- Font Awesome -->
+  <script src="//cdnjs.cloudflare.com/ajax/libs/anchor-js/3.2.0/anchor.min.js"></script>
+  <!-- Google Analytics -->
+  <script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+    (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+    m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
+
+  ga('create', 'UA-83870961-1', 'auto');
+  ga('send', 'pageview');
+  </script>
+  <!-- End Google Analytics -->
+  <link rel="shortcut icon" type="image/x-icon" href="/images/favicon.ico">
+</head>
+
+
+  <body role="document">
+
+    
+<nav class="navbar navbar-default navbar-fixed-top">
+  <div class="container">
+    <div class="navbar-header">
+      <a href="/" class="navbar-brand" >
+        <img alt="Brand" style="height: 28px" src="/docs/0.4.0-incubating/images/distributedlog_logo_navbar.png">
+      </a>
+      <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false" aria-controls="navbar">
+        <span class="sr-only">Toggle navigation</span>
+        <span class="icon-bar"></span>
+        <span class="icon-bar"></span>
+        <span class="icon-bar"></span>
+      </button>
+    </div>
+    <div id="navbar" class="navbar-collapse collapse">
+      <ul class="nav navbar-nav">
+        <!-- Overview -->
+        <li><a href="/docs/0.4.0-incubating/">V0.4.0</a></li>
+        <!-- Concepts -->
+        <li><a href="/docs/0.4.0-incubating/basics/introduction">Concepts</a></li>
+        <!-- Quick Start -->
+        <li>
+          <a href="/docs/0.4.0-incubating/start" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Start<span class="caret"></span></a>
+          <ul class="dropdown-menu" role="menu">
+            
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/start/building.html">
+                Build DistributedLog from Source
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/start/download.html">
+                Download Releases
+              </a>
+            </li>
+            
+            <li role="separator" class="divider"></li>
+            <li class="dropdown-header"><strong>Quickstart</strong></li>
+            
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/start/quickstart.html">
+                Setup & Run Example
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/tutorials/basic-1.html">
+                API - Write Records (via core library)
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/tutorials/basic-2.html">
+                API - Write Records (via write proxy)
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/tutorials/basic-5.html">
+                API - Read Records
+              </a>
+            </li>
+            
+            <li role="separator" class="divider"></li>
+            <li class="dropdown-header"><strong>Deployment</strong></li>
+            
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/deployment/cluster.html">
+                Cluster Setup
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/deployment/global-cluster.html">
+                Global Cluster Setup
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/deployment/docker.html">
+                Docker
+              </a>
+            </li>
+            
+          </ul>
+        </li>
+        <!-- API -->
+        <li>
+          <a href="/docs/0.4.0-incubating/start" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">API<span class="caret"></span></a>
+          <ul class="dropdown-menu" role="menu">
+            <li><a href="/docs/0.4.0-incubating/api/java">Java</a></li>
+          </ul>
+        </li>
+        <!-- User Guide -->
+        <li class="dropdown">
+		      <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">User Guide<span class="caret"></span></a>
+          <ul class="dropdown-menu">
+            
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/basics/introduction.html">
+                Introduction
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/user_guide/considerations/main.html">
+                Considerations
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/user_guide/architecture/main.html">
+                Architecture
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/user_guide/api/main.html">
+                API
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/user_guide/configuration/main.html">
+                Configuration
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/user_guide/design/main.html">
+                Detail Design
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/user_guide/globalreplicatedlog/main.html">
+                Global Replicated Log
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/user_guide/implementation/main.html">
+                Implementation
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/user_guide/references/main.html">
+                References
+              </a>
+            </li>
+            
+          </ul>
+        </li>
+        <!-- Admin Guide -->
+        <li class="dropdown">
+		      <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Admin Guide<span class="caret"></span></a>
+          <ul class="dropdown-menu">
+            <li><a href="/docs/0.4.0-incubating/deployment/cluster">Cluster Setup</a></li>
+            
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/admin_guide/operations.html">
+                Operations
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/admin_guide/performance.html">
+                Performance Tuning
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/admin_guide/loadtest.html">
+                Load Test
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/admin_guide/hardware.html">
+                Hardware
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/admin_guide/monitoring.html">
+                Monitoring
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/admin_guide/zookeeper.html">
+                ZooKeeper
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/admin_guide/bookkeeper.html">
+                BookKeeper
+              </a>
+            </li>
+            
+          </ul>
+        </li>
+        <!-- Tutorials -->
+        <li class="dropdown">
+		      <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Tutorials<span class="caret"></span></a>
+          <ul class="dropdown-menu">
+            <li class="dropdown-header"><strong>Basic</strong></li>
+            <li><a href="/docs/0.4.0-incubating/tutorials/basic-1">Write Records (via Core Library)</a></li>
+            <li><a href="/docs/0.4.0-incubating/tutorials/basic-2">Write Records (via Write Proxy)</a></li>
+            <li><a href="/docs/0.4.0-incubating/tutorials/basic-3">Write Records to multiple streams</a></li>
+            <li><a href="/docs/0.4.0-incubating/tutorials/basic-4">Atomic Write Records</a></li>
+            <li><a href="/docs/0.4.0-incubating/tutorials/basic-5">Tailing Read Records</a></li>
+            <li><a href="/docs/0.4.0-incubating/tutorials/basic-6">Rewind Read Records</a></li>
+            <li role="separator" class="divider"></li>
+            <li class="dropdown-header"><strong>Messaging</strong></li>
+            
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/tutorials/messaging-1.html">
+                Write records to partitioned streams
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/tutorials/messaging-2.html">
+                Write records to multiple streams (load balancer)
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/tutorials/messaging-3.html">
+                At-least-once Processing
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/tutorials/messaging-4.html">
+                Exact-Once Processing
+              </a>
+            </li>
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/tutorials/messaging-5.html">
+                Implement a kafka-like pub/sub system
+              </a>
+            </li>
+            
+            <li role="separator" class="divider"></li>
+            <li class="dropdown-header"><strong>Replicated State Machines</strong></li>
+            
+            
+            <li>
+              <a href="/docs/0.4.0-incubating/tutorials/replicatedstatemachines.html">
+                Build replicated state machines
+              </a>
+            </li>
+            
+            <li role="separator" class="divider"></li>
+            <li class="dropdown-header"><strong>Analytics</strong></li>
+            <li><a href="/docs/0.4.0-incubating/tutorials/analytics-mapreduce">Process log streams using MapReduce</a></li>
+          </ul>
+        </li>
+      </ul>
+    </div><!--/.nav-collapse -->
+  </div>
+</nav>
+
+
+<link rel="stylesheet" href="">
+
+
+    <div class="container" role="main">
+
+      <div class="row">
+        
+        <!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<div class="col-md-8 col-md-offset-2">
+  
+
+</div>
+
+
+
+      </div>
+
+
+    <hr>
+  <div class="row">
+      <div class="col-xs-12">
+          <footer>
+              <p class="text-center">&copy; Copyright 2016
+                  <a href="http://www.apache.org">The Apache Software Foundation.</a> All Rights Reserved.
+              </p>
+              <p class="text-center">
+                  <a href="/docs/0.4.0-incubating/feed.xml">RSS Feed</a>
+              </p>
+          </footer>
+      </div>
+  </div>
+  <!-- container div end -->
+</div>
+
+
+    <script>
+  (function () {
+    'use strict';
+    anchors.options.placement = 'right';
+    anchors.add();
+  })();
+</script>
+
+  </body>
+
+</html>