You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@falcon.apache.org by ve...@apache.org on 2013/05/21 17:06:19 UTC

svn commit: r1484831 - in /incubator/falcon/site: issue-tracking.html license.html mail-lists.html project-info.html slides/falcon-overview.html source-repository.html team-list.html

Author: venkatesh
Date: Tue May 21 15:06:19 2013
New Revision: 1484831

URL: http://svn.apache.org/r1484831
Log:
had missed a few files....

Added:
    incubator/falcon/site/slides/falcon-overview.html
Modified:
    incubator/falcon/site/issue-tracking.html
    incubator/falcon/site/license.html
    incubator/falcon/site/mail-lists.html
    incubator/falcon/site/project-info.html
    incubator/falcon/site/source-repository.html
    incubator/falcon/site/team-list.html

Modified: incubator/falcon/site/issue-tracking.html
URL: http://svn.apache.org/viewvc/incubator/falcon/site/issue-tracking.html?rev=1484831&r1=1484830&r2=1484831&view=diff
==============================================================================
--- incubator/falcon/site/issue-tracking.html (original)
+++ incubator/falcon/site/issue-tracking.html Tue May 21 15:06:19 2013
@@ -1,13 +1,13 @@
 <!DOCTYPE html>
 <!--
- | Generated by Apache Maven Doxia at May 20, 2013
+ | Generated by Apache Maven Doxia at May 21, 2013
  | Rendered using Apache Maven Fluido Skin 1.3.0
 -->
 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
   <head>
     <meta charset="UTF-8" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <meta name="Date-Revision-yyyymmdd" content="20130520" />
+    <meta name="Date-Revision-yyyymmdd" content="20130521" />
     <meta http-equiv="Content-Language" content="en" />
     <title>Falcon - Issue Tracking</title>
     <link rel="stylesheet" href="./css/apache-maven-fluido-1.3.0.min.css" />
@@ -218,7 +218,7 @@
         
                 
                     
-                  <li id="publishDate" class="pull-right">Last Published: 2013-05-20</li> 
+                  <li id="publishDate" class="pull-right">Last Published: 2013-05-21</li> 
             
                             </ul>
       </div>

Modified: incubator/falcon/site/license.html
URL: http://svn.apache.org/viewvc/incubator/falcon/site/license.html?rev=1484831&r1=1484830&r2=1484831&view=diff
==============================================================================
--- incubator/falcon/site/license.html (original)
+++ incubator/falcon/site/license.html Tue May 21 15:06:19 2013
@@ -1,13 +1,13 @@
 <!DOCTYPE html>
 <!--
- | Generated by Apache Maven Doxia at May 20, 2013
+ | Generated by Apache Maven Doxia at May 21, 2013
  | Rendered using Apache Maven Fluido Skin 1.3.0
 -->
 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
   <head>
     <meta charset="UTF-8" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <meta name="Date-Revision-yyyymmdd" content="20130520" />
+    <meta name="Date-Revision-yyyymmdd" content="20130521" />
     <meta http-equiv="Content-Language" content="en" />
     <title>Falcon - Project License</title>
     <link rel="stylesheet" href="./css/apache-maven-fluido-1.3.0.min.css" />
@@ -218,7 +218,7 @@
         
                 
                     
-                  <li id="publishDate" class="pull-right">Last Published: 2013-05-20</li> 
+                  <li id="publishDate" class="pull-right">Last Published: 2013-05-21</li> 
             
                             </ul>
       </div>

Modified: incubator/falcon/site/mail-lists.html
URL: http://svn.apache.org/viewvc/incubator/falcon/site/mail-lists.html?rev=1484831&r1=1484830&r2=1484831&view=diff
==============================================================================
--- incubator/falcon/site/mail-lists.html (original)
+++ incubator/falcon/site/mail-lists.html Tue May 21 15:06:19 2013
@@ -1,13 +1,13 @@
 <!DOCTYPE html>
 <!--
- | Generated by Apache Maven Doxia at May 20, 2013
+ | Generated by Apache Maven Doxia at May 21, 2013
  | Rendered using Apache Maven Fluido Skin 1.3.0
 -->
 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
   <head>
     <meta charset="UTF-8" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <meta name="Date-Revision-yyyymmdd" content="20130520" />
+    <meta name="Date-Revision-yyyymmdd" content="20130521" />
     <meta http-equiv="Content-Language" content="en" />
     <title>Falcon - Project Mailing Lists</title>
     <link rel="stylesheet" href="./css/apache-maven-fluido-1.3.0.min.css" />
@@ -218,7 +218,7 @@
         
                 
                     
-                  <li id="publishDate" class="pull-right">Last Published: 2013-05-20</li> 
+                  <li id="publishDate" class="pull-right">Last Published: 2013-05-21</li> 
             
                             </ul>
       </div>

Modified: incubator/falcon/site/project-info.html
URL: http://svn.apache.org/viewvc/incubator/falcon/site/project-info.html?rev=1484831&r1=1484830&r2=1484831&view=diff
==============================================================================
--- incubator/falcon/site/project-info.html (original)
+++ incubator/falcon/site/project-info.html Tue May 21 15:06:19 2013
@@ -1,13 +1,13 @@
 <!DOCTYPE html>
 <!--
- | Generated by Apache Maven Doxia at May 20, 2013
+ | Generated by Apache Maven Doxia at May 21, 2013
  | Rendered using Apache Maven Fluido Skin 1.3.0
 -->
 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
   <head>
     <meta charset="UTF-8" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <meta name="Date-Revision-yyyymmdd" content="20130520" />
+    <meta name="Date-Revision-yyyymmdd" content="20130521" />
     <meta http-equiv="Content-Language" content="en" />
     <title>Falcon - Project Information</title>
     <link rel="stylesheet" href="./css/apache-maven-fluido-1.3.0.min.css" />
@@ -218,7 +218,7 @@
         
                 
                     
-                  <li id="publishDate" class="pull-right">Last Published: 2013-05-20</li> 
+                  <li id="publishDate" class="pull-right">Last Published: 2013-05-21</li> 
             
                             </ul>
       </div>

Added: incubator/falcon/site/slides/falcon-overview.html
URL: http://svn.apache.org/viewvc/incubator/falcon/site/slides/falcon-overview.html?rev=1484831&view=auto
==============================================================================
--- incubator/falcon/site/slides/falcon-overview.html (added)
+++ incubator/falcon/site/slides/falcon-overview.html Tue May 21 15:06:19 2013
@@ -0,0 +1,357 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="utf-8">
+    <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
+    <meta name="viewport" content="width=1024, user-scalable=no">
+
+    <title>Apache Falcon - Data Management on Hadoop</title>
+
+    <!-- Required stylesheet -->
+    <link rel="stylesheet" href="core/deck.core.css">
+
+    <!-- Extension CSS files go here. Remove or add as needed. -->
+    <link rel="stylesheet" href="extensions/goto/deck.goto.css">
+    <link rel="stylesheet" href="extensions/menu/deck.menu.css">
+    <link rel="stylesheet" href="extensions/navigation/deck.navigation.css">
+    <link rel="stylesheet" href="extensions/status/deck.status.css">
+    <link rel="stylesheet" href="extensions/hash/deck.hash.css">
+    <link rel="stylesheet" href="extensions/scale/deck.scale.css">
+
+    <!-- Style theme. More available in /themes/style/ or create your own. -->
+    <link rel="stylesheet" href="themes/style/web-2.0.css">
+
+    <!-- Transition theme. More available in /themes/transition/ or create your own. -->
+    <link rel="stylesheet" href="themes/transition/horizontal-slide.css">
+
+    <!-- Required Modernizr file -->
+    <script src="modernizr.custom.js"></script>
+</head>
+<body class="deck-container">
+
+<!-- Begin slides. Just make elements with a class of slide. -->
+
+<section class="slide" id="intro">
+    <h2>Apache Falcon</h2>
+    <h3>Simplified Data Management for Hadoop!</h3>
+</section>
+
+<section class="slide" id="what">
+    <h2>What is Data Management?</h2>
+    <ul>
+        <li class="slide">
+            <h3>Data Motion</h3>
+            <p>Import, Export</p>
+        </li>
+        <li class="slide">
+            <h3>Policy-based Lifecycle Management</h3>
+            <p>Retention, Replication/DR/BCP, Anonymization of PII data, Archival, etc.</p>
+        </li>
+        <li class="slide">
+            <h3>Process orchestration and scheduling</h3>
+            <p>Late data handling, reprocessing, dependency checking, etc.</p>
+            <p>Multi-cluster management to support Local/Global Aggregations, Rollups, etc.</p>
+        </li>
+        <li class="slide">
+            <h3>Data Discovery</h3>
+            <p>Lineage, Audit, Classification</p>
+        </li>
+    </ul>
+</section>
+
+<section class="slide" id="critical">
+    <h2>Why is Data Management Critical?</h2>
+    <ul>
+        <li class="slide">
+            <h3>Productivity Gains</h3>
+            <p>Large datasets are incentives for users to come to Hadoop</p>
+            <p>Data Loading optimized for space, time and bandwidth</p>
+        </li>
+        <li class="slide">
+            <h3>Regulatory compliance</h3>
+            <p>We cannot rely on users to adhere to data governance policies.</p>
+            <p>SEC, SOX, A29 of PII data, etc.</p>
+        </li>
+        <li class="slide">
+            <h3>Process orchestration and scheduling</h3>
+            <p>Data management is a common concern to be offered as a service</p>
+            <p>BCP, Security, Data  Pipeline processing, etc.</p>
+        </li>
+    </ul>
+</section>
+
+<section class="slide" id="challenges">
+    <h2>Challenging Data Management Landscape</h2>
+    <ul>
+        <li class="slide">
+            <h3>Data Management is hard and messy</h3>
+            <p>New opportunities – from Traditional ETL</p>
+            <p>Steady growth in data volumes – 3 V</p>
+            <p>SLA requirements</p>
+        </li>
+        <li class="slide">
+            <h3>Separation of Concerns</h3>
+            <p>DIY - Silo problem</p>
+            <p>Best practices/patterns</p>
+            <p>Security, BCP, Resource management</p>
+        </li>
+        <li class="slide">
+            <h3>Visibility into E2E</h3>
+            <p>Lineage, Audit, etc.</p>
+        </li>
+    </ul>
+</section>
+
+<section class="slide" id="solution">
+    <h2>Falcon - The Solution!</h2>
+    <ul>
+        <li class="slide">
+            <h3>Introduces a higher layer of abstraction – Data Set</h3>
+            <p>Decouples a data location and its properties from workflows</p>
+            <p>Understanding the life-time of a feed will allow for implicit validation of the processing rules</p>
+        </li>
+        <li class="slide">
+            <h3>Provides the key services for data processing apps</h3>
+            <p>Common data services are simple directives, No need to define them verbosely in each job</p>
+            <p>Allows process owners to keep their processing specific to their application logic</p>
+            <p>Sits in the execution path, intercepts to handle OOB data / retries etc.</p>
+        </li>
+        <li class="slide">
+            <h3>Promotes Polyglot Programming</h3>
+            <p>Does not do any heavy lifting but delegates to tools within the Hadoop ecosystem</p>
+        </li>
+    </ul>
+</section>
+
+<section class="slide" id="how">
+    <h2>How does Falcon work?</h2>
+    <ul>
+        <li class="slide">
+            <h3>System accepts entities using DSL</h3>
+            <p>Infrastructure, Data Sets, Pipeline/Processing logic</p>
+            <p>Simply a dependency graph between infrastructure, data and processing logic</p>
+        </li>
+        <li class="slide">
+            <h3>System orchestrates workflows</h3>
+            <p>Transforms the input into automated and scheduled workflows</p>
+            <p>Handles retry logic and late data processing. Records audit, lineage and metrics</p>
+            <p>Seamless integration with metastore/catalog</p>
+        </li>
+        <li class="slide">
+            <h3>Integrated Seamless experience to users</h3>
+            <p>Data Set management (Replication, Retention, etc.) offered as a service</p>
+            <p>Users can cherry pick, No coupling between primitives</p>
+            <p>Automates processing and tracks the end to end progress.
+                Provides hooks for metering and monitoring, notifications
+            </p>
+        </li>
+    </ul>
+</section>
+
+<section class="slide" id="hla">
+    <h2>High Level Architecture</h2>
+    <img src="../images/Architecture.png" alt="High Level Architecture" />
+</section>
+
+<section class="slide" id="case-study-repl">
+    <h2>Case Study: Replication of data sets</h2>
+    <ul>
+        <li class="slide">
+            <h3>User creates a Primary cluster definition</h3>
+            <pre><code>&lt;cluster colo=&quot;colo-1&quot; description=&quot;Primary cluster&quot;
+    name=&quot;primary-cluster&quot; xmlns=&quot;uri:ivory:cluster:0.1&quot;&gt;
+    &lt;interfaces&gt;
+        &lt;interface type="readonly" endpoint="hftp://localhost:50070" version="1.2"/&gt;
+        &lt;interface type="write" endpoint="hdfs://localhost:54310" version="1.2"/&gt;
+        &lt;interface type="execute" endpoint="localhost:54311" version="1.2"/&gt;
+        &lt;interface type="workflow" endpoint="http://localhost:11000/oozie/" version="3.3.0"/&gt;
+        &lt;interface type="messaging" endpoint="tcp://localhost:61616?daemon=true" version="5.1.6"/&gt;
+    &lt;/interfaces&gt;
+    &lt;locations&gt;
+        &lt;location name="staging" path="/projects/ivory/staging"/&gt;
+        &lt;location name="temp" path="/tmp"/&gt;
+        &lt;location name="working" path="/projects/ivory/working"/&gt;
+    &lt;/locations&gt;
+    &lt;properties/&gt;
+&lt;/cluster&gt;</code></pre>
+        </li>
+        <li class="slide">
+            <h3>User submits the cluster definition to Falcon</h3>
+            <pre><code>bin/falcon entity -url http://localhost:15000 -submit -type cluster -file primary-cluster.xml</code></pre>
+        </li>
+        <li class="slide">
+            <h3>Repeat the above for a BCP cluster</h3>
+        </li>
+    </ul>
+</section>
+
+<section class="slide" id="case-study-repl-2">
+    <h2>Case Study: Replication of data sets...Continued</h2>
+    <ul>
+        <li class="slide">
+            <h3>User creates a Data Set</h3>
+            <pre><code> &lt;feed description="TestHourlySummary" name="TestHourlySummary" xmlns="uri:ivory:feed:0.1"&gt;
+    &lt;partitions/&gt;
+    &lt;groups&gt;bi&lt;/groups&gt;
+    &lt;frequency&gt;hours(1)&lt;/frequency&gt;
+    &lt;late-arrival cut-off="hours(4)"/&gt;
+    &lt;clusters&gt;
+        &lt;cluster name="cluster-primary" type="source"&gt;
+            &lt;validity start="2012-01-01T00:00Z" end="2099-12-31T00:00Z"/&gt;
+            &lt;retention limit="days(2)" action="delete"/&gt;
+        &lt;/cluster&gt;
+        &lt;cluster name="cluster-BCP" type="target"&gt;
+            &lt;validity start="2012-01-01T00:00Z" end="2099-12-31T00:00Z"/&gt;
+            &lt;retention limit="days(2)" action="delete"/&gt;
+        &lt;/cluster&gt;
+    &lt;/clusters&gt;
+    &lt;locations&gt;
+        &lt;location type="data" path="/projects/test/TestHourlySummary/${YEAR}-${MONTH}-${DAY}-${HOUR}"/&gt;
+        &lt;location type="stats" path="/none"/&gt;
+        &lt;location type="meta" path="/none"/&gt;
+    &lt;/locations&gt;
+    &lt;ACL owner="venkatesh" group="users" permission="0755"/&gt;
+    &lt;schema location="/none" provider="none"/&gt;
+&lt;/feed&gt;</code></pre>
+        </li>
+    </ul>
+</section>
+
+<section class="slide" id="case-study-repl-3">
+    <h2>Case Study: Replication of data sets...Continued</h2>
+    <ul>
+        <li class="slide">
+            <h3>User submits the data set definition to Falcon</h3>
+            <pre><code>bin/falcon entity -url http://localhost:15000 -submit -type feed -file replicating-feed.xml</code></pre>
+        </li>
+        <li class="slide">
+            <h3>User then schedules it with Falcon</h3>
+            <pre><code>bin/falcon entity -type feed -url http://localhost:15000 -name replicating-feed -schedule</code></pre>
+        </li>
+        <li class="slide">
+            <h3>Voila! You are done. Magic happens!</h3>
+        </li>
+    </ul>
+</section>
+
+<section class="slide" id="case-study-repl-4">
+    <h2>Case Study: Replication of data sets...Continued</h2>
+    <ul>
+        <li class="slide">
+            <h3>Maintains the dependencies and relationships between entities</h3>
+        </li>
+        <li class="slide">
+            <h3>Instruments workflows for dependencies, retry logic, Table/Partition registration, notifications, etc.</h3>
+        </li>
+        <li class="slide">
+            <h3>Falcon orchestrates these into scheduled recurring workflows</h3>
+        </li>
+        <li class="slide">
+            <h3>Replication workflow</h3>
+            <p>A recurring workflow for copying data from source to target(s)</p>
+        </li>
+        <li class="slide">
+            <h3>Retention workflow for each cluster based on the defined policy</h3>
+            <p>A recurring workflow for purging expired data on Primary cluster</p>
+            <p>A recurring workflow for purging expired data on BCP cluster</p>
+        </li>
+    </ul>
+</section>
+
+<!--
+<section class="slide" id="case-study-process">
+    <h2>Case Study: Geographically Distributed Data Processing</h2>
+    <ul>
+        <li>
+            TBD
+        </li>
+    </ul>
+</section>
+-->
+
+<section class="slide" id="highlights">
+    <h2>Falcon Highlights</h2>
+    <ul>
+        <li class="slide">
+            <h3>Falcon provides the key services for data processing apps</h3>
+            <p>Provides a single interface to orchestrate data lifecycle across clusters.</p>
+            <p>Provides the key services data processing applications need, so
+                sophisticated DLM can easily be added to Hadoop applications.</p>
+            <p>Complex data processing logic handled by Falcon instead of hard-coded in apps.</p>
+            <p>Faster development and higher quality for ETL, reporting and other data processing apps on Hadoop.</p>
+        </li>
+        <li class="slide">
+            <h3>Introduces new abstractions : “Data Set”, “Process”, etc.</h3>
+            <p>Promotes decoupling of data set location from Oozie definition.</p>
+            <p>Declarative processing with simple directives enabling rapid prototyping</p>
+        </li>
+        <li class="slide">
+            <h3>Current Status</h3>
+            <p>V0.2 is in deployment for over 12 months at InMobi.</p>
+            <p>A release will be coming soon at Apache.</p>
+        </li>
+    </ul>
+</section>
+
+
+<section class="slide" id="thank-you">
+    <h2>Thank you!</h2>
+    <ul>
+        <li>
+            <h3>For more information on Falcon</h3>
+            <p>Visit <a href="http://falcon.incubator.apache.org">Apache Falcon</a></p>
+        </li>
+    </ul>
+</section>
+
+<!-- End slides. -->
+
+
+<!-- Begin extension snippets. Add or remove as needed. -->
+
+<!-- deck.navigation snippet -->
+<a href="#" class="deck-prev-link" title="Previous">&#8592;</a>
+<a href="#" class="deck-next-link" title="Next">&#8594;</a>
+
+<!-- deck.status snippet -->
+<p class="deck-status">
+    <span class="deck-status-current"></span>
+    /
+    <span class="deck-status-total"></span>
+</p>
+
+<!-- deck.goto snippet -->
+<form action="." method="get" class="goto-form">
+    <label for="goto-slide">Go to slide:</label>
+    <input type="text" name="slidenum" id="goto-slide" list="goto-datalist">
+    <datalist id="goto-datalist"></datalist>
+    <input type="submit" value="Go">
+</form>
+
+<!-- deck.hash snippet -->
+<a href="." title="Permalink to this slide" class="deck-permalink">#</a>
+
+<!-- End extension snippets. -->
+
+
+<!-- Required JS files. -->
+<script src="jquery-1.7.2.min.js"></script>
+<script src="core/deck.core.js"></script>
+
+<!-- Extension JS files. Add or remove as needed. -->
+<script src="core/deck.core.js"></script>
+<script src="extensions/hash/deck.hash.js"></script>
+<script src="extensions/menu/deck.menu.js"></script>
+<script src="extensions/goto/deck.goto.js"></script>
+<script src="extensions/status/deck.status.js"></script>
+<script src="extensions/navigation/deck.navigation.js"></script>
+<script src="extensions/scale/deck.scale.js"></script>
+
+<!-- Initialize the deck. You can put this in an external file if desired. -->
+<script>
+    $(function() {
+        $.deck('.slide');
+    });
+</script>
+</body>
+</html>

Modified: incubator/falcon/site/source-repository.html
URL: http://svn.apache.org/viewvc/incubator/falcon/site/source-repository.html?rev=1484831&r1=1484830&r2=1484831&view=diff
==============================================================================
--- incubator/falcon/site/source-repository.html (original)
+++ incubator/falcon/site/source-repository.html Tue May 21 15:06:19 2013
@@ -1,13 +1,13 @@
 <!DOCTYPE html>
 <!--
- | Generated by Apache Maven Doxia at May 20, 2013
+ | Generated by Apache Maven Doxia at May 21, 2013
  | Rendered using Apache Maven Fluido Skin 1.3.0
 -->
 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
   <head>
     <meta charset="UTF-8" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <meta name="Date-Revision-yyyymmdd" content="20130520" />
+    <meta name="Date-Revision-yyyymmdd" content="20130521" />
     <meta http-equiv="Content-Language" content="en" />
     <title>Falcon - Source Repository</title>
     <link rel="stylesheet" href="./css/apache-maven-fluido-1.3.0.min.css" />
@@ -218,7 +218,7 @@
         
                 
                     
-                  <li id="publishDate" class="pull-right">Last Published: 2013-05-20</li> 
+                  <li id="publishDate" class="pull-right">Last Published: 2013-05-21</li> 
             
                             </ul>
       </div>

Modified: incubator/falcon/site/team-list.html
URL: http://svn.apache.org/viewvc/incubator/falcon/site/team-list.html?rev=1484831&r1=1484830&r2=1484831&view=diff
==============================================================================
--- incubator/falcon/site/team-list.html (original)
+++ incubator/falcon/site/team-list.html Tue May 21 15:06:19 2013
@@ -1,13 +1,13 @@
 <!DOCTYPE html>
 <!--
- | Generated by Apache Maven Doxia at May 20, 2013
+ | Generated by Apache Maven Doxia at May 21, 2013
  | Rendered using Apache Maven Fluido Skin 1.3.0
 -->
 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
   <head>
     <meta charset="UTF-8" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <meta name="Date-Revision-yyyymmdd" content="20130520" />
+    <meta name="Date-Revision-yyyymmdd" content="20130521" />
     <meta http-equiv="Content-Language" content="en" />
     <title>Falcon - Team list</title>
     <link rel="stylesheet" href="./css/apache-maven-fluido-1.3.0.min.css" />
@@ -218,7 +218,7 @@
         
                 
                     
-                  <li id="publishDate" class="pull-right">Last Published: 2013-05-20</li> 
+                  <li id="publishDate" class="pull-right">Last Published: 2013-05-21</li> 
             
                             </ul>
       </div>