You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@falcon.apache.org by ve...@apache.org on 2013/05/21 17:06:19 UTC
svn commit: r1484831 - in /incubator/falcon/site: issue-tracking.html
license.html mail-lists.html project-info.html slides/falcon-overview.html
source-repository.html team-list.html
Author: venkatesh
Date: Tue May 21 15:06:19 2013
New Revision: 1484831
URL: http://svn.apache.org/r1484831
Log:
had missed a few files....
Added:
incubator/falcon/site/slides/falcon-overview.html
Modified:
incubator/falcon/site/issue-tracking.html
incubator/falcon/site/license.html
incubator/falcon/site/mail-lists.html
incubator/falcon/site/project-info.html
incubator/falcon/site/source-repository.html
incubator/falcon/site/team-list.html
Modified: incubator/falcon/site/issue-tracking.html
URL: http://svn.apache.org/viewvc/incubator/falcon/site/issue-tracking.html?rev=1484831&r1=1484830&r2=1484831&view=diff
==============================================================================
--- incubator/falcon/site/issue-tracking.html (original)
+++ incubator/falcon/site/issue-tracking.html Tue May 21 15:06:19 2013
@@ -1,13 +1,13 @@
<!DOCTYPE html>
<!--
- | Generated by Apache Maven Doxia at May 20, 2013
+ | Generated by Apache Maven Doxia at May 21, 2013
| Rendered using Apache Maven Fluido Skin 1.3.0
-->
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
- <meta name="Date-Revision-yyyymmdd" content="20130520" />
+ <meta name="Date-Revision-yyyymmdd" content="20130521" />
<meta http-equiv="Content-Language" content="en" />
<title>Falcon - Issue Tracking</title>
<link rel="stylesheet" href="./css/apache-maven-fluido-1.3.0.min.css" />
@@ -218,7 +218,7 @@
- <li id="publishDate" class="pull-right">Last Published: 2013-05-20</li>
+ <li id="publishDate" class="pull-right">Last Published: 2013-05-21</li>
</ul>
</div>
Modified: incubator/falcon/site/license.html
URL: http://svn.apache.org/viewvc/incubator/falcon/site/license.html?rev=1484831&r1=1484830&r2=1484831&view=diff
==============================================================================
--- incubator/falcon/site/license.html (original)
+++ incubator/falcon/site/license.html Tue May 21 15:06:19 2013
@@ -1,13 +1,13 @@
<!DOCTYPE html>
<!--
- | Generated by Apache Maven Doxia at May 20, 2013
+ | Generated by Apache Maven Doxia at May 21, 2013
| Rendered using Apache Maven Fluido Skin 1.3.0
-->
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
- <meta name="Date-Revision-yyyymmdd" content="20130520" />
+ <meta name="Date-Revision-yyyymmdd" content="20130521" />
<meta http-equiv="Content-Language" content="en" />
<title>Falcon - Project License</title>
<link rel="stylesheet" href="./css/apache-maven-fluido-1.3.0.min.css" />
@@ -218,7 +218,7 @@
- <li id="publishDate" class="pull-right">Last Published: 2013-05-20</li>
+ <li id="publishDate" class="pull-right">Last Published: 2013-05-21</li>
</ul>
</div>
Modified: incubator/falcon/site/mail-lists.html
URL: http://svn.apache.org/viewvc/incubator/falcon/site/mail-lists.html?rev=1484831&r1=1484830&r2=1484831&view=diff
==============================================================================
--- incubator/falcon/site/mail-lists.html (original)
+++ incubator/falcon/site/mail-lists.html Tue May 21 15:06:19 2013
@@ -1,13 +1,13 @@
<!DOCTYPE html>
<!--
- | Generated by Apache Maven Doxia at May 20, 2013
+ | Generated by Apache Maven Doxia at May 21, 2013
| Rendered using Apache Maven Fluido Skin 1.3.0
-->
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
- <meta name="Date-Revision-yyyymmdd" content="20130520" />
+ <meta name="Date-Revision-yyyymmdd" content="20130521" />
<meta http-equiv="Content-Language" content="en" />
<title>Falcon - Project Mailing Lists</title>
<link rel="stylesheet" href="./css/apache-maven-fluido-1.3.0.min.css" />
@@ -218,7 +218,7 @@
- <li id="publishDate" class="pull-right">Last Published: 2013-05-20</li>
+ <li id="publishDate" class="pull-right">Last Published: 2013-05-21</li>
</ul>
</div>
Modified: incubator/falcon/site/project-info.html
URL: http://svn.apache.org/viewvc/incubator/falcon/site/project-info.html?rev=1484831&r1=1484830&r2=1484831&view=diff
==============================================================================
--- incubator/falcon/site/project-info.html (original)
+++ incubator/falcon/site/project-info.html Tue May 21 15:06:19 2013
@@ -1,13 +1,13 @@
<!DOCTYPE html>
<!--
- | Generated by Apache Maven Doxia at May 20, 2013
+ | Generated by Apache Maven Doxia at May 21, 2013
| Rendered using Apache Maven Fluido Skin 1.3.0
-->
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
- <meta name="Date-Revision-yyyymmdd" content="20130520" />
+ <meta name="Date-Revision-yyyymmdd" content="20130521" />
<meta http-equiv="Content-Language" content="en" />
<title>Falcon - Project Information</title>
<link rel="stylesheet" href="./css/apache-maven-fluido-1.3.0.min.css" />
@@ -218,7 +218,7 @@
- <li id="publishDate" class="pull-right">Last Published: 2013-05-20</li>
+ <li id="publishDate" class="pull-right">Last Published: 2013-05-21</li>
</ul>
</div>
Added: incubator/falcon/site/slides/falcon-overview.html
URL: http://svn.apache.org/viewvc/incubator/falcon/site/slides/falcon-overview.html?rev=1484831&view=auto
==============================================================================
--- incubator/falcon/site/slides/falcon-overview.html (added)
+++ incubator/falcon/site/slides/falcon-overview.html Tue May 21 15:06:19 2013
@@ -0,0 +1,357 @@
+<!DOCTYPE html>
+<html>
+<head>
+ <meta charset="utf-8">
+ <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
+ <meta name="viewport" content="width=1024, user-scalable=no">
+
+ <title>Apache Falcon - Data Management on Hadoop</title>
+
+ <!-- Required stylesheet -->
+ <link rel="stylesheet" href="core/deck.core.css">
+
+ <!-- Extension CSS files go here. Remove or add as needed. -->
+ <link rel="stylesheet" href="extensions/goto/deck.goto.css">
+ <link rel="stylesheet" href="extensions/menu/deck.menu.css">
+ <link rel="stylesheet" href="extensions/navigation/deck.navigation.css">
+ <link rel="stylesheet" href="extensions/status/deck.status.css">
+ <link rel="stylesheet" href="extensions/hash/deck.hash.css">
+ <link rel="stylesheet" href="extensions/scale/deck.scale.css">
+
+ <!-- Style theme. More available in /themes/style/ or create your own. -->
+ <link rel="stylesheet" href="themes/style/web-2.0.css">
+
+ <!-- Transition theme. More available in /themes/transition/ or create your own. -->
+ <link rel="stylesheet" href="themes/transition/horizontal-slide.css">
+
+ <!-- Required Modernizr file -->
+ <script src="modernizr.custom.js"></script>
+</head>
+<body class="deck-container">
+
+<!-- Begin slides. Just make elements with a class of slide. -->
+
+<section class="slide" id="intro">
+ <h2>Apache Falcon</h2>
+ <h3>Simplified Data Management for Hadoop!</h3>
+</section>
+
+<section class="slide" id="what">
+ <h2>What is Data Management?</h2>
+ <ul>
+ <li class="slide">
+ <h3>Data Motion</h3>
+ <p>Import, Export</p>
+ </li>
+ <li class="slide">
+ <h3>Policy-based Lifecycle Management</h3>
+ <p>Retention, Replication/DR/BCP, Anonymization of PII data, Archival, etc.</p>
+ </li>
+ <li class="slide">
+ <h3>Process orchestration and scheduling</h3>
+ <p>Late data handling, reprocessing, dependency checking, etc.</p>
+ <p>Multi-cluster management to support Local/Global Aggregations, Rollups, etc.</p>
+ </li>
+ <li class="slide">
+ <h3>Data Discovery</h3>
+ <p>Lineage, Audit, Classification</p>
+ </li>
+ </ul>
+</section>
+
+<section class="slide" id="critical">
+ <h2>Why is Data Management Critical?</h2>
+ <ul>
+ <li class="slide">
+ <h3>Productivity Gains</h3>
+ <p>Large datasets are incentives for users to come to Hadoop</p>
+ <p>Data Loading optimized for space, time and bandwidth</p>
+ </li>
+ <li class="slide">
+ <h3>Regulatory compliance</h3>
+ <p>We cannot rely on users to adhere to data governance policies.</p>
+ <p>SEC, SOX, A29 of PII data, etc.</p>
+ </li>
+ <li class="slide">
+ <h3>Process orchestration and scheduling</h3>
+ <p>Data management is a common concern to be offered as a service</p>
+ <p>BCP, Security, Data Pipeline processing, etc.</p>
+ </li>
+ </ul>
+</section>
+
+<section class="slide" id="challenges">
+ <h2>Challenging Data Management Landscape</h2>
+ <ul>
+ <li class="slide">
+ <h3>Data Management is hard and messy</h3>
+ <p>New opportunities – from Traditional ETL</p>
+ <p>Steady growth in data volumes – 3 V</p>
+ <p>SLA requirements</p>
+ </li>
+ <li class="slide">
+ <h3>Separation of Concerns</h3>
+ <p>DIY - Silo problem</p>
+ <p>Best practices/patterns</p>
+ <p>Security, BCP, Resource management</p>
+ </li>
+ <li class="slide">
+ <h3>Visibility into E2E</h3>
+ <p>Lineage, Audit, etc.</p>
+ </li>
+ </ul>
+</section>
+
+<section class="slide" id="solution">
+ <h2>Falcon - The Solution!</h2>
+ <ul>
+ <li class="slide">
+ <h3>Introduces a higher layer of abstraction – Data Set</h3>
+ <p>Decouples a data location and its properties from workflows</p>
+ <p>Understanding the life-time of a feed will allow for implicit validation of the processing rules</p>
+ </li>
+ <li class="slide">
+ <h3>Provides the key services for data processing apps</h3>
+ <p>Common data services are simple directives, No need to define them verbosely in each job</p>
+ <p>Allows process owners to keep their processing specific to their application logic</p>
+ <p>Sits in the execution path, intercepts to handle OOB data / retries etc.</p>
+ </li>
+ <li class="slide">
+ <h3>Promotes Polyglot Programming</h3>
+ <p>Does not do any heavy lifting but delegates to tools within the Hadoop ecosystem</p>
+ </li>
+ </ul>
+</section>
+
+<section class="slide" id="how">
+ <h2>How does Falcon work?</h2>
+ <ul>
+ <li class="slide">
+ <h3>System accepts entities using DSL</h3>
+ <p>Infrastructure, Data Sets, Pipeline/Processing logic</p>
+ <p>Simply a dependency graph between infrastructure, data and processing logic</p>
+ </li>
+ <li class="slide">
+ <h3>System orchestrates workflows</h3>
+ <p>Transforms the input into automated and scheduled workflows</p>
+ <p>Handles retry logic and late data processing. Records audit, lineage and metrics</p>
+ <p>Seamless integration with metastore/catalog</p>
+ </li>
+ <li class="slide">
+ <h3>Integrated Seamless experience to users</h3>
+ <p>Data Set management (Replication, Retention, etc.) offered as a service</p>
+ <p>Users can cherry pick, No coupling between primitives</p>
+ <p>Automates processing and tracks the end to end progress.
+ Provides hooks for metering and monitoring, notifications
+ </p>
+ </li>
+ </ul>
+</section>
+
+<section class="slide" id="hla">
+ <h2>High Level Architecture</h2>
+ <img src="../images/Architecture.png" alt="High Level Architecture" />
+</section>
+
+<section class="slide" id="case-study-repl">
+ <h2>Case Study: Replication of data sets</h2>
+ <ul>
+ <li class="slide">
+ <h3>User creates a Primary cluster definition</h3>
+ <pre><code><cluster colo="colo-1" description="Primary cluster"
+ name="primary-cluster" xmlns="uri:ivory:cluster:0.1">
+ <interfaces>
+ <interface type="readonly" endpoint="hftp://localhost:50070" version="1.2"/>
+ <interface type="write" endpoint="hdfs://localhost:54310" version="1.2"/>
+ <interface type="execute" endpoint="localhost:54311" version="1.2"/>
+ <interface type="workflow" endpoint="http://localhost:11000/oozie/" version="3.3.0"/>
+ <interface type="messaging" endpoint="tcp://localhost:61616?daemon=true" version="5.1.6"/>
+ </interfaces>
+ <locations>
+ <location name="staging" path="/projects/ivory/staging"/>
+ <location name="temp" path="/tmp"/>
+ <location name="working" path="/projects/ivory/working"/>
+ </locations>
+ <properties/>
+</cluster></code></pre>
+ </li>
+ <li class="slide">
+ <h3>User submits the cluster definition to Falcon</h3>
+ <pre><code>bin/falcon entity -url http://localhost:15000 -submit -type cluster -file primary-cluster.xml</code></pre>
+ </li>
+ <li class="slide">
+ <h3>Repeat the above for a BCP cluster</h3>
+ </li>
+ </ul>
+</section>
+
+<section class="slide" id="case-study-repl-2">
+ <h2>Case Study: Replication of data sets...Continued</h2>
+ <ul>
+ <li class="slide">
+ <h3>User creates a Data Set</h3>
+ <pre><code> <feed description="TestHourlySummary" name="TestHourlySummary" xmlns="uri:ivory:feed:0.1">
+ <partitions/>
+ <groups>bi</groups>
+ <frequency>hours(1)</frequency>
+ <late-arrival cut-off="hours(4)"/>
+ <clusters>
+ <cluster name="cluster-primary" type="source">
+ <validity start="2012-01-01T00:00Z" end="2099-12-31T00:00Z"/>
+ <retention limit="days(2)" action="delete"/>
+ </cluster>
+ <cluster name="cluster-BCP" type="target">
+ <validity start="2012-01-01T00:00Z" end="2099-12-31T00:00Z"/>
+ <retention limit="days(2)" action="delete"/>
+ </cluster>
+ </clusters>
+ <locations>
+ <location type="data" path="/projects/test/TestHourlySummary/${YEAR}-${MONTH}-${DAY}-${HOUR}"/>
+ <location type="stats" path="/none"/>
+ <location type="meta" path="/none"/>
+ </locations>
+ <ACL owner="venkatesh" group="users" permission="0755"/>
+ <schema location="/none" provider="none"/>
+</feed></code></pre>
+ </li>
+ </ul>
+</section>
+
+<section class="slide" id="case-study-repl-3">
+ <h2>Case Study: Replication of data sets...Continued</h2>
+ <ul>
+ <li class="slide">
+ <h3>User submits the data set definition to Falcon</h3>
+ <pre><code>bin/falcon entity -url http://localhost:15000 -submit -type feed -file replicating-feed.xml</code></pre>
+ </li>
+ <li class="slide">
+ <h3>User then schedules it with Falcon</h3>
+ <pre><code>bin/falcon entity -type feed -url http://localhost:15000 -name replicating-feed -schedule</code></pre>
+ </li>
+ <li class="slide">
+ <h3>Voila! You are done. Magic happens!</h3>
+ </li>
+ </ul>
+</section>
+
+<section class="slide" id="case-study-repl-4">
+ <h2>Case Study: Replication of data sets...Continued</h2>
+ <ul>
+ <li class="slide">
+ <h3>Maintains the dependencies and relationships between entities</h3>
+ </li>
+ <li class="slide">
+ <h3>Instruments workflows for dependencies, retry logic, Table/Partition registration, notifications, etc.</h3>
+ </li>
+ <li class="slide">
+ <h3>Falcon orchestrates these into scheduled recurring workflows</h3>
+ </li>
+ <li class="slide">
+ <h3>Replication workflow</h3>
+ <p>A recurring workflow for copying data from source to target(s)</p>
+ </li>
+ <li class="slide">
+ <h3>Retention workflow for each cluster based on the defined policy</h3>
+ <p>A recurring workflow for purging expired data on Primary cluster</p>
+ <p>A recurring workflow for purging expired data on BCP cluster</p>
+ </li>
+ </ul>
+</section>
+
+<!--
+<section class="slide" id="case-study-process">
+ <h2>Case Study: Geographically Distributed Data Processing</h2>
+ <ul>
+ <li>
+ TBD
+ </li>
+ </ul>
+</section>
+-->
+
+<section class="slide" id="highlights">
+ <h2>Falcon Highlights</h2>
+ <ul>
+ <li class="slide">
+ <h3>Falcon provides the key services for data processing apps</h3>
+ <p>Provides a single interface to orchestrate data lifecycle across clusters.</p>
+ <p>Provides the key services data processing applications need so
+ Sophisticated DLM can easily be added to Hadoop applications.</p>
+ <p>Complex data processing logic handled by Falcon instead of hard-coded in apps.</p>
+ <p>Faster development and higher quality for ETL, reporting and other data processing apps on Hadoop.</p>
+ </li>
+ <li class="slide">
+ <h3>Introduces new abstractions : “Data Set”, “Process”, etc.</h3>
+ <p>Promotes decoupling of data set location from Oozie definition.</p>
+ <p>Declarative processing with simple directives enabling rapid prototyping</p>
+ </li>
+ <li class="slide">
+ <h3>Current Status</h3>
+ <p>V0.2 is in deployment for over 12 months at InMobi.</p>
+ <p>A release will be coming soon at Apache.</p>
+ </li>
+ </ul>
+</section>
+
+
+<section class="slide" id="thank-you">
+ <h2>Thank you!</h2>
+ <ul>
+ <li>
+ <h3>For more information on Falcon</h3>
+ <p>Visit <a href="http://falcon.incubator.apache.org">Apache Falcon</a></p>
+ </li>
+ </ul>
+</section>
+
+<!-- End slides. -->
+
+
+<!-- Begin extension snippets. Add or remove as needed. -->
+
+<!-- deck.navigation snippet -->
+<a href="#" class="deck-prev-link" title="Previous">←</a>
+<a href="#" class="deck-next-link" title="Next">→</a>
+
+<!-- deck.status snippet -->
+<p class="deck-status">
+ <span class="deck-status-current"></span>
+ /
+ <span class="deck-status-total"></span>
+</p>
+
+<!-- deck.goto snippet -->
+<form action="." method="get" class="goto-form">
+ <label for="goto-slide">Go to slide:</label>
+ <input type="text" name="slidenum" id="goto-slide" list="goto-datalist">
+ <datalist id="goto-datalist"></datalist>
+ <input type="submit" value="Go">
+</form>
+
+<!-- deck.hash snippet -->
+<a href="." title="Permalink to this slide" class="deck-permalink">#</a>
+
+<!-- End extension snippets. -->
+
+
+<!-- Required JS files. -->
+<script src="jquery-1.7.2.min.js"></script>
+<script src="core/deck.core.js"></script>
+
+<!-- Extension JS files. Add or remove as needed. -->
+<script src="core/deck.core.js"></script>
+<script src="extensions/hash/deck.hash.js"></script>
+<script src="extensions/menu/deck.menu.js"></script>
+<script src="extensions/goto/deck.goto.js"></script>
+<script src="extensions/status/deck.status.js"></script>
+<script src="extensions/navigation/deck.navigation.js"></script>
+<script src="extensions/scale/deck.scale.js"></script>
+
+<!-- Initialize the deck. You can put this in an external file if desired. -->
+<script>
+ $(function() {
+ $.deck('.slide');
+ });
+</script>
+</body>
+</html>
Modified: incubator/falcon/site/source-repository.html
URL: http://svn.apache.org/viewvc/incubator/falcon/site/source-repository.html?rev=1484831&r1=1484830&r2=1484831&view=diff
==============================================================================
--- incubator/falcon/site/source-repository.html (original)
+++ incubator/falcon/site/source-repository.html Tue May 21 15:06:19 2013
@@ -1,13 +1,13 @@
<!DOCTYPE html>
<!--
- | Generated by Apache Maven Doxia at May 20, 2013
+ | Generated by Apache Maven Doxia at May 21, 2013
| Rendered using Apache Maven Fluido Skin 1.3.0
-->
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
- <meta name="Date-Revision-yyyymmdd" content="20130520" />
+ <meta name="Date-Revision-yyyymmdd" content="20130521" />
<meta http-equiv="Content-Language" content="en" />
<title>Falcon - Source Repository</title>
<link rel="stylesheet" href="./css/apache-maven-fluido-1.3.0.min.css" />
@@ -218,7 +218,7 @@
- <li id="publishDate" class="pull-right">Last Published: 2013-05-20</li>
+ <li id="publishDate" class="pull-right">Last Published: 2013-05-21</li>
</ul>
</div>
Modified: incubator/falcon/site/team-list.html
URL: http://svn.apache.org/viewvc/incubator/falcon/site/team-list.html?rev=1484831&r1=1484830&r2=1484831&view=diff
==============================================================================
--- incubator/falcon/site/team-list.html (original)
+++ incubator/falcon/site/team-list.html Tue May 21 15:06:19 2013
@@ -1,13 +1,13 @@
<!DOCTYPE html>
<!--
- | Generated by Apache Maven Doxia at May 20, 2013
+ | Generated by Apache Maven Doxia at May 21, 2013
| Rendered using Apache Maven Fluido Skin 1.3.0
-->
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
- <meta name="Date-Revision-yyyymmdd" content="20130520" />
+ <meta name="Date-Revision-yyyymmdd" content="20130521" />
<meta http-equiv="Content-Language" content="en" />
<title>Falcon - Team list</title>
<link rel="stylesheet" href="./css/apache-maven-fluido-1.3.0.min.css" />
@@ -218,7 +218,7 @@
- <li id="publishDate" class="pull-right">Last Published: 2013-05-20</li>
+ <li id="publishDate" class="pull-right">Last Published: 2013-05-21</li>
</ul>
</div>