You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2014/07/01 23:42:51 UTC

svn commit: r1607200 [2/2] - in /spark: ./ _layouts/ images/ news/_posts/ site/ site/graphx/ site/images/ site/mllib/ site/news/ site/releases/ site/screencasts/ site/sql/ site/streaming/ sql/

Added: spark/site/sql/index.html
URL: http://svn.apache.org/viewvc/spark/site/sql/index.html?rev=1607200&view=auto
==============================================================================
--- spark/site/sql/index.html (added)
+++ spark/site/sql/index.html Tue Jul  1 21:42:50 2014
@@ -0,0 +1,346 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="utf-8">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+  <title>
+     Spark SQL | Apache Spark
+    
+  </title>
+
+  
+
+  <!-- Bootstrap core CSS -->
+  <link href="/css/cerulean.min.css" rel="stylesheet">
+  <link href="/css/custom.css" rel="stylesheet">
+
+  <script type="text/javascript">
+  <!-- Google Analytics initialization -->
+  var _gaq = _gaq || [];
+  _gaq.push(['_setAccount', 'UA-32518208-2']);
+  _gaq.push(['_trackPageview']);
+  (function() {
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+  })();
+
+  <!-- Adds slight delay to links to allow async reporting -->
+  function trackOutboundLink(link, category, action) {
+    try {
+      _gaq.push(['_trackEvent', category , action]);
+    } catch(err){}
+
+    setTimeout(function() {
+      document.location.href = link.href;
+    }, 100);
+  }
+  </script>
+
+  <!-- HTML5 shim and Respond.js IE8 support of HTML5 elements and media queries -->
+  <!--[if lt IE 9]>
+  <script src="https://oss.maxcdn.com/libs/html5shiv/3.7.0/html5shiv.js"></script>
+  <script src="https://oss.maxcdn.com/libs/respond.js/1.3.0/respond.min.js"></script>
+  <![endif]-->
+</head>
+
+<body>
+
+<div class="container" style="max-width: 1200px;">
+
+<div class="masthead">
+  
+    <p class="lead">
+      <a href="/">
+      <img src="/images/spark-logo.png"
+      style="height:100px; width:auto; vertical-align: bottom; margin-top: 20px;"></a>
+      <a href="#"><span class="subproject">
+        SQL
+      </span></a>
+    </p>
+  
+</div>
+
+<nav class="navbar navbar-default" role="navigation">
+  <!-- Brand and toggle get grouped for better mobile display -->
+  <div class="navbar-header">
+    <button type="button" class="navbar-toggle" data-toggle="collapse"
+            data-target="#navbar-collapse-1">
+      <span class="sr-only">Toggle navigation</span>
+      <span class="icon-bar"></span>
+      <span class="icon-bar"></span>
+      <span class="icon-bar"></span>
+    </button>
+  </div>
+
+  <!-- Collect the nav links, forms, and other content for toggling -->
+  <div class="collapse navbar-collapse" id="navbar-collapse-1">
+    <ul class="nav navbar-nav">
+      <li><a href="/downloads.html">Download</a></li>
+      <li class="dropdown">
+        <a href="#" class="dropdown-toggle" data-toggle="dropdown">
+          Related Projects <b class="caret"></b>
+        </a>
+        <ul class="dropdown-menu">
+          
+          <li><a href="/">Apache Spark</a></li>
+          
+          <li><a href="/sql/">Spark SQL</a></li>
+          <li><a href="/streaming/">Spark Streaming</a></li>
+          <li><a href="/mllib/">MLlib (machine learning)</a></li>
+          <li><a href="/graphx/">GraphX (graph)</a></li>
+        </ul>
+      </li>
+      <li class="dropdown">
+        <a href="#" class="dropdown-toggle" data-toggle="dropdown">
+          Documentation <b class="caret"></b>
+        </a>
+        <ul class="dropdown-menu">
+          <li><a href="/documentation.html">Overview</a></li>
+          <li><a href="/docs/latest/">Latest Release (Spark 1.0)</a></li>
+          <li><a href="/examples.html">Examples</a></li>
+        </ul>
+      </li>
+      <li class="dropdown">
+        <a href="#" class="dropdown-toggle" data-toggle="dropdown">
+          Community <b class="caret"></b>
+        </a>
+        <ul class="dropdown-menu">
+          <li><a href="/community.html">Mailing Lists</a></li>
+          <li><a href="/community.html#events">Events and Meetups</a></li>
+          <li><a href="/community.html#history">Project History</a></li>
+          <li><a href="https://cwiki.apache.org/confluence/display/SPARK/Powered+By+Spark">Powered By</a></li>
+        </ul>
+      </li>
+      <li><a href="/faq.html">FAQ</a></li>
+    </ul>
+  </div>
+  <!-- /.navbar-collapse -->
+</nav>
+
+
+<div class="row">
+  <div class="col-md-3 col-md-push-9">
+    <div class="news" style="margin-bottom: 20px;">
+      <h5>Latest News</h5>
+      <ul class="list-unstyled">
+        
+          <li><a href="/news/two-weeks-to-spark-summit-2014.html">Two weeks to Spark Summit 2014</a>
+          <span class="small">(Jun 16, 2014)</span></li>
+        
+          <li><a href="/news/spark-1-0-0-released.html">Spark 1.0.0 released</a>
+          <span class="small">(May 30, 2014)</span></li>
+        
+          <li><a href="/news/spark-summit-agenda-posted.html">Spark Summit agenda posted</a>
+          <span class="small">(May 11, 2014)</span></li>
+        
+          <li><a href="/news/spark-0-9-1-released.html">Spark 0.9.1 released</a>
+          <span class="small">(Apr 09, 2014)</span></li>
+        
+      </ul>
+      <p class="small" style="text-align: right;"><a href="/news/index.html">Archive</a></p>
+    </div>
+    <div class="hidden-xs hidden-sm">
+      <a href="/downloads.html" class="btn btn-success btn-lg btn-block" style="margin-bottom: 30px;">
+        Download Spark
+      </a>
+      <p style="font-size: 16px; font-weight: 500; color: #555;">
+        Related Projects:
+      </p>
+      <ul class="list-narrow">
+        
+        <li><a href="/">Apache Spark</a></li>
+        
+        <li><a href="/sql/">Spark SQL</a></li>
+        <li><a href="/streaming/">Spark Streaming</a></li>
+        <li><a href="/mllib/">MLlib (machine learning)</a></li>
+        <li><a href="/graphx/">GraphX (graph)</a></li>
+      </ul>
+    </div>
+  </div>
+
+  <div class="col-md-9 col-md-pull-3">
+    <div class="jumbotron">
+  <b>Spark SQL</b> unifies access to structured data.
+</div>
+
+<div class="row row-padded">
+  <div class="col-md-7 col-sm-7">
+    <h2>Integrated</h2>
+    <p class="lead">
+	  Seamlessly mix SQL queries with Spark programs.
+    </p>
+    <p>
+	  Spark SQL lets you query structured data as a distributed dataset (RDD) in Spark, with integrated APIs in Python, Scala and Java. 
+	  This tight integration makes it easy to run SQL queries alongside complex analytic algorithms.
+    </p>
+  </div>
+  <div class="col-md-5 col-sm-5 col-padded-top col-center">
+
+    <div style="margin-top: 15px; text-align: left; display: inline-block;">
+      <div class="code">
+	    sqlCtx = new <span class="sparkop">HiveContext</span>(sc)<br />
+		results = sqlCtx.<span class="sparkop">sql</span>(<br />&nbsp;&nbsp;<span class="closure">"SELECT * FROM people"</span>)<br />
+		names = results.<span class="sparkop">map</span>(<span class="closure">lambda p: p.name</span>)
+	  </div>
+      <div class="caption">Apply functions to results of SQL queries.</div>
+    </div>
+  </div>
+</div>
+
+<div class="row row-padded">
+  <div class="col-md-7 col-sm-7">
+    <h2>Unified Data Access</h2>
+    <p class="lead">
+      Load and query data from a variety of sources.
+    </p>
+    <p>
+      SchemaRDDs provide a single interface for efficiently working with structured data, including Apache Hive tables, parquet files and JSON files.
+    </p>
+  </div>
+  <div class="col-md-5 col-sm-5 col-padded-top col-center">
+    <div style="margin-top: 15px; text-align: left; display: inline-block;">
+      <div class="code">
+		sqlCtx.<span class="sparkop">jsonFile</span>(<span class="closure">"s3n://..."</span>)<br />&nbsp;&nbsp;.registerAsTable("json")<br />
+		schema_rdd = sqlCtx.<span class="sparkop">sql</span>(<span class="closure">"""<br />
+			&nbsp;&nbsp;SELECT * <br />
+			&nbsp;&nbsp;FROM hiveTable<br />
+			&nbsp;&nbsp;JOIN json ..."""</span>)<br />
+	  </div>
+      <div class="caption">Query and join different data sources.</div>
+    </div>
+  </div>
+</div>
+
+<div class="row row-padded">
+  <div class="col-md-7 col-sm-7">
+    <h2>Hive Compatibility</h2>
+    <p class="lead">
+      Run unmodified Hive queries on existing warehouses.
+    </p>
+    <p>
+      Spark SQL reuses the Hive frontend and metastore, giving you full compatibility with
+      existing Hive data, queries, and UDFs. Simply install it alongside Hive.
+    </p>
+  </div>
+  <div class="col-md-5 col-sm-5 col-padded-top col-center">
+    <div style="width: 100%; max-width: 323px; display: inline-block">
+      <img src="/images/sql-hive-arch.png" style="width: 100%; max-width: 323px;" />
+      <div class="caption">Spark SQL can use existing Hive metastores, SerDes, and UDFs.</div>
+    </div>
+  </div>
+</div>
+
+<div class="row row-padded">
+  <div class="col-md-7 col-sm-7">
+    <h2>Standard Connectivity</h2>
+    <p class="lead">
+      Connect through JDBC or ODBC.
+    </p>
+    <p>
+      Spark SQL includes a server mode with industry standard JDBC and ODBC connectivity.
+    </p>
+  </div>
+  <div class="col-md-5 col-sm-5 col-padded-top col-center">
+    <div style="width: 100%; max-width: 323px; display: inline-block">
+      <img src="/images/jdbc.png" style="width: 75%; max-width: 323px;" />
+      <div class="caption">Use your existing BI tools to query big data.</div>
+    </div>
+  </div>
+</div>
+
+<!--
+<div class="row row-padded">
+  <div class="col-md-7 col-sm-7">
+    <h2>Speed</h2>
+    <p class="lead">
+      Optimized to execute on Spark.
+    </p>
+    <p>
+      Spark SQL was built using the Catalyst optimizer, which automatically rewrites your queries to execute more efficiently.
+  	  By leveraging advanced techniques like runtime code generation, Spark SQL makes it easier to write lightning-fast analytic applications.
+    </p>
+  </div>
+  <div class="col-md-5 col-sm-5 col-padded-top col-center">
+    <div style="width: 100%; max-width: 272px; display: inline-block; text-align: center;">
+      <img src="/images/sqlperf.png" style="width: 100%; max-width: 250px;">
+      <div class="caption" style="min-width: 272px;">Performance comparison between Shark and Spark SQL</div>
+    </div>
+  </div>
+</div>
+-->
+
+
+  </div>
+</div>
+
+
+  
+<div class="row">
+  <div class="col-md-4 col-padded">
+    <h3>Scalability</h3>
+    <p>
+  	  Use the same engine for both interactive and long queries.		
+    </p>
+	<p>
+      Spark SQL takes advantage of the RDD model to support mid-query fault tolerance, letting it scale to large jobs too.
+	  Don't worry about using a different engine for historical data.
+    </p>
+  </div>
+
+  <div class="col-md-4 col-padded">
+    <h3>Community</h3>
+    <p>
+      Spark SQL is developed as part of Apache Spark. It thus gets
+      tested and updated with each Spark release.
+    </p>
+    <p>
+      If you have questions about the system, ask on the
+      <a href="/community.html#mailing-lists">Spark mailing lists</a>.
+    </p>
+    <p>
+      The Spark SQL developers welcome contributions. If you'd like to help out,
+      read <a href="https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark">how to
+      contribute to Spark</a>, and send us a patch!
+    </p>
+  </div>
+
+  <div class="col-md-4 col-padded">
+    <h3>Getting Started</h3>
+    <p>
+      To get started with Spark SQL:
+    </p>
+    <ul class="list-narrow">
+      <li><a href="/downloads.html">Download Spark</a>. It includes Spark SQL as a module.</li>
+      <li>Read the <a href="/docs/latest/sql-programming-guide.html">Spark SQL programming guide</a>, which includes examples of common use cases.</li>
+    </ul>
+  </div>
+</div>
+
+<div class="row">
+  <div class="col-sm-12 col-center">
+    <a href="/downloads.html" class="btn btn-success btn-lg btn-multiline">
+      Download Spark<br /><span class="small">Includes Spark SQL</span>
+    </a>
+  </div>
+</div>
+
+
+
+
+<footer class="small">
+  <hr>
+  Apache Spark, Spark, Apache, and the Spark logo are trademarks of
+  <a href="http://www.apache.org">The Apache Software Foundation</a>.
+</footer>
+
+</div>
+
+<script src="https://code.jquery.com/jquery.js"></script>
+<script src="//netdna.bootstrapcdn.com/bootstrap/3.0.3/js/bootstrap.min.js"></script>
+<script src="/js/lang-tabs.js"></script>
+
+</body>
+</html>

Modified: spark/site/streaming/index.html
URL: http://svn.apache.org/viewvc/spark/site/streaming/index.html?rev=1607200&r1=1607199&r2=1607200&view=diff
==============================================================================
--- spark/site/streaming/index.html (original)
+++ spark/site/streaming/index.html Tue Jul  1 21:42:50 2014
@@ -87,7 +87,7 @@
           
           <li><a href="/">Apache Spark</a></li>
           
-          <li><a href="http://shark.cs.berkeley.edu">Shark (SQL)</a></li>
+          <li><a href="/sql/">Spark SQL</a></li>
           <li><a href="/streaming/">Spark Streaming</a></li>
           <li><a href="/mllib/">MLlib (machine learning)</a></li>
           <li><a href="/graphx/">GraphX (graph)</a></li>
@@ -153,7 +153,7 @@
         
         <li><a href="/">Apache Spark</a></li>
         
-        <li><a href="http://shark.cs.berkeley.edu">Shark (SQL)</a></li>
+        <li><a href="/sql/">Spark SQL</a></li>
         <li><a href="/streaming/">Spark Streaming</a></li>
         <li><a href="/mllib/">MLlib (machine learning)</a></li>
         <li><a href="/graphx/">GraphX (graph)</a></li>

Added: spark/sql/index.md
URL: http://svn.apache.org/viewvc/spark/sql/index.md?rev=1607200&view=auto
==============================================================================
--- spark/sql/index.md (added)
+++ spark/sql/index.md Tue Jul  1 21:42:50 2014
@@ -0,0 +1,172 @@
+---
+layout: global
+type: "page singular"
+title: Spark SQL
+subproject: SQL
+---
+
+
+<div class="jumbotron">
+  <b>Spark SQL</b> unifies access to structured data.
+</div>
+
+
+<div class="row row-padded">
+  <div class="col-md-7 col-sm-7">
+    <h2>Integrated</h2>
+    <p class="lead">
+	  Seamlessly mix SQL queries with Spark programs.
+    </p>
+    <p>
+	  Spark SQL lets you query structured data as a distributed dataset (RDD) in Spark, with integrated APIs in Python, Scala and Java. 
+	  This tight integration makes it easy to run SQL queries alongside complex analytic algorithms.
+    </p>
+  </div>
+  <div class="col-md-5 col-sm-5 col-padded-top col-center">
+
+    <div style="margin-top: 15px; text-align: left; display: inline-block;">
+      <div class="code">
+	    sqlCtx = new <span class="sparkop">HiveContext</span>(sc)<br/>
+		results = sqlCtx.<span class="sparkop">sql</span>(<br/>&nbsp;&nbsp;<span class="closure">"SELECT * FROM people"</span>)<br/>
+		names = results.<span class="sparkop">map</span>(<span class="closure">lambda p: p.name</span>)<br/>
+	  </div>
+      <div class="caption">Apply functions to results of SQL queries.</div>
+    </div>
+  </div>
+</div>
+
+<div class="row row-padded">
+  <div class="col-md-7 col-sm-7">
+    <h2>Unified Data Access</h2>
+    <p class="lead">
+      Load and query data from a variety of sources.
+    </p>
+    <p>
+      SchemaRDDs provide a single interface for efficiently working with structured data, including Apache Hive tables, parquet files and JSON files.
+    </p>
+  </div>
+  <div class="col-md-5 col-sm-5 col-padded-top col-center">
+    <div style="margin-top: 15px; text-align: left; display: inline-block;">
+      <div class="code">
+		sqlCtx.<span class="sparkop">jsonFile</span>(<span class="closure">"s3n://..."</span>)<br/>&nbsp;&nbsp;.registerAsTable("json")<br/>
+		schema_rdd = sqlCtx.<span class="sparkop">sql</span>(<span class="closure">"""<br/>
+			&nbsp;&nbsp;SELECT * <br/>
+			&nbsp;&nbsp;FROM hiveTable<br/>
+			&nbsp;&nbsp;JOIN json ..."""</span>)<br/>
+	  </div>
+      <div class="caption">Query and join different data sources.</div>
+    </div>
+  </div>
+</div>
+
+<div class="row row-padded">
+  <div class="col-md-7 col-sm-7">
+    <h2>Hive Compatibility</h2>
+    <p class="lead">
+      Run unmodified Hive queries on existing warehouses.
+    </p>
+    <p>
+      Spark SQL reuses the Hive frontend and metastore, giving you full compatibility with
+      existing Hive data, queries, and UDFs. Simply install it alongside Hive.
+    </p>
+  </div>
+  <div class="col-md-5 col-sm-5 col-padded-top col-center">
+    <div style="width: 100%; max-width: 323px; display: inline-block">
+      <img src="{{site.url}}images/sql-hive-arch.png" style="width: 100%; max-width: 323px;">
+      <div class="caption">Spark SQL can use existing Hive metastores, SerDes, and UDFs.</div>
+    </div>
+  </div>
+</div>
+
+<div class="row row-padded">
+  <div class="col-md-7 col-sm-7">
+    <h2>Standard Connectivity</h2>
+    <p class="lead">
+      Connect through JDBC or ODBC.
+    </p>
+    <p>
+      Spark SQL includes a server mode with industry standard JDBC and ODBC connectivity.
+    </p>
+  </div>
+  <div class="col-md-5 col-sm-5 col-padded-top col-center">
+    <div style="width: 100%; max-width: 323px; display: inline-block">
+      <img src="{{site.url}}images/jdbc.png" style="width: 75%; max-width: 323px;">
+      <div class="caption">Use your existing BI tools to query big data.</div>
+    </div>
+  </div>
+</div>
+
+<!--
+<div class="row row-padded">
+  <div class="col-md-7 col-sm-7">
+    <h2>Speed</h2>
+    <p class="lead">
+      Optimized to execute on Spark.
+    </p>
+    <p>
+      Spark SQL was built using the Catalyst optimizer, which automatically rewrites your queries to execute more efficiently.
+  	  By leveraging advanced techniques like runtime code generation, Spark SQL makes it easier to write lightning-fast analytic applications.
+    </p>
+  </div>
+  <div class="col-md-5 col-sm-5 col-padded-top col-center">
+    <div style="width: 100%; max-width: 272px; display: inline-block; text-align: center;">
+      <img src="{{site.url}}images/sqlperf.png" style="width: 100%; max-width: 250px;">
+      <div class="caption" style="min-width: 272px;">Performance comparison between Shark and Spark SQL</div>
+    </div>
+  </div>
+</div>
+-->
+
+{% extra %}
+
+
+<div class="row">
+  <div class="col-md-4 col-padded">
+    <h3>Scalability</h3>
+    <p>
+  	  Use the same engine for both interactive and long queries.		
+    </p>
+	<p>
+      Spark SQL takes advantage of the RDD model to support mid-query fault tolerance, letting it scale to large jobs too.
+	  Don't worry about using a different engine for historical data.
+    </p>
+  </div>
+
+  <div class="col-md-4 col-padded">
+    <h3>Community</h3>
+    <p>
+      Spark SQL is developed as part of Apache Spark. It thus gets
+      tested and updated with each Spark release.
+    </p>
+    <p>
+      If you have questions about the system, ask on the
+      <a href="{{site.url}}community.html#mailing-lists">Spark mailing lists</a>.
+    </p>
+    <p>
+      The Spark SQL developers welcome contributions. If you'd like to help out,
+      read <a href="https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark">how to
+      contribute to Spark</a>, and send us a patch!
+    </p>
+  </div>
+
+  <div class="col-md-4 col-padded">
+    <h3>Getting Started</h3>
+    <p>
+      To get started with Spark SQL:
+    </p>
+    <ul class="list-narrow">
+      <li><a href="{{site.url}}downloads.html">Download Spark</a>. It includes Spark SQL as a module.</li>
+      <li>Read the <a href="{{site.url}}docs/latest/sql-programming-guide.html">Spark SQL programming guide</a>, which includes examples of common use cases.</li>
+    </ul>
+  </div>
+</div>
+
+<div class="row">
+  <div class="col-sm-12 col-center">
+    <a href="{{site.url}}downloads.html" class="btn btn-success btn-lg btn-multiline">
+      Download Spark<br/><span class="small">Includes Spark SQL</span>
+    </a>
+  </div>
+</div>
+
+{% endextra %}