You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@predictionio.apache.org by do...@apache.org on 2016/10/08 04:58:21 UTC

[45/51] [abbrv] [partial] incubator-predictionio-site git commit: Documentation based on apache/incubator-predictionio#237e17acfbae7ca7eba43c61ce1c4f5498f980be

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/02715c51/datacollection/analytics-zeppelin/index.html
----------------------------------------------------------------------
diff --git a/datacollection/analytics-zeppelin/index.html b/datacollection/analytics-zeppelin/index.html
new file mode 100644
index 0000000..0dadf68
--- /dev/null
+++ b/datacollection/analytics-zeppelin/index.html
@@ -0,0 +1,47 @@
+<!DOCTYPE html><html><head><title>Machine Learning Analytics with Zeppelin</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><meta class="swiftype" name="title" data-type="string" content="Machine Learning Analytics with Zeppelin"/><link rel="canonical" href="https://docs.prediction.io/datacollection/analytics-zeppelin/"/><link href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800" rel="stylesheet"/><link href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" rel="stylesheet"/><link href="/stylesheets/application-a2a2f408.css" rel="stylesheet" type="text/css"/><script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js"></scri
 pt><script src="//cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: true });}catch(e){}</script></head><body><div id="global"><header><div class="container" id="header-wrapper"><div class="row"><div class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a href="#"></a><a href="http://predictionio.incubator.apache.org/"><img alt="PredictionIO" id="logo" src="/images/logos/logo-ee2b9bb3.png"/></a></div><div id="menu-wrapper"><div id="pill-wrapper"><a class="pill left" href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" href="//github.com/apache/incubator-predictionio/">OPEN SOURCE</a></div></div><img class="mobile-search-bar-toggler hidden-md hidden-lg" src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div id="search-bar-row-wrapper"><div class="container-fluid" id="search-bar-row"><div class="row"><div class=
 "col-md-9 col-sm-11 col-xs-11"><div class="hidden-md hidden-lg" id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Machine Learning Analytics with Zeppelin</h4></div><h4 class="hidden-sm hidden-xs">PredictionIO Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 hidden-md hidden-lg"><img id="left-menu-indicator" src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img src="/images/icons/search-glass-704bd4ff.png"/><input type="text" id="st-search-input" class="st-search-input" placeholder="Search Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div class="mobile-left-menu-toggler hidden-md hidden-lg"></div></div></div></div><div id="page" class="container
 -fluid"><div class="row"><div id="left-menu-wrapper" class="col-md-3"><nav id="nav-main"><ul><li class="level-1"><a class="expandible" href="/"><span>Apache PredictionIO (incubating) Documentation</span></a><ul><li class="level-2"><a class="final" href="/"><span>Welcome to Apache PredictionIO (incubating)</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Started</span></a><ul><li class="level-2"><a class="final" href="/start/"><span>A Quick Intro</span></a></li><li class="level-2"><a class="final" href="/install/"><span>Installing Apache PredictionIO (incubating)</span></a></li><li class="level-2"><a class="final" href="/start/download/"><span>Downloading an Engine Template</span></a></li><li class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your First Engine</span></a></li><li class="level-2"><a class="final" href="/start/customize/"><span>Customizing the Engine</span></a></li></ul></li><li class="level-1"><a class="expan
 dible" href="#"><span>Integrating with Your App</span></a><ul><li class="level-2"><a class="final" href="/appintegration/"><span>App Integration Overview</span></a></li><li class="level-2"><a class="expandible" href="/sdk/"><span>List of SDKs</span></a><ul><li class="level-3"><a class="final" href="/sdk/java/"><span>Java & Android SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/php/"><span>PHP SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/community/"><span>Community Powered SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web Service</span></a></li><li class="level-2"><a class="final" href="/cli/#engine-commands"><span>
 Engine Command-line Interface</span></a></li><li class="level-2"><a class="final" href="/deploy/monitoring/"><span>Monitoring Engine</span></a></li><li class="level-2"><a class="final" href="/deploy/engineparams/"><span>Setting Engine Parameters</span></a></li><li class="level-2"><a class="final" href="/deploy/enginevariants/"><span>Deploying Multiple Engine Variants</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Customizing an Engine</span></a><ul><li class="level-2"><a class="final" href="/customize/"><span>Learning DASE</span></a></li><li class="level-2"><a class="final" href="/customize/dase/"><span>Implement DASE</span></a></li><li class="level-2"><a class="final" href="/customize/troubleshooting/"><span>Troubleshooting Engine Development</span></a></li><li class="level-2"><a class="final" href="/api/current/#package"><span>Engine Scala APIs</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Collecting and Analyz
 ing Data</span></a><ul><li class="level-2"><a class="final" href="/datacollection/"><span>Event Server Overview</span></a></li><li class="level-2"><a class="final" href="/cli/#event-server-commands"><span>Event Server Command-line Interface</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventapi/"><span>Collecting Data with REST/SDKs</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li class="level-2"><a class="final" href="/datacollection/webhooks/"><span>Unifying Multichannel Data with Webhooks</span></a></li><li class="level-2"><a class="final" href="/datacollection/channel/"><span>Channel</span></a></li><li class="level-2"><a class="final" href="/datacollection/batchimport/"><span>Importing Data in Batch</span></a></li><li class="level-2"><a class="final" href="/datacollection/analytics/"><span>Using Analytics Tools</span></a></li></ul></li><li class="level-1"><a class="expandib
 le" href="#"><span>Choosing an Algorithm(s)</span></a><ul><li class="level-2"><a class="final" href="/algorithm/"><span>Built-in Algorithm Libraries</span></a></li><li class="level-2"><a class="final" href="/algorithm/switch/"><span>Switching to Another Algorithm</span></a></li><li class="level-2"><a class="final" href="/algorithm/multiple/"><span>Combining Multiple Algorithms</span></a></li><li class="level-2"><a class="final" href="/algorithm/custom/"><span>Adding Your Own Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a class="final" href="/evaluation/"><span>Overview</span></a></li><li class="level-2"><a class="final" href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li class="level-2"><a class="final" href="/evaluation/evaluationdashboard/"><span>Evaluation Dashboard</span></a></li><li class="level-2"><a class="final" href="/evaluation/metricchoose
 /"><span>Choosing Evaluation Metrics</span></a></li><li class="level-2"><a class="final" href="/evaluation/metricbuild/"><span>Building Evaluation Metrics</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>System Architecture</span></a><ul><li class="level-2"><a class="final" href="/system/"><span>Architecture Overview</span></a></li><li class="level-2"><a class="final" href="/system/anotherdatastore/"><span>Using Another Data Store</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li class="level-2"><a class="final" href="/gallery/template-gallery/"><span>Browse</span></a></li><li class="level-2"><a class="final" href="/community/submit-template/"><span>Submit your Engine as a Template</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Demo Tutorials</span></a><ul><li class="level-2"><a class="final" href="/demo/tapster/"><span>Comics Recommendation D
 emo</span></a></li><li class="level-2"><a class="final" href="/demo/community/"><span>Community Contributed Demo</span></a></li><li class="level-2"><a class="final" href="/demo/textclassification/"><span>Text Classification Engine Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" href="/community/"><span>Getting Involved</span></a><ul><li class="level-2"><a class="final" href="/community/contribute-code/"><span>Contribute Code</span></a></li><li class="level-2"><a class="final" href="/community/contribute-documentation/"><span>Contribute Documentation</span></a></li><li class="level-2"><a class="final" href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li class="level-2"><a class="final" href="/community/contribute-webhook/"><span>Contribute a Webhook</span></a></li><li class="level-2"><a class="final" href="/community/projects/"><span>Community Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>
 Getting Help</span></a><ul><li class="level-2"><a class="final" href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a class="final" href="/support/"><span>Support</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" href="/resources/intellij/"><span>Developing Engines with IntelliJ IDEA</span></a></li><li class="level-2"><a class="final" href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li class="level-2"><a class="final" href="/resources/glossary/"><span>Glossary</span></a></li></ul></li></ul></nav></div><div class="col-md-9 col-sm-12"><div class="content-header hidden-md hidden-lg"><div id="page-title"><h1>Machine Learning Analytics with Zeppelin</h1></div></div><div id="table-of-content-wrapper"><h5>On this page</h5><aside id="table-of-contents"><ul> <li> <a href="#prerequisites">Prerequisites</a> </li> <li> <a href="#building-zeppelin-for-apache-spark-1
 -2">Building Zeppelin for Apache Spark 1.2+</a> </li> <li> <a href="#preparing-zeppelin">Preparing Zeppelin</a> </li> <li> <a href="#performing-analysis-with-zeppelin">Performing Analysis with Zeppelin</a> </li> </ul> </aside><hr/><a id="edit-page-link" href="https://github.com/apache/incubator-predictionio/tree/livedoc/docs/manual/source/datacollection/analytics-zeppelin.html.md.erb"><img src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div class="content-header hidden-sm hidden-xs"><div id="page-title"><h1>Machine Learning Analytics with Zeppelin</h1></div></div><div class="content"><p><a href="http://zeppelin-project.org/">Apache Zeppelin</a> is an interactive computational environment built on Apache Spark like the IPython Notebook. With <a href="http://predictionio.incubator.apache.org">Apache PredictionIO (incubating)</a> and <a href="https://spark.apache.org/sql/">Spark SQL</a>, you can easily analyze your collected events when you are developing or tuni
 ng your engine.</p><h2 id='prerequisites' class='header-anchors'>Prerequisites</h2><p>The following instructions assume that you have the command <code>sbt</code> accessible in your shell&#39;s search path. Alternatively, you can use the <code>sbt</code> command that comes with Apache PredictionIO (incubating) at <code>$PIO_HOME/sbt/sbt</code>.</p><p><h2 id='export-events-to-apache-parquet' class='header-anchors'>Export Events to Apache Parquet</h2><p>PredictionIO supports exporting your events to <a href="http://parquet.incubator.apache.org/">Apache Parquet</a>, a columnar storage format that allows you to query quickly.</p><p>Let&#39;s export the data we imported in <a href="/templates/recommendation/quickstart/#import-sample-data">Recommendation Engine Template Quick Start</a>, and assume the App ID is 1.</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1</pre></td><td class="code"><pre
 ><span class="gp">$ </span><span class="nv">$PIO_HOME</span>/bin/pio <span class="nb">export</span> --appid 1 --output /tmp/movies --format parquet
+</pre></td></tr></tbody></table> </div> <p>After the command has finished successfully, you should see something similar to the following.</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11</pre></td><td class="code"><pre>root
+ |-- creationTime: string <span class="o">(</span>nullable <span class="o">=</span> <span class="nb">true</span><span class="o">)</span>
+ |-- entityId: string <span class="o">(</span>nullable <span class="o">=</span> <span class="nb">true</span><span class="o">)</span>
+ |-- entityType: string <span class="o">(</span>nullable <span class="o">=</span> <span class="nb">true</span><span class="o">)</span>
+ |-- event: string <span class="o">(</span>nullable <span class="o">=</span> <span class="nb">true</span><span class="o">)</span>
+ |-- eventId: string <span class="o">(</span>nullable <span class="o">=</span> <span class="nb">true</span><span class="o">)</span>
+ |-- eventTime: string <span class="o">(</span>nullable <span class="o">=</span> <span class="nb">true</span><span class="o">)</span>
+ |-- properties: struct <span class="o">(</span>nullable <span class="o">=</span> <span class="nb">true</span><span class="o">)</span>
+ |    |-- rating: double <span class="o">(</span>nullable <span class="o">=</span> <span class="nb">true</span><span class="o">)</span>
+ |-- targetEntityId: string <span class="o">(</span>nullable <span class="o">=</span> <span class="nb">true</span><span class="o">)</span>
+ |-- targetEntityType: string <span class="o">(</span>nullable <span class="o">=</span> <span class="nb">true</span><span class="o">)</span>
+</pre></td></tr></tbody></table> </div></p><h2 id='building-zeppelin-for-apache-spark-1.2+' class='header-anchors'>Building Zeppelin for Apache Spark 1.2+</h2><p>Start by cloning Zeppelin.</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1</pre></td><td class="code"><pre><span class="gp">$ </span>git clone https://github.com/apache/incubator-zeppelin.git
+</pre></td></tr></tbody></table> </div> <p>Build Zeppelin with Hadoop 2.4 and Spark 1.2 profiles.</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
+2</pre></td><td class="code"><pre><span class="gp">$ </span><span class="nb">cd </span>zeppelin
+<span class="gp">$ </span>mvn clean package -Pspark-1.2 -Dhadoop.version<span class="o">=</span>2.4.0 -Phadoop-2.4 -DskipTests
+</pre></td></tr></tbody></table> </div> <p>Now you should have working Zeppelin binaries.</p><h2 id='preparing-zeppelin' class='header-anchors'>Preparing Zeppelin</h2><p>First, start Zeppelin.</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1</pre></td><td class="code"><pre><span class="gp">$ </span>bin/zeppelin-daemon.sh start
+</pre></td></tr></tbody></table> </div> <p>By default, you should be able to access Zeppelin via web browser at <a href="http://localhost:8080">http://localhost:8080</a>. Create a new notebook and put the following in the first cell.</p><div class="highlight scala"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1</pre></td><td class="code"><pre><span class="n">sqlc</span><span class="o">.</span><span class="n">parquetFile</span><span class="o">(</span><span class="s">"/tmp/movies"</span><span class="o">).</span><span class="n">registerTempTable</span><span class="o">(</span><span class="s">"events"</span><span class="o">)</span>
+</pre></td></tr></tbody></table> </div> <p><img alt="Preparing Zeppelin" src="/images/datacollection/zeppelin-01-1c476081.png"/></p><h2 id='performing-analysis-with-zeppelin' class='header-anchors'>Performing Analysis with Zeppelin</h2><p>If all steps above ran successfully, you should have a ready-to-use analytics environment by now. Let&#39;s try a few examples to see if everything is functional.</p><p>In the second cell, put in this piece of code and run it.</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
+2
+3</pre></td><td class="code"><pre>%sql
+SELECT entityType, event, targetEntityType, COUNT<span class="o">(</span><span class="k">*</span><span class="o">)</span> AS c FROM events
+GROUP BY entityType, event, targetEntityType
+</pre></td></tr></tbody></table> </div> <p><img alt="Summary of Events" src="/images/datacollection/zeppelin-02-b3687a33.png"/></p><p>We can also easily plot a pie chart.</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
+2</pre></td><td class="code"><pre>%sql
+SELECT event, COUNT<span class="o">(</span><span class="k">*</span><span class="o">)</span> AS c FROM events GROUP BY event
+</pre></td></tr></tbody></table> </div> <p><img alt="Summary of Event in Pie Chart" src="/images/datacollection/zeppelin-03-15dcefee.png"/></p><p>And see a breakdown of rating values.</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
+2
+3</pre></td><td class="code"><pre>%sql
+SELECT properties.rating AS r, COUNT<span class="o">(</span><span class="k">*</span><span class="o">)</span> AS c FROM events
+WHERE properties.rating IS NOT NULL GROUP BY properties.rating ORDER BY r
+</pre></td></tr></tbody></table> </div> <p><img alt="Breakdown of Rating Values" src="/images/datacollection/zeppelin-04-d646c299.png"/></p><p>Happy analyzing!</p></div></div></div></div><footer><div class="container"><div class="seperator"></div><div class="row"><div class="col-md-6 col-xs-6 footer-link-column"><div class="footer-link-column-row"><h4>Community</h4><ul><li><a href="//docs.prediction.io/install/" target="blank">Download</a></li><li><a href="//docs.prediction.io/" target="blank">Docs</a></li><li><a href="//github.com/apache/incubator-predictionio" target="blank">GitHub</a></li><li><a href="mailto:user-subscribe@predictionio.incubator.apache.org" target="blank">Subscribe to User Mailing List</a></li><li><a href="//stackoverflow.com/questions/tagged/predictionio" target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 col-xs-6 footer-link-column"><div class="footer-link-column-row"><h4>Contribute</h4><ul><li><a href="//predictionio.incubator.apache.or
 g/community/contribute-code/" target="blank">Contribute</a></li><li><a href="//github.com/apache/incubator-predictionio" target="blank">Source Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" target="blank">Bug Tracker</a></li><li><a href="mailto:dev-subscribe@predictionio.incubator.apache.org" target="blank">Subscribe to Development Mailing List</a></li></ul></div></div></div></div><div id="footer-bottom"><div class="container"><div class="row"><div class="col-md-12"><div id="footer-logo-wrapper"><img alt="PredictionIO" src="/images/logos/logo-white-d1e9c6e6.png"/></div><div id="social-icons-wrapper"><a class="github-button" href="https://github.com/apache/incubator-predictionio" data-style="mega" data-count-href="/apache/incubator-predictionio/stargazers" data-count-api="/repos/apache/incubator-predictionio#stargazers_count" data-count-aria-label="# stargazers on GitHub" aria-label="Star apache/incubator-predictionio on GitHub">Star</a> <a class="github-button" href=
 "https://github.com/apache/incubator-predictionio/fork" data-icon="octicon-git-branch" data-style="mega" data-count-href="/apache/incubator-predictionio/network" data-count-api="/repos/apache/incubator-predictionio#forks_count" data-count-aria-label="# forks on GitHub" aria-label="Fork apache/incubator-predictionio on GitHub">Fork</a> <script id="github-bjs" async="" defer="" src="https://buttons.github.io/buttons.js"></script><a href="//www.facebook.com/predictionio" target="blank"><img alt="PredictionIO on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a href="//twitter.com/predictionio" target="blank"><img alt="PredictionIO on Facebook" src="/images/icons/facebook-5c57939c.png"/></a> </div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){
+(w[n].q=w[n].q||[]).push(arguments);};s=d.createElement(t);
+e=d.getElementsByTagName(t)[0];s.async=1;s.src=u;e.parentNode.insertBefore(s,e);
+})(window,document,'script','//s.swiftypecdn.com/install/v1/st.js','_st');
+
+_st('install','HaUfpXXV87xoB_zzCQ45');</script><script src="/javascripts/application-280db181.js"></script></body></html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/02715c51/datacollection/analytics-zeppelin/index.html.gz
----------------------------------------------------------------------
diff --git a/datacollection/analytics-zeppelin/index.html.gz b/datacollection/analytics-zeppelin/index.html.gz
new file mode 100644
index 0000000..f479740
Binary files /dev/null and b/datacollection/analytics-zeppelin/index.html.gz differ

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/02715c51/datacollection/analytics/index.html
----------------------------------------------------------------------
diff --git a/datacollection/analytics/index.html b/datacollection/analytics/index.html
new file mode 100644
index 0000000..127e1f2
--- /dev/null
+++ b/datacollection/analytics/index.html
@@ -0,0 +1,6 @@
+<!DOCTYPE html><html><head><title>Using Analytics Tools</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><meta class="swiftype" name="title" data-type="string" content="Using Analytics Tools"/><link rel="canonical" href="https://docs.prediction.io/datacollection/analytics/"/><link href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800" rel="stylesheet"/><link href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" rel="stylesheet"/><link href="/stylesheets/application-a2a2f408.css" rel="stylesheet" type="text/css"/><script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js"></script><script src="//cdn.mathjax.org/mathjax/lates
 t/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: true });}catch(e){}</script></head><body><div id="global"><header><div class="container" id="header-wrapper"><div class="row"><div class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a href="#"></a><a href="http://predictionio.incubator.apache.org/"><img alt="PredictionIO" id="logo" src="/images/logos/logo-ee2b9bb3.png"/></a></div><div id="menu-wrapper"><div id="pill-wrapper"><a class="pill left" href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" href="//github.com/apache/incubator-predictionio/">OPEN SOURCE</a></div></div><img class="mobile-search-bar-toggler hidden-md hidden-lg" src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div id="search-bar-row-wrapper"><div class="container-fluid" id="search-bar-row"><div class="row"><div class="col-md-9 col-sm-11 col-xs-11"><div class="hidd
 en-md hidden-lg" id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Using Analytics Tools</h4></div><h4 class="hidden-sm hidden-xs">PredictionIO Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 hidden-md hidden-lg"><img id="left-menu-indicator" src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img src="/images/icons/search-glass-704bd4ff.png"/><input type="text" id="st-search-input" class="st-search-input" placeholder="Search Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div class="mobile-left-menu-toggler hidden-md hidden-lg"></div></div></div></div><div id="page" class="container-fluid"><div class="row"><div id="left-menu-wrapper" class="col-md
 -3"><nav id="nav-main"><ul><li class="level-1"><a class="expandible" href="/"><span>Apache PredictionIO (incubating) Documentation</span></a><ul><li class="level-2"><a class="final" href="/"><span>Welcome to Apache PredictionIO (incubating)</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Started</span></a><ul><li class="level-2"><a class="final" href="/start/"><span>A Quick Intro</span></a></li><li class="level-2"><a class="final" href="/install/"><span>Installing Apache PredictionIO (incubating)</span></a></li><li class="level-2"><a class="final" href="/start/download/"><span>Downloading an Engine Template</span></a></li><li class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your First Engine</span></a></li><li class="level-2"><a class="final" href="/start/customize/"><span>Customizing the Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Integrating with Your App</span></a><ul><li 
 class="level-2"><a class="final" href="/appintegration/"><span>App Integration Overview</span></a></li><li class="level-2"><a class="expandible" href="/sdk/"><span>List of SDKs</span></a><ul><li class="level-3"><a class="final" href="/sdk/java/"><span>Java & Android SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/php/"><span>PHP SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/community/"><span>Community Powered SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web Service</span></a></li><li class="level-2"><a class="final" href="/cli/#engine-commands"><span>Engine Command-line Interface</span></a></li><li class="level-2"><
 a class="final" href="/deploy/monitoring/"><span>Monitoring Engine</span></a></li><li class="level-2"><a class="final" href="/deploy/engineparams/"><span>Setting Engine Parameters</span></a></li><li class="level-2"><a class="final" href="/deploy/enginevariants/"><span>Deploying Multiple Engine Variants</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Customizing an Engine</span></a><ul><li class="level-2"><a class="final" href="/customize/"><span>Learning DASE</span></a></li><li class="level-2"><a class="final" href="/customize/dase/"><span>Implement DASE</span></a></li><li class="level-2"><a class="final" href="/customize/troubleshooting/"><span>Troubleshooting Engine Development</span></a></li><li class="level-2"><a class="final" href="/api/current/#package"><span>Engine Scala APIs</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Collecting and Analyzing Data</span></a><ul><li class="level-2"><a class="final" href="
 /datacollection/"><span>Event Server Overview</span></a></li><li class="level-2"><a class="final" href="/cli/#event-server-commands"><span>Event Server Command-line Interface</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventapi/"><span>Collecting Data with REST/SDKs</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li class="level-2"><a class="final" href="/datacollection/webhooks/"><span>Unifying Multichannel Data with Webhooks</span></a></li><li class="level-2"><a class="final" href="/datacollection/channel/"><span>Channel</span></a></li><li class="level-2"><a class="final" href="/datacollection/batchimport/"><span>Importing Data in Batch</span></a></li><li class="level-2"><a class="final active" href="/datacollection/analytics/"><span>Using Analytics Tools</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Choosing an Algorithm(s)</span></a><ul><
 li class="level-2"><a class="final" href="/algorithm/"><span>Built-in Algorithm Libraries</span></a></li><li class="level-2"><a class="final" href="/algorithm/switch/"><span>Switching to Another Algorithm</span></a></li><li class="level-2"><a class="final" href="/algorithm/multiple/"><span>Combining Multiple Algorithms</span></a></li><li class="level-2"><a class="final" href="/algorithm/custom/"><span>Adding Your Own Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a class="final" href="/evaluation/"><span>Overview</span></a></li><li class="level-2"><a class="final" href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li class="level-2"><a class="final" href="/evaluation/evaluationdashboard/"><span>Evaluation Dashboard</span></a></li><li class="level-2"><a class="final" href="/evaluation/metricchoose/"><span>Choosing Evaluation Metrics</span></a></li><li cla
 ss="level-2"><a class="final" href="/evaluation/metricbuild/"><span>Building Evaluation Metrics</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>System Architecture</span></a><ul><li class="level-2"><a class="final" href="/system/"><span>Architecture Overview</span></a></li><li class="level-2"><a class="final" href="/system/anotherdatastore/"><span>Using Another Data Store</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li class="level-2"><a class="final" href="/gallery/template-gallery/"><span>Browse</span></a></li><li class="level-2"><a class="final" href="/community/submit-template/"><span>Submit your Engine as a Template</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Demo Tutorials</span></a><ul><li class="level-2"><a class="final" href="/demo/tapster/"><span>Comics Recommendation Demo</span></a></li><li class="level-2"><a class="final" hre
 f="/demo/community/"><span>Community Contributed Demo</span></a></li><li class="level-2"><a class="final" href="/demo/textclassification/"><span>Text Classification Engine Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" href="/community/"><span>Getting Involved</span></a><ul><li class="level-2"><a class="final" href="/community/contribute-code/"><span>Contribute Code</span></a></li><li class="level-2"><a class="final" href="/community/contribute-documentation/"><span>Contribute Documentation</span></a></li><li class="level-2"><a class="final" href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li class="level-2"><a class="final" href="/community/contribute-webhook/"><span>Contribute a Webhook</span></a></li><li class="level-2"><a class="final" href="/community/projects/"><span>Community Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Help</span></a><ul><li class="level-2"><a class="fi
 nal" href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a class="final" href="/support/"><span>Support</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" href="/resources/intellij/"><span>Developing Engines with IntelliJ IDEA</span></a></li><li class="level-2"><a class="final" href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li class="level-2"><a class="final" href="/resources/glossary/"><span>Glossary</span></a></li></ul></li></ul></nav></div><div class="col-md-9 col-sm-12"><div class="content-header hidden-md hidden-lg"><div id="breadcrumbs" class="hidden-sm hidden xs"><ul><li><a href="#">Collecting and Analyzing Data</a><span class="spacer">&gt;</span></li><li><span class="last">Using Analytics Tools</span></li></ul></div><div id="page-title"><h1>Using Analytics Tools</h1></div></div><div id="table-of-content-wrapper"><a id="edit-page-link" href="htt
 ps://github.com/apache/incubator-predictionio/tree/livedoc/docs/manual/source/datacollection/analytics.html.md"><img src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div class="content-header hidden-sm hidden-xs"><div id="breadcrumbs" class="hidden-sm hidden xs"><ul><li><a href="#">Collecting and Analyzing Data</a><span class="spacer">&gt;</span></li><li><span class="last">Using Analytics Tools</span></li></ul></div><div id="page-title"><h1>Using Analytics Tools</h1></div></div><div class="content"><p>Event Server collects and unifies data for your application from multiple channels.</p><p>Data can be exported to Apache parquet format with <code>pio export</code> for fast analysis. The following analytics tools are currently supported:</p> <ol> <li><p><a href="/datacollection/analytics-ipynb/">IPython Notebook</a></p></li> <li><p><a href="/datacollection/analytics-tableau/">Tableau</a></p></li> <li><p><a href="/datacollection/analytics-zeppelin/">Zeppelin</a></
 p></li> </ol> </div></div></div></div><footer><div class="container"><div class="seperator"></div><div class="row"><div class="col-md-6 col-xs-6 footer-link-column"><div class="footer-link-column-row"><h4>Community</h4><ul><li><a href="//docs.prediction.io/install/" target="blank">Download</a></li><li><a href="//docs.prediction.io/" target="blank">Docs</a></li><li><a href="//github.com/apache/incubator-predictionio" target="blank">GitHub</a></li><li><a href="mailto:user-subscribe@predictionio.incubator.apache.org" target="blank">Subscribe to User Mailing List</a></li><li><a href="//stackoverflow.com/questions/tagged/predictionio" target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 col-xs-6 footer-link-column"><div class="footer-link-column-row"><h4>Contribute</h4><ul><li><a href="//predictionio.incubator.apache.org/community/contribute-code/" target="blank">Contribute</a></li><li><a href="//github.com/apache/incubator-predictionio" target="blank">Source Code</
 a></li><li><a href="//issues.apache.org/jira/browse/PIO" target="blank">Bug Tracker</a></li><li><a href="mailto:dev-subscribe@predictionio.incubator.apache.org" target="blank">Subscribe to Development Mailing List</a></li></ul></div></div></div></div><div id="footer-bottom"><div class="container"><div class="row"><div class="col-md-12"><div id="footer-logo-wrapper"><img alt="PredictionIO" src="/images/logos/logo-white-d1e9c6e6.png"/></div><div id="social-icons-wrapper"><a class="github-button" href="https://github.com/apache/incubator-predictionio" data-style="mega" data-count-href="/apache/incubator-predictionio/stargazers" data-count-api="/repos/apache/incubator-predictionio#stargazers_count" data-count-aria-label="# stargazers on GitHub" aria-label="Star apache/incubator-predictionio on GitHub">Star</a> <a class="github-button" href="https://github.com/apache/incubator-predictionio/fork" data-icon="octicon-git-branch" data-style="mega" data-count-href="/apache/incubator-predictio
 nio/network" data-count-api="/repos/apache/incubator-predictionio#forks_count" data-count-aria-label="# forks on GitHub" aria-label="Fork apache/incubator-predictionio on GitHub">Fork</a> <script id="github-bjs" async="" defer="" src="https://buttons.github.io/buttons.js"></script><a href="//www.facebook.com/predictionio" target="blank"><img alt="PredictionIO on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a href="//twitter.com/predictionio" target="blank"><img alt="PredictionIO on Facebook" src="/images/icons/facebook-5c57939c.png"/></a> </div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){
+(w[n].q=w[n].q||[]).push(arguments);};s=d.createElement(t);
+e=d.getElementsByTagName(t)[0];s.async=1;s.src=u;e.parentNode.insertBefore(s,e);
+})(window,document,'script','//s.swiftypecdn.com/install/v1/st.js','_st');
+
+_st('install','HaUfpXXV87xoB_zzCQ45');</script><script src="/javascripts/application-280db181.js"></script></body></html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/02715c51/datacollection/analytics/index.html.gz
----------------------------------------------------------------------
diff --git a/datacollection/analytics/index.html.gz b/datacollection/analytics/index.html.gz
new file mode 100644
index 0000000..e139a41
Binary files /dev/null and b/datacollection/analytics/index.html.gz differ

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/02715c51/datacollection/batchimport/index.html
----------------------------------------------------------------------
diff --git a/datacollection/batchimport/index.html b/datacollection/batchimport/index.html
new file mode 100644
index 0000000..0a5b7ef
--- /dev/null
+++ b/datacollection/batchimport/index.html
@@ -0,0 +1,68 @@
+<!DOCTYPE html><html><head><title>Importing Data in Batch</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><meta class="swiftype" name="title" data-type="string" content="Importing Data in Batch"/><link rel="canonical" href="https://docs.prediction.io/datacollection/batchimport/"/><link href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800" rel="stylesheet"/><link href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" rel="stylesheet"/><link href="/stylesheets/application-a2a2f408.css" rel="stylesheet" type="text/css"/><script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js"></script><script src="//cdn.mathjax.org/mathjax
 /latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: true });}catch(e){}</script></head><body><div id="global"><header><div class="container" id="header-wrapper"><div class="row"><div class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a href="#"></a><a href="http://predictionio.incubator.apache.org/"><img alt="PredictionIO" id="logo" src="/images/logos/logo-ee2b9bb3.png"/></a></div><div id="menu-wrapper"><div id="pill-wrapper"><a class="pill left" href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" href="//github.com/apache/incubator-predictionio/">OPEN SOURCE</a></div></div><img class="mobile-search-bar-toggler hidden-md hidden-lg" src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div id="search-bar-row-wrapper"><div class="container-fluid" id="search-bar-row"><div class="row"><div class="col-md-9 col-sm-11 col-xs-11"><div class
 ="hidden-md hidden-lg" id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Importing Data in Batch</h4></div><h4 class="hidden-sm hidden-xs">PredictionIO Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 hidden-md hidden-lg"><img id="left-menu-indicator" src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img src="/images/icons/search-glass-704bd4ff.png"/><input type="text" id="st-search-input" class="st-search-input" placeholder="Search Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div class="mobile-left-menu-toggler hidden-md hidden-lg"></div></div></div></div><div id="page" class="container-fluid"><div class="row"><div id="left-menu-wrapper" class
 ="col-md-3"><nav id="nav-main"><ul><li class="level-1"><a class="expandible" href="/"><span>Apache PredictionIO (incubating) Documentation</span></a><ul><li class="level-2"><a class="final" href="/"><span>Welcome to Apache PredictionIO (incubating)</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Started</span></a><ul><li class="level-2"><a class="final" href="/start/"><span>A Quick Intro</span></a></li><li class="level-2"><a class="final" href="/install/"><span>Installing Apache PredictionIO (incubating)</span></a></li><li class="level-2"><a class="final" href="/start/download/"><span>Downloading an Engine Template</span></a></li><li class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your First Engine</span></a></li><li class="level-2"><a class="final" href="/start/customize/"><span>Customizing the Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Integrating with Your App</span></a>
 <ul><li class="level-2"><a class="final" href="/appintegration/"><span>App Integration Overview</span></a></li><li class="level-2"><a class="expandible" href="/sdk/"><span>List of SDKs</span></a><ul><li class="level-3"><a class="final" href="/sdk/java/"><span>Java & Android SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/php/"><span>PHP SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/community/"><span>Community Powered SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web Service</span></a></li><li class="level-2"><a class="final" href="/cli/#engine-commands"><span>Engine Command-line Interface</span></a></li><li class="le
 vel-2"><a class="final" href="/deploy/monitoring/"><span>Monitoring Engine</span></a></li><li class="level-2"><a class="final" href="/deploy/engineparams/"><span>Setting Engine Parameters</span></a></li><li class="level-2"><a class="final" href="/deploy/enginevariants/"><span>Deploying Multiple Engine Variants</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Customizing an Engine</span></a><ul><li class="level-2"><a class="final" href="/customize/"><span>Learning DASE</span></a></li><li class="level-2"><a class="final" href="/customize/dase/"><span>Implement DASE</span></a></li><li class="level-2"><a class="final" href="/customize/troubleshooting/"><span>Troubleshooting Engine Development</span></a></li><li class="level-2"><a class="final" href="/api/current/#package"><span>Engine Scala APIs</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Collecting and Analyzing Data</span></a><ul><li class="level-2"><a class="final
 " href="/datacollection/"><span>Event Server Overview</span></a></li><li class="level-2"><a class="final" href="/cli/#event-server-commands"><span>Event Server Command-line Interface</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventapi/"><span>Collecting Data with REST/SDKs</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li class="level-2"><a class="final" href="/datacollection/webhooks/"><span>Unifying Multichannel Data with Webhooks</span></a></li><li class="level-2"><a class="final" href="/datacollection/channel/"><span>Channel</span></a></li><li class="level-2"><a class="final active" href="/datacollection/batchimport/"><span>Importing Data in Batch</span></a></li><li class="level-2"><a class="final" href="/datacollection/analytics/"><span>Using Analytics Tools</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Choosing an Algorithm(s)</span><
 /a><ul><li class="level-2"><a class="final" href="/algorithm/"><span>Built-in Algorithm Libraries</span></a></li><li class="level-2"><a class="final" href="/algorithm/switch/"><span>Switching to Another Algorithm</span></a></li><li class="level-2"><a class="final" href="/algorithm/multiple/"><span>Combining Multiple Algorithms</span></a></li><li class="level-2"><a class="final" href="/algorithm/custom/"><span>Adding Your Own Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a class="final" href="/evaluation/"><span>Overview</span></a></li><li class="level-2"><a class="final" href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li class="level-2"><a class="final" href="/evaluation/evaluationdashboard/"><span>Evaluation Dashboard</span></a></li><li class="level-2"><a class="final" href="/evaluation/metricchoose/"><span>Choosing Evaluation Metrics</span></a></li
 ><li class="level-2"><a class="final" href="/evaluation/metricbuild/"><span>Building Evaluation Metrics</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>System Architecture</span></a><ul><li class="level-2"><a class="final" href="/system/"><span>Architecture Overview</span></a></li><li class="level-2"><a class="final" href="/system/anotherdatastore/"><span>Using Another Data Store</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li class="level-2"><a class="final" href="/gallery/template-gallery/"><span>Browse</span></a></li><li class="level-2"><a class="final" href="/community/submit-template/"><span>Submit your Engine as a Template</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Demo Tutorials</span></a><ul><li class="level-2"><a class="final" href="/demo/tapster/"><span>Comics Recommendation Demo</span></a></li><li class="level-2"><a class="fi
 nal" href="/demo/community/"><span>Community Contributed Demo</span></a></li><li class="level-2"><a class="final" href="/demo/textclassification/"><span>Text Classification Engine Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" href="/community/"><span>Getting Involved</span></a><ul><li class="level-2"><a class="final" href="/community/contribute-code/"><span>Contribute Code</span></a></li><li class="level-2"><a class="final" href="/community/contribute-documentation/"><span>Contribute Documentation</span></a></li><li class="level-2"><a class="final" href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li class="level-2"><a class="final" href="/community/contribute-webhook/"><span>Contribute a Webhook</span></a></li><li class="level-2"><a class="final" href="/community/projects/"><span>Community Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Help</span></a><ul><li class="level-2"><a c
 lass="final" href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a class="final" href="/support/"><span>Support</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" href="/resources/intellij/"><span>Developing Engines with IntelliJ IDEA</span></a></li><li class="level-2"><a class="final" href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li class="level-2"><a class="final" href="/resources/glossary/"><span>Glossary</span></a></li></ul></li></ul></nav></div><div class="col-md-9 col-sm-12"><div class="content-header hidden-md hidden-lg"><div id="breadcrumbs" class="hidden-sm hidden xs"><ul><li><a href="#">Collecting and Analyzing Data</a><span class="spacer">&gt;</span></li><li><span class="last">Importing Data in Batch</span></li></ul></div><div id="page-title"><h1>Importing Data in Batch</h1></div></div><div id="table-of-content-wrapper"><h5>On this page</h5
 ><aside id="table-of-contents"><ul> <li> <a href="#preparing-input-file">Preparing Input File</a> </li> <li> <a href="#use-sdk-to-prepare-batch-input-file">Use SDK to Prepare Batch Input File</a> </li> <li> <a href="#import-events-from-input-file">Import Events from Input File</a> </li> </ul> </aside><hr/><a id="edit-page-link" href="https://github.com/apache/incubator-predictionio/tree/livedoc/docs/manual/source/datacollection/batchimport.html.md"><img src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div class="content-header hidden-sm hidden-xs"><div id="breadcrumbs" class="hidden-sm hidden xs"><ul><li><a href="#">Collecting and Analyzing Data</a><span class="spacer">&gt;</span></li><li><span class="last">Importing Data in Batch</span></li></ul></div><div id="page-title"><h1>Importing Data in Batch</h1></div></div><div class="content"><p>If you have a large amount of data to start with, performing batch import will be much faster than sending every event over
  an HTTP connection.</p><h2 id='preparing-input-file' class='header-anchors'>Preparing Input File</h2><p>The import tool expects its input to be a file stored either in the local filesystem or on HDFS. Each line of the file should be a JSON object string representing an event. For more information about the format of event JSON object, please refer to <a href="/datacollection/eventapi/#using-event-api">this page</a>.</p><p>Shown below is an example that contains 5 events ready to be imported to the Event Server.</p><div class="highlight json"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
+2
+3
+4
+5</pre></td><td class="code"><pre><span class="p">{</span><span class="s2">"event"</span><span class="p">:</span><span class="s2">"buy"</span><span class="p">,</span><span class="s2">"entityType"</span><span class="p">:</span><span class="s2">"user"</span><span class="p">,</span><span class="s2">"entityId"</span><span class="p">:</span><span class="s2">"3"</span><span class="p">,</span><span class="s2">"targetEntityType"</span><span class="p">:</span><span class="s2">"item"</span><span class="p">,</span><span class="s2">"targetEntityId"</span><span class="p">:</span><span class="s2">"0"</span><span class="p">,</span><span class="s2">"eventTime"</span><span class="p">:</span><span class="s2">"2014-11-21T01:04:14.716Z"</span><span class="p">}</span><span class="w">
+</span><span class="p">{</span><span class="s2">"event"</span><span class="p">:</span><span class="s2">"buy"</span><span class="p">,</span><span class="s2">"entityType"</span><span class="p">:</span><span class="s2">"user"</span><span class="p">,</span><span class="s2">"entityId"</span><span class="p">:</span><span class="s2">"3"</span><span class="p">,</span><span class="s2">"targetEntityType"</span><span class="p">:</span><span class="s2">"item"</span><span class="p">,</span><span class="s2">"targetEntityId"</span><span class="p">:</span><span class="s2">"1"</span><span class="p">,</span><span class="s2">"eventTime"</span><span class="p">:</span><span class="s2">"2014-11-21T01:04:14.722Z"</span><span class="p">}</span><span class="w">
+</span><span class="p">{</span><span class="s2">"event"</span><span class="p">:</span><span class="s2">"rate"</span><span class="p">,</span><span class="s2">"entityType"</span><span class="p">:</span><span class="s2">"user"</span><span class="p">,</span><span class="s2">"entityId"</span><span class="p">:</span><span class="s2">"3"</span><span class="p">,</span><span class="s2">"targetEntityType"</span><span class="p">:</span><span class="s2">"item"</span><span class="p">,</span><span class="s2">"targetEntityId"</span><span class="p">:</span><span class="s2">"2"</span><span class="p">,</span><span class="s2">"properties"</span><span class="p">:{</span><span class="s2">"rating"</span><span class="p">:</span><span class="mf">1.0</span><span class="p">},</span><span class="s2">"eventTime"</span><span class="p">:</span><span class="s2">"2014-11-21T01:04:14.729Z"</span><span class="p">}</span><span class="w">
+</span><span class="p">{</span><span class="s2">"event"</span><span class="p">:</span><span class="s2">"buy"</span><span class="p">,</span><span class="s2">"entityType"</span><span class="p">:</span><span class="s2">"user"</span><span class="p">,</span><span class="s2">"entityId"</span><span class="p">:</span><span class="s2">"3"</span><span class="p">,</span><span class="s2">"targetEntityType"</span><span class="p">:</span><span class="s2">"item"</span><span class="p">,</span><span class="s2">"targetEntityId"</span><span class="p">:</span><span class="s2">"7"</span><span class="p">,</span><span class="s2">"eventTime"</span><span class="p">:</span><span class="s2">"2014-11-21T01:04:14.735Z"</span><span class="p">}</span><span class="w">
+</span><span class="p">{</span><span class="s2">"event"</span><span class="p">:</span><span class="s2">"buy"</span><span class="p">,</span><span class="s2">"entityType"</span><span class="p">:</span><span class="s2">"user"</span><span class="p">,</span><span class="s2">"entityId"</span><span class="p">:</span><span class="s2">"3"</span><span class="p">,</span><span class="s2">"targetEntityType"</span><span class="p">:</span><span class="s2">"item"</span><span class="p">,</span><span class="s2">"targetEntityId"</span><span class="p">:</span><span class="s2">"8"</span><span class="p">,</span><span class="s2">"eventTime"</span><span class="p">:</span><span class="s2">"2014-11-21T01:04:14.741Z"</span><span class="p">}</span><span class="w">
+</span></pre></td></tr></tbody></table> </div> <div class="alert-message warning"><p>Please make sure your import file does not contain any empty lines. Empty lines will be treated as a null object and will return an error during import.</p></div><h2 id='use-sdk-to-prepare-batch-input-file' class='header-anchors'>Use SDK to Prepare Batch Input File</h2><p>Some of the Apache PredictionIO (incubating) SDKs also provides FileExporter client. You may use them to prepare the JSON file as described above. The FileExporter creates event in the same way as EventClient except that the events are written to a JSON file instead of being sent to EventSever. The written JSON file can then be used by batch import.</p><div class="tabs"> <ul class="control"> <li data-lang="php"><a href="#tab-2f1c6d4b-1fb7-4237-b9ac-c7df7d24d1b6">PHP SDK</a></li> <li data-lang="python"><a href="#tab-3ecb56f5-a879-4886-9981-f91c6a930452">Python SDK</a></li> <li data-lang="ruby"><a href="#tab-11ae42bb-fa6a-4984-9d5b-0
 ab3aad5fa40">Ruby SDK</a></li> <li data-lang="java"><a href="#tab-1bc08717-b1f8-4ad4-b862-845ee88e9b6e">Java SDK</a></li> </ul> <div data-tab="PHP SDK" data-lang="php" id="tab-2f1c6d4b-1fb7-4237-b9ac-c7df7d24d1b6"> (coming soon) </div> <div data-tab="Python SDK" data-lang="python" id="tab-3ecb56f5-a879-4886-9981-f91c6a930452"> <div class="highlight python"> <table style="border-spacing: 0"><tbody><tr> <td class="gutter gl" style="text-align: right"><pre class="lineno">1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26</pre></td> <td class="code"><pre><span class="kn">import</span> <span class="nn">predictionio</span>
+<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span>
+<span class="kn">import</span> <span class="nn">pytz</span>
+
+<span class="c"># Create a FileExporter and specify "my_events.json" as destination file</span>
+<span class="n">exporter</span> <span class="o">=</span> <span class="n">predictionio</span><span class="o">.</span><span class="n">FileExporter</span><span class="p">(</span><span class="n">file_name</span><span class="o">=</span><span class="s">"my_events.json"</span><span class="p">)</span>
+
+<span class="n">event_properties</span> <span class="o">=</span> <span class="p">{</span>
+    <span class="s">"someProperty"</span> <span class="p">:</span> <span class="s">"value1"</span><span class="p">,</span>
+    <span class="s">"anotherProperty"</span> <span class="p">:</span> <span class="s">"value2"</span><span class="p">,</span>
+    <span class="p">}</span>
+<span class="c"># write the events to a file</span>
+<span class="n">event_response</span> <span class="o">=</span> <span class="n">exporter</span><span class="o">.</span><span class="n">create_event</span><span class="p">(</span>
+    <span class="n">event</span><span class="o">=</span><span class="s">"my_event"</span><span class="p">,</span>
+    <span class="n">entity_type</span><span class="o">=</span><span class="s">"user"</span><span class="p">,</span>
+    <span class="n">entity_id</span><span class="o">=</span><span class="s">"uid"</span><span class="p">,</span>
+    <span class="n">target_entity_type</span><span class="o">=</span><span class="s">"item"</span><span class="p">,</span>
+    <span class="n">target_entity_id</span><span class="o">=</span><span class="s">"iid"</span><span class="p">,</span>
+    <span class="n">properties</span><span class="o">=</span><span class="n">event_properties</span><span class="p">,</span>
+    <span class="n">event_time</span><span class="o">=</span><span class="n">datetime</span><span class="p">(</span><span class="mi">2014</span><span class="p">,</span> <span class="mi">12</span><span class="p">,</span> <span class="mi">13</span><span class="p">,</span> <span class="mi">21</span><span class="p">,</span> <span class="mi">38</span><span class="p">,</span> <span class="mi">45</span><span class="p">,</span> <span class="mi">618000</span><span class="p">,</span> <span class="n">pytz</span><span class="o">.</span><span class="n">utc</span><span class="p">))</span>
+
+<span class="c"># ...</span>
+
+<span class="c"># close the FileExporter when finish writing all events</span>
+<span class="n">exporter</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
+
+</pre></td> </tr></tbody></table> </div> </div> <div data-tab="Ruby SDK" data-lang="ruby" id="tab-11ae42bb-fa6a-4984-9d5b-0ab3aad5fa40"> (coming soon) </div> <div data-tab="Java SDK" data-lang="java" id="tab-1bc08717-b1f8-4ad4-b862-845ee88e9b6e"> <div class="highlight java"> <table style="border-spacing: 0"><tbody><tr> <td class="gutter gl" style="text-align: right"><pre class="lineno">1</pre></td> <td class="code"><pre><span class="o">(</span><span class="n">coming</span> <span class="n">soon</span><span class="o">)</span>
+</pre></td> </tr></tbody></table> </div> </div> </div> <h2 id='import-events-from-input-file' class='header-anchors'>Import Events from Input File</h2><p>Importing events from a file can be done easily using the command line interface. Assuming that <code>pio</code> be in your search path, your App ID be <code>123</code>, and the input file <code>my_events.json</code> be in your current working directory:</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1</pre></td><td class="code"><pre><span class="gp">$ </span>pio import --appid 123 --input my_events.json
+</pre></td></tr></tbody></table> </div> <p>After a brief while, the tool should return to the console without any error. Congratulations! You have successfully imported your events.</p></div></div></div></div><footer><div class="container"><div class="seperator"></div><div class="row"><div class="col-md-6 col-xs-6 footer-link-column"><div class="footer-link-column-row"><h4>Community</h4><ul><li><a href="//docs.prediction.io/install/" target="blank">Download</a></li><li><a href="//docs.prediction.io/" target="blank">Docs</a></li><li><a href="//github.com/apache/incubator-predictionio" target="blank">GitHub</a></li><li><a href="mailto:user-subscribe@predictionio.incubator.apache.org" target="blank">Subscribe to User Mailing List</a></li><li><a href="//stackoverflow.com/questions/tagged/predictionio" target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 col-xs-6 footer-link-column"><div class="footer-link-column-row"><h4>Contribute</h4><ul><li><a href="//prediction
 io.incubator.apache.org/community/contribute-code/" target="blank">Contribute</a></li><li><a href="//github.com/apache/incubator-predictionio" target="blank">Source Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" target="blank">Bug Tracker</a></li><li><a href="mailto:dev-subscribe@predictionio.incubator.apache.org" target="blank">Subscribe to Development Mailing List</a></li></ul></div></div></div></div><div id="footer-bottom"><div class="container"><div class="row"><div class="col-md-12"><div id="footer-logo-wrapper"><img alt="PredictionIO" src="/images/logos/logo-white-d1e9c6e6.png"/></div><div id="social-icons-wrapper"><a class="github-button" href="https://github.com/apache/incubator-predictionio" data-style="mega" data-count-href="/apache/incubator-predictionio/stargazers" data-count-api="/repos/apache/incubator-predictionio#stargazers_count" data-count-aria-label="# stargazers on GitHub" aria-label="Star apache/incubator-predictionio on GitHub">Star</a> <a class
 ="github-button" href="https://github.com/apache/incubator-predictionio/fork" data-icon="octicon-git-branch" data-style="mega" data-count-href="/apache/incubator-predictionio/network" data-count-api="/repos/apache/incubator-predictionio#forks_count" data-count-aria-label="# forks on GitHub" aria-label="Fork apache/incubator-predictionio on GitHub">Fork</a> <script id="github-bjs" async="" defer="" src="https://buttons.github.io/buttons.js"></script><a href="//www.facebook.com/predictionio" target="blank"><img alt="PredictionIO on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a href="//twitter.com/predictionio" target="blank"><img alt="PredictionIO on Facebook" src="/images/icons/facebook-5c57939c.png"/></a> </div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){
+(w[n].q=w[n].q||[]).push(arguments);};s=d.createElement(t);
+e=d.getElementsByTagName(t)[0];s.async=1;s.src=u;e.parentNode.insertBefore(s,e);
+})(window,document,'script','//s.swiftypecdn.com/install/v1/st.js','_st');
+
+_st('install','HaUfpXXV87xoB_zzCQ45');</script><script src="/javascripts/application-280db181.js"></script></body></html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/02715c51/datacollection/batchimport/index.html.gz
----------------------------------------------------------------------
diff --git a/datacollection/batchimport/index.html.gz b/datacollection/batchimport/index.html.gz
new file mode 100644
index 0000000..876ee44
Binary files /dev/null and b/datacollection/batchimport/index.html.gz differ