You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@predictionio.apache.org by do...@apache.org on 2016/10/08 23:42:34 UTC

[37/51] [abbrv] [partial] incubator-predictionio-site git commit: Documentation based on apache/incubator-predictionio#df568b6d505812928b59a662408d90119d524173

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/64c98d37/evaluation/metricchoose/index.html
----------------------------------------------------------------------
diff --git a/evaluation/metricchoose/index.html b/evaluation/metricchoose/index.html
new file mode 100644
index 0000000..8d52994
--- /dev/null
+++ b/evaluation/metricchoose/index.html
@@ -0,0 +1,6 @@
+<!DOCTYPE html><html><head><title>Choosing Evaluation Metrics</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><meta class="swiftype" name="title" data-type="string" content="Choosing Evaluation Metrics"/><link rel="canonical" href="https://docs.prediction.io/evaluation/metricchoose/"/><link href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800" rel="stylesheet"/><link href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" rel="stylesheet"/><link href="/stylesheets/application-a2a2f408.css" rel="stylesheet" type="text/css"/><script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js"></script><script src="//cdn.mathjax.org/ma
 thjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: true });}catch(e){}</script></head><body><div id="global"><header><div class="container" id="header-wrapper"><div class="row"><div class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a href="#"></a><a href="http://predictionio.incubator.apache.org/"><img alt="PredictionIO" id="logo" src="/images/logos/logo-ee2b9bb3.png"/></a></div><div id="menu-wrapper"><div id="pill-wrapper"><a class="pill left" href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" href="//github.com/apache/incubator-predictionio/">OPEN SOURCE</a></div></div><img class="mobile-search-bar-toggler hidden-md hidden-lg" src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div id="search-bar-row-wrapper"><div class="container-fluid" id="search-bar-row"><div class="row"><div class="col-md-9 col-sm-11 col-xs-11"><div 
 class="hidden-md hidden-lg" id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Choosing Evaluation Metrics</h4></div><h4 class="hidden-sm hidden-xs">PredictionIO Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 hidden-md hidden-lg"><img id="left-menu-indicator" src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img src="/images/icons/search-glass-704bd4ff.png"/><input type="text" id="st-search-input" class="st-search-input" placeholder="Search Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div class="mobile-left-menu-toggler hidden-md hidden-lg"></div></div></div></div><div id="page" class="container-fluid"><div class="row"><div id="left-menu-wrapp
 er" class="col-md-3"><nav id="nav-main"><ul><li class="level-1"><a class="expandible" href="/"><span>Apache PredictionIO (incubating) Documentation</span></a><ul><li class="level-2"><a class="final" href="/"><span>Welcome to Apache PredictionIO (incubating)</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Started</span></a><ul><li class="level-2"><a class="final" href="/start/"><span>A Quick Intro</span></a></li><li class="level-2"><a class="final" href="/install/"><span>Installing Apache PredictionIO (incubating)</span></a></li><li class="level-2"><a class="final" href="/start/download/"><span>Downloading an Engine Template</span></a></li><li class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your First Engine</span></a></li><li class="level-2"><a class="final" href="/start/customize/"><span>Customizing the Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Integrating with Your App</
 span></a><ul><li class="level-2"><a class="final" href="/appintegration/"><span>App Integration Overview</span></a></li><li class="level-2"><a class="expandible" href="/sdk/"><span>List of SDKs</span></a><ul><li class="level-3"><a class="final" href="/sdk/java/"><span>Java & Android SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/php/"><span>PHP SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/community/"><span>Community Powered SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web Service</span></a></li><li class="level-2"><a class="final" href="/cli/#engine-commands"><span>Engine Command-line Interface</span></a></li><li 
 class="level-2"><a class="final" href="/deploy/monitoring/"><span>Monitoring Engine</span></a></li><li class="level-2"><a class="final" href="/deploy/engineparams/"><span>Setting Engine Parameters</span></a></li><li class="level-2"><a class="final" href="/deploy/enginevariants/"><span>Deploying Multiple Engine Variants</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Customizing an Engine</span></a><ul><li class="level-2"><a class="final" href="/customize/"><span>Learning DASE</span></a></li><li class="level-2"><a class="final" href="/customize/dase/"><span>Implement DASE</span></a></li><li class="level-2"><a class="final" href="/customize/troubleshooting/"><span>Troubleshooting Engine Development</span></a></li><li class="level-2"><a class="final" href="/api/current/#package"><span>Engine Scala APIs</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Collecting and Analyzing Data</span></a><ul><li class="level-2"><a cla
 ss="final" href="/datacollection/"><span>Event Server Overview</span></a></li><li class="level-2"><a class="final" href="/cli/#event-server-commands"><span>Event Server Command-line Interface</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventapi/"><span>Collecting Data with REST/SDKs</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li class="level-2"><a class="final" href="/datacollection/webhooks/"><span>Unifying Multichannel Data with Webhooks</span></a></li><li class="level-2"><a class="final" href="/datacollection/channel/"><span>Channel</span></a></li><li class="level-2"><a class="final" href="/datacollection/batchimport/"><span>Importing Data in Batch</span></a></li><li class="level-2"><a class="final" href="/datacollection/analytics/"><span>Using Analytics Tools</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Choosing an Algorithm(s)</span
 ></a><ul><li class="level-2"><a class="final" href="/algorithm/"><span>Built-in Algorithm Libraries</span></a></li><li class="level-2"><a class="final" href="/algorithm/switch/"><span>Switching to Another Algorithm</span></a></li><li class="level-2"><a class="final" href="/algorithm/multiple/"><span>Combining Multiple Algorithms</span></a></li><li class="level-2"><a class="final" href="/algorithm/custom/"><span>Adding Your Own Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a class="final" href="/evaluation/"><span>Overview</span></a></li><li class="level-2"><a class="final" href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li class="level-2"><a class="final" href="/evaluation/evaluationdashboard/"><span>Evaluation Dashboard</span></a></li><li class="level-2"><a class="final active" href="/evaluation/metricchoose/"><span>Choosing Evaluation Metrics</span
 ></a></li><li class="level-2"><a class="final" href="/evaluation/metricbuild/"><span>Building Evaluation Metrics</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>System Architecture</span></a><ul><li class="level-2"><a class="final" href="/system/"><span>Architecture Overview</span></a></li><li class="level-2"><a class="final" href="/system/anotherdatastore/"><span>Using Another Data Store</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li class="level-2"><a class="final" href="/gallery/template-gallery/"><span>Browse</span></a></li><li class="level-2"><a class="final" href="/community/submit-template/"><span>Submit your Engine as a Template</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Demo Tutorials</span></a><ul><li class="level-2"><a class="final" href="/demo/tapster/"><span>Comics Recommendation Demo</span></a></li><li class="level-2"><a 
 class="final" href="/demo/community/"><span>Community Contributed Demo</span></a></li><li class="level-2"><a class="final" href="/demo/textclassification/"><span>Text Classification Engine Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" href="/community/"><span>Getting Involved</span></a><ul><li class="level-2"><a class="final" href="/community/contribute-code/"><span>Contribute Code</span></a></li><li class="level-2"><a class="final" href="/community/contribute-documentation/"><span>Contribute Documentation</span></a></li><li class="level-2"><a class="final" href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li class="level-2"><a class="final" href="/community/contribute-webhook/"><span>Contribute a Webhook</span></a></li><li class="level-2"><a class="final" href="/community/projects/"><span>Community Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Help</span></a><ul><li class="leve
 l-2"><a class="final" href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a class="final" href="/support/"><span>Support</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" href="/resources/intellij/"><span>Developing Engines with IntelliJ IDEA</span></a></li><li class="level-2"><a class="final" href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li class="level-2"><a class="final" href="/resources/glossary/"><span>Glossary</span></a></li></ul></li></ul></nav></div><div class="col-md-9 col-sm-12"><div class="content-header hidden-md hidden-lg"><div id="breadcrumbs" class="hidden-sm hidden xs"><ul><li><a href="#">ML Tuning and Evaluation</a><span class="spacer">&gt;</span></li><li><span class="last">Choosing Evaluation Metrics</span></li></ul></div><div id="page-title"><h1>Choosing Evaluation Metrics</h1></div></div><div id="table-of-content-wrapper"><h5>On t
 his page</h5><aside id="table-of-contents"><ul> <li> <a href="#defining-metric">Defining Metric</a> </li> <li> <a href="#common-metrics">Common Metrics</a> </li> </ul> </aside><hr/><a id="edit-page-link" href="https://github.com/apache/incubator-predictionio/tree/livedoc/docs/manual/source/evaluation/metricchoose.html.md"><img src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div class="content-header hidden-sm hidden-xs"><div id="breadcrumbs" class="hidden-sm hidden xs"><ul><li><a href="#">ML Tuning and Evaluation</a><span class="spacer">&gt;</span></li><li><span class="last">Choosing Evaluation Metrics</span></li></ul></div><div id="page-title"><h1>Choosing Evaluation Metrics</h1></div></div><div class="content"><p>The <a href="/evaluation/paramtuning/">hyperparameter tuning module</a> allows us to select the optimal engine parameter defined by a <code>Metric</code>. <code>Metric</code> determines the quality of an engine variant. We have skimmed through the 
 process of choosing the right <code>Metric</code> in previous sections.</p><p>This section discusses basic evaluation metrics commonly used for classification problems. If you are more interested in knowing how to <em>implement</em> a custom metric, please skip to <a href="/evaluation/metricbuild/">the next section</a>.</p><h2 id='defining-metric' class='header-anchors'>Defining Metric</h2><p>Metric evaluates the quality of an engine by comparing engine&#39;s output (predicted result) with the original label (actual result). An engine serving better prediction should yield a higher metric score; the tuning module returns the engine parameter with the highest score. It is sometimes called <a href="http://en.wikipedia.org/wiki/Loss_function"><em>loss function</em></a> in literature, where the goal is to minimize the loss function. </p><p>During tuning, it is important for us to understand the definition of the metric, to make sure it is aligned with the prediction engine&#39;s goal.</p>
 <p>In the classification template, we use <em>Accuracy</em> as our metric. <em>Accuracy</em> is defined as: the percentage of queries which the engine is able to predict the correct label. </p><h2 id='common-metrics' class='header-anchors'>Common Metrics</h2><p>We illustrate the choice of metric with the following confusion matrix. Row represents the engine predicted label, column represents the actual label. The second row means that of the 200 testing data points, the engine predicted 60 (15 + 35 + 10) of them as label 2.0, among which 35 are correct prediction (i.e. actual label is 2.0, matches with the prediction), and 25 are wrong.</p> <table><thead> <tr> <th style="text-align: center"></th> <th style="text-align: center">Actual = 1.0</th> <th style="text-align: center">Actual = 2.0</th> <th style="text-align: center">Actual = 3.0</th> </tr> </thead><tbody> <tr> <td style="text-align: center"><strong>Predicted = 1.0</strong></td> <td style="text-align: center">30</td> <td style
 ="text-align: center">0</td> <td style="text-align: center">60</td> </tr> <tr> <td style="text-align: center"><strong>Predicted = 2.0</strong></td> <td style="text-align: center">15</td> <td style="text-align: center">35</td> <td style="text-align: center">10</td> </tr> <tr> <td style="text-align: center"><strong>Predicted = 3.0</strong></td> <td style="text-align: center">0</td> <td style="text-align: center">0</td> <td style="text-align: center">50</td> </tr> </tbody></table> <h3 id='accuracy' class='header-anchors'>Accuracy</h3><p>Accuracy means that how many data points are predicted correctly. It is one of the simplest form of evaluation metrics. The accuracy score is # of correct points / # total = (30 + 35 + 50) / 200 = 0.575.</p><h3 id='precision' class='header-anchors'>Precision</h3><p>Precision is a metric for binary classifier which measures the correctness among all positive labels. A binary classifier gives only two output values (i.e. positive and negative). For proble
 m where there are multiple values (3 in our example), we first have to transform our problem into a binary classification problem. For example, we can consider the problem of whether label = 1.0. The confusion matrix now becomes:</p> <table><thead> <tr> <th style="text-align: center"></th> <th style="text-align: center">Actual = 1.0</th> <th style="text-align: center">Actual != 1.0</th> </tr> </thead><tbody> <tr> <td style="text-align: center"><strong>Predicted = 1.0</strong></td> <td style="text-align: center">30</td> <td style="text-align: center">60</td> </tr> <tr> <td style="text-align: center"><strong>Predicted != 1.0</strong></td> <td style="text-align: center">15</td> <td style="text-align: center">95</td> </tr> </tbody></table> <p>Precision is the ratio between the number of correct positive answer (true positive) and the sum of correct positive answer (true positive) and wrong but positively labeled answer (false positive). In this case, the precision is 30 / (30 + 60) = ~0.3333.</p><h
 3 id='recall' class='header-anchors'>Recall</h3><p>Recall is a metric for binary classifier which measures how many positive labels are successfully predicted amongst all positive labels. Formally, it is the ratio between the number of correct positive answer (true positive) and the sum of correct positive answer (true positive) and wrongly negatively labeled answer (false negative). In this case, the recall is 30 / (30 + 15) = ~0.6667.</p><p>As we have discussed several common metrics for classification problems, we can implement them using the <code>Metric</code> class in <a href="/evaluation/metricbuild">the next section</a>.</p></div></div></div></div><footer><div class="container"><div class="seperator"></div><div class="row"><div class="col-md-6 col-xs-6 footer-link-column"><div class="footer-link-column-row"><h4>Community</h4><ul><li><a href="//docs.prediction.io/install/" target="blank">Download</a></li><li><a href="//docs.prediction.io/" target="blank">Docs</a></li><li><a hr
 ef="//github.com/apache/incubator-predictionio" target="blank">GitHub</a></li><li><a href="mailto:user-subscribe@predictionio.incubator.apache.org" target="blank">Subscribe to User Mailing List</a></li><li><a href="//stackoverflow.com/questions/tagged/predictionio" target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 col-xs-6 footer-link-column"><div class="footer-link-column-row"><h4>Contribute</h4><ul><li><a href="//predictionio.incubator.apache.org/community/contribute-code/" target="blank">Contribute</a></li><li><a href="//github.com/apache/incubator-predictionio" target="blank">Source Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" target="blank">Bug Tracker</a></li><li><a href="mailto:dev-subscribe@predictionio.incubator.apache.org" target="blank">Subscribe to Development Mailing List</a></li></ul></div></div></div></div><div id="footer-bottom"><div class="container"><div class="row"><div class="col-md-12"><div id="footer-logo-wrapper"><img
  alt="PredictionIO" src="/images/logos/logo-white-d1e9c6e6.png"/></div><div id="social-icons-wrapper"><a class="github-button" href="https://github.com/apache/incubator-predictionio" data-style="mega" data-count-href="/apache/incubator-predictionio/stargazers" data-count-api="/repos/apache/incubator-predictionio#stargazers_count" data-count-aria-label="# stargazers on GitHub" aria-label="Star apache/incubator-predictionio on GitHub">Star</a> <a class="github-button" href="https://github.com/apache/incubator-predictionio/fork" data-icon="octicon-git-branch" data-style="mega" data-count-href="/apache/incubator-predictionio/network" data-count-api="/repos/apache/incubator-predictionio#forks_count" data-count-aria-label="# forks on GitHub" aria-label="Fork apache/incubator-predictionio on GitHub">Fork</a> <script id="github-bjs" async="" defer="" src="https://buttons.github.io/buttons.js"></script><a href="//www.facebook.com/predictionio" target="blank"><img alt="PredictionIO on Twitter
 " src="/images/icons/twitter-ea9dc152.png"/></a> <a href="//twitter.com/predictionio" target="blank"><img alt="PredictionIO on Facebook" src="/images/icons/facebook-5c57939c.png"/></a> </div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){
+(w[n].q=w[n].q||[]).push(arguments);};s=d.createElement(t);
+e=d.getElementsByTagName(t)[0];s.async=1;s.src=u;e.parentNode.insertBefore(s,e);
+})(window,document,'script','//s.swiftypecdn.com/install/v1/st.js','_st');
+
+_st('install','HaUfpXXV87xoB_zzCQ45');</script><script src="/javascripts/application-280db181.js"></script></body></html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/64c98d37/evaluation/metricchoose/index.html.gz
----------------------------------------------------------------------
diff --git a/evaluation/metricchoose/index.html.gz b/evaluation/metricchoose/index.html.gz
new file mode 100644
index 0000000..53455fb
Binary files /dev/null and b/evaluation/metricchoose/index.html.gz differ

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/64c98d37/evaluation/paramtuning/index.html
----------------------------------------------------------------------
diff --git a/evaluation/paramtuning/index.html b/evaluation/paramtuning/index.html
new file mode 100644
index 0000000..88df894
--- /dev/null
+++ b/evaluation/paramtuning/index.html
@@ -0,0 +1,393 @@
+<!DOCTYPE html><html><head><title>Hyperparameter Tuning</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><meta class="swiftype" name="title" data-type="string" content="Hyperparameter Tuning"/><link rel="canonical" href="https://docs.prediction.io/evaluation/paramtuning/"/><link href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800" rel="stylesheet"/><link href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" rel="stylesheet"/><link href="/stylesheets/application-a2a2f408.css" rel="stylesheet" type="text/css"/><script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js"></script><script src="//cdn.mathjax.org/mathjax/latest/
 MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: true });}catch(e){}</script></head><body><div id="global"><header><div class="container" id="header-wrapper"><div class="row"><div class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a href="#"></a><a href="http://predictionio.incubator.apache.org/"><img alt="PredictionIO" id="logo" src="/images/logos/logo-ee2b9bb3.png"/></a></div><div id="menu-wrapper"><div id="pill-wrapper"><a class="pill left" href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" href="//github.com/apache/incubator-predictionio/">OPEN SOURCE</a></div></div><img class="mobile-search-bar-toggler hidden-md hidden-lg" src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div id="search-bar-row-wrapper"><div class="container-fluid" id="search-bar-row"><div class="row"><div class="col-md-9 col-sm-11 col-xs-11"><div class="hidden
 -md hidden-lg" id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Hyperparameter Tuning</h4></div><h4 class="hidden-sm hidden-xs">PredictionIO Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 hidden-md hidden-lg"><img id="left-menu-indicator" src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img src="/images/icons/search-glass-704bd4ff.png"/><input type="text" id="st-search-input" class="st-search-input" placeholder="Search Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div class="mobile-left-menu-toggler hidden-md hidden-lg"></div></div></div></div><div id="page" class="container-fluid"><div class="row"><div id="left-menu-wrapper" class="col-md-3
 "><nav id="nav-main"><ul><li class="level-1"><a class="expandible" href="/"><span>Apache PredictionIO (incubating) Documentation</span></a><ul><li class="level-2"><a class="final" href="/"><span>Welcome to Apache PredictionIO (incubating)</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Started</span></a><ul><li class="level-2"><a class="final" href="/start/"><span>A Quick Intro</span></a></li><li class="level-2"><a class="final" href="/install/"><span>Installing Apache PredictionIO (incubating)</span></a></li><li class="level-2"><a class="final" href="/start/download/"><span>Downloading an Engine Template</span></a></li><li class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your First Engine</span></a></li><li class="level-2"><a class="final" href="/start/customize/"><span>Customizing the Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Integrating with Your App</span></a><ul><li cl
 ass="level-2"><a class="final" href="/appintegration/"><span>App Integration Overview</span></a></li><li class="level-2"><a class="expandible" href="/sdk/"><span>List of SDKs</span></a><ul><li class="level-3"><a class="final" href="/sdk/java/"><span>Java & Android SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/php/"><span>PHP SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/community/"><span>Community Powered SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web Service</span></a></li><li class="level-2"><a class="final" href="/cli/#engine-commands"><span>Engine Command-line Interface</span></a></li><li class="level-2"><a 
 class="final" href="/deploy/monitoring/"><span>Monitoring Engine</span></a></li><li class="level-2"><a class="final" href="/deploy/engineparams/"><span>Setting Engine Parameters</span></a></li><li class="level-2"><a class="final" href="/deploy/enginevariants/"><span>Deploying Multiple Engine Variants</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Customizing an Engine</span></a><ul><li class="level-2"><a class="final" href="/customize/"><span>Learning DASE</span></a></li><li class="level-2"><a class="final" href="/customize/dase/"><span>Implement DASE</span></a></li><li class="level-2"><a class="final" href="/customize/troubleshooting/"><span>Troubleshooting Engine Development</span></a></li><li class="level-2"><a class="final" href="/api/current/#package"><span>Engine Scala APIs</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Collecting and Analyzing Data</span></a><ul><li class="level-2"><a class="final" href="/d
 atacollection/"><span>Event Server Overview</span></a></li><li class="level-2"><a class="final" href="/cli/#event-server-commands"><span>Event Server Command-line Interface</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventapi/"><span>Collecting Data with REST/SDKs</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li class="level-2"><a class="final" href="/datacollection/webhooks/"><span>Unifying Multichannel Data with Webhooks</span></a></li><li class="level-2"><a class="final" href="/datacollection/channel/"><span>Channel</span></a></li><li class="level-2"><a class="final" href="/datacollection/batchimport/"><span>Importing Data in Batch</span></a></li><li class="level-2"><a class="final" href="/datacollection/analytics/"><span>Using Analytics Tools</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Choosing an Algorithm(s)</span></a><ul><li class=
 "level-2"><a class="final" href="/algorithm/"><span>Built-in Algorithm Libraries</span></a></li><li class="level-2"><a class="final" href="/algorithm/switch/"><span>Switching to Another Algorithm</span></a></li><li class="level-2"><a class="final" href="/algorithm/multiple/"><span>Combining Multiple Algorithms</span></a></li><li class="level-2"><a class="final" href="/algorithm/custom/"><span>Adding Your Own Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a class="final" href="/evaluation/"><span>Overview</span></a></li><li class="level-2"><a class="final active" href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li class="level-2"><a class="final" href="/evaluation/evaluationdashboard/"><span>Evaluation Dashboard</span></a></li><li class="level-2"><a class="final" href="/evaluation/metricchoose/"><span>Choosing Evaluation Metrics</span></a></li><li class
 ="level-2"><a class="final" href="/evaluation/metricbuild/"><span>Building Evaluation Metrics</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>System Architecture</span></a><ul><li class="level-2"><a class="final" href="/system/"><span>Architecture Overview</span></a></li><li class="level-2"><a class="final" href="/system/anotherdatastore/"><span>Using Another Data Store</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li class="level-2"><a class="final" href="/gallery/template-gallery/"><span>Browse</span></a></li><li class="level-2"><a class="final" href="/community/submit-template/"><span>Submit your Engine as a Template</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Demo Tutorials</span></a><ul><li class="level-2"><a class="final" href="/demo/tapster/"><span>Comics Recommendation Demo</span></a></li><li class="level-2"><a class="final" href=
 "/demo/community/"><span>Community Contributed Demo</span></a></li><li class="level-2"><a class="final" href="/demo/textclassification/"><span>Text Classification Engine Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" href="/community/"><span>Getting Involved</span></a><ul><li class="level-2"><a class="final" href="/community/contribute-code/"><span>Contribute Code</span></a></li><li class="level-2"><a class="final" href="/community/contribute-documentation/"><span>Contribute Documentation</span></a></li><li class="level-2"><a class="final" href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li class="level-2"><a class="final" href="/community/contribute-webhook/"><span>Contribute a Webhook</span></a></li><li class="level-2"><a class="final" href="/community/projects/"><span>Community Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Help</span></a><ul><li class="level-2"><a class="fina
 l" href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a class="final" href="/support/"><span>Support</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" href="/resources/intellij/"><span>Developing Engines with IntelliJ IDEA</span></a></li><li class="level-2"><a class="final" href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li class="level-2"><a class="final" href="/resources/glossary/"><span>Glossary</span></a></li></ul></li></ul></nav></div><div class="col-md-9 col-sm-12"><div class="content-header hidden-md hidden-lg"><div id="breadcrumbs" class="hidden-sm hidden xs"><ul><li><a href="#">ML Tuning and Evaluation</a><span class="spacer">&gt;</span></li><li><span class="last">Hyperparameter Tuning</span></li></ul></div><div id="page-title"><h1>Hyperparameter Tuning</h1></div></div><div id="table-of-content-wrapper"><h5>On this page</h5><aside id="table-o
 f-contents"><ul> <li> <a href="#quick-start">Quick Start</a> </li> <li> <a href="#detailed-explanation">Detailed Explanation</a> </li> <li> <a href="#the-evaluation-design">The Evaluation Design</a> </li> <li> <a href="#evaluation-data-generation">Evaluation Data Generation</a> </li> <li> <a href="#evaluation-metrics">Evaluation Metrics</a> </li> <li> <a href="#parameters-generation">Parameters Generation</a> </li> <li> <a href="#running-the-evaluation">Running the Evaluation</a> </li> <li> <a href="#notes">Notes</a> </li> </ul> </aside><hr/><a id="edit-page-link" href="https://github.com/apache/incubator-predictionio/tree/livedoc/docs/manual/source/evaluation/paramtuning.html.md"><img src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div class="content-header hidden-sm hidden-xs"><div id="breadcrumbs" class="hidden-sm hidden xs"><ul><li><a href="#">ML Tuning and Evaluation</a><span class="spacer">&gt;</span></li><li><span class="last">Hyperparameter Tuning</spa
 n></li></ul></div><div id="page-title"><h1>Hyperparameter Tuning</h1></div></div><div class="content"><p>A PredictionIO engine is instantiated by a set of parameters. These parameters define which algorithm is to be used, as well supply the parameters for the algorithm itself. This naturally raises the question of how to choose the best set of parameters. The evaluation module streamlines the process of <em>tuning</em> the engine to the best parameter set and deploys it.</p><h2 id='quick-start' class='header-anchors'>Quick Start</h2><p>We demonstrate the evaluation with <a href="/templates/classification/quickstart/">the classification template</a>. The classification template uses a naive bayesian algorithm that has a smoothing parameter. We evaluate the prediction quality against different parameter values to find the best parameter values, and then deploy it.</p><h3 id='edit-the-appid' class='header-anchors'>Edit the AppId</h3><p>Edit MyClassification/src/main/scala/<strong><em>E
 valuation.scala</em></strong> to specify the <em>appId</em> you used to import the data.</p><div class="highlight scala"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
+2
+3
+4
+5
+6</pre></td><td class="code"><pre><span class="k">object</span> <span class="nc">EngineParamsList</span> <span class="k">extends</span> <span class="nc">EngineParamsGenerator</span> <span class="o">{</span>
+  <span class="o">...</span>
+  <span class="k">private</span><span class="o">[</span><span class="kt">this</span><span class="o">]</span> <span class="k">val</span> <span class="n">baseEP</span> <span class="k">=</span> <span class="nc">EngineParams</span><span class="o">(</span>
+    <span class="n">dataSourceParams</span> <span class="k">=</span> <span class="nc">DataSourceParams</span><span class="o">(</span><span class="n">appId</span> <span class="k">=</span> <span class="o">&lt;</span><span class="nc">YOUR_APP_ID</span><span class="o">&gt;,</span> <span class="n">evalK</span> <span class="k">=</span> <span class="nc">Some</span><span class="o">(</span><span class="mi">5</span><span class="o">)))</span>
+  <span class="o">...</span>
+<span class="o">}</span>
+</pre></td></tr></tbody></table> </div> <h3 id='build-and-run-the-evaluation' class='header-anchors'>Build and run the evaluation</h3><p>To run an evaluation, the command <code>pio eval</code> is used. It takes two mandatory parameter, 1. the <code>Evaluation</code> object, which tells PredictionIO the engine and metric we use for the evaluation; and 2. the <code>EngineParamsGenerator</code>, which contains a list of engine params to test against. The following command kickstarts the evaluation workflow for the classification template.</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
+2
+3
+4</pre></td><td class="code"><pre><span class="gp">$ </span>pio build
+...
+<span class="gp">$ </span>pio <span class="nb">eval </span>org.template.classification.AccuracyEvaluation <span class="se">\</span>
+    org.template.classification.EngineParamsList 
+</pre></td></tr></tbody></table> </div> <p>You will see the following output:</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45</pre></td><td class="code"><pre>...
+<span class="o">[</span>INFO] <span class="o">[</span>CoreWorkflow<span class="nv">$]</span> runEvaluation started
+...
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Iteration 0
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] EngineParams: <span class="o">{</span><span class="s2">"dataSourceParams"</span>:<span class="o">{</span><span class="s2">""</span>:<span class="o">{</span><span class="s2">"appId"</span>:19,<span class="s2">"evalK"</span>:5<span class="o">}}</span>,<span class="s2">"preparatorParams"</span>:<span class="o">{</span><span class="s2">""</span>:<span class="o">{}}</span>,<span class="s2">"algorithmParamsList"</span>:[<span class="o">{</span><span class="s2">"naive"</span>:<span class="o">{</span><span class="s2">"lambda"</span>:10.0<span class="o">}}]</span>,<span class="s2">"servingParams"</span>:<span class="o">{</span><span class="s2">""</span>:<span class="o">{}}}</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Result: MetricScores<span class="o">(</span>0.9281045751633987,List<span class="o">())</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Iteration 1
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] EngineParams: <span class="o">{</span><span class="s2">"dataSourceParams"</span>:<span class="o">{</span><span class="s2">""</span>:<span class="o">{</span><span class="s2">"appId"</span>:19,<span class="s2">"evalK"</span>:5<span class="o">}}</span>,<span class="s2">"preparatorParams"</span>:<span class="o">{</span><span class="s2">""</span>:<span class="o">{}}</span>,<span class="s2">"algorithmParamsList"</span>:[<span class="o">{</span><span class="s2">"naive"</span>:<span class="o">{</span><span class="s2">"lambda"</span>:100.0<span class="o">}}]</span>,<span class="s2">"servingParams"</span>:<span class="o">{</span><span class="s2">""</span>:<span class="o">{}}}</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Result: MetricScores<span class="o">(</span>0.9150326797385621,List<span class="o">())</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Iteration 2
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] EngineParams: <span class="o">{</span><span class="s2">"dataSourceParams"</span>:<span class="o">{</span><span class="s2">""</span>:<span class="o">{</span><span class="s2">"appId"</span>:19,<span class="s2">"evalK"</span>:5<span class="o">}}</span>,<span class="s2">"preparatorParams"</span>:<span class="o">{</span><span class="s2">""</span>:<span class="o">{}}</span>,<span class="s2">"algorithmParamsList"</span>:[<span class="o">{</span><span class="s2">"naive"</span>:<span class="o">{</span><span class="s2">"lambda"</span>:1000.0<span class="o">}}]</span>,<span class="s2">"servingParams"</span>:<span class="o">{</span><span class="s2">""</span>:<span class="o">{}}}</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Result: MetricScores<span class="o">(</span>0.4444444444444444,List<span class="o">())</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Writing best variant params to disk...
+<span class="o">[</span>INFO] <span class="o">[</span>CoreWorkflow<span class="nv">$]</span> Updating evaluation instance with result: MetricEvaluatorResult:
+  <span class="c"># engine params evaluated: 3</span>
+Optimal Engine Params:
+  <span class="o">{</span>
+  <span class="s2">"dataSourceParams"</span>:<span class="o">{</span>
+    <span class="s2">""</span>:<span class="o">{</span>
+      <span class="s2">"appId"</span>:19,
+      <span class="s2">"evalK"</span>:5
+    <span class="o">}</span>
+  <span class="o">}</span>,
+  <span class="s2">"preparatorParams"</span>:<span class="o">{</span>
+    <span class="s2">""</span>:<span class="o">{</span>
+
+    <span class="o">}</span>
+  <span class="o">}</span>,
+  <span class="s2">"algorithmParamsList"</span>:[
+    <span class="o">{</span>
+      <span class="s2">"naive"</span>:<span class="o">{</span>
+        <span class="s2">"lambda"</span>:10.0
+      <span class="o">}</span>
+    <span class="o">}</span>
+  <span class="o">]</span>,
+  <span class="s2">"servingParams"</span>:<span class="o">{</span>
+    <span class="s2">""</span>:<span class="o">{</span>
+
+    <span class="o">}</span>
+  <span class="o">}</span>
+<span class="o">}</span>
+Metrics:
+  org.template.classification.Accuracy: 0.9281045751633987
+The best variant params can be found <span class="k">in </span>best.json
+<span class="o">[</span>INFO] <span class="o">[</span>CoreWorkflow<span class="nv">$]</span> runEvaluation completed
+</pre></td></tr></tbody></table> </div> <p>The console prints out the evaluation metric score of each engine params, and finally pretty print the optimal engine params. Amongst the 3 engine params we evaluate, <em>lambda = 10.0</em> yields the highest accuracy score of ~0.9281.</p><h3 id='deploy-the-best-engine-parameter' class='header-anchors'>Deploy the best engine parameter</h3><p>The evaluation module also writes out the best engine parameter to disk at <code>best.json</code>. We can train and deploy this specify engine variant using the extra parameter <code>-v</code>. For example:</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
+2
+3
+4
+5
+6
+7</pre></td><td class="code"><pre><span class="gp">$ </span>pio train -v best.json
+...
+<span class="o">[</span>INFO] <span class="o">[</span>CoreWorkflow<span class="nv">$]</span> Training completed successfully.
+<span class="gp">$ </span>pio deploy -v best.json
+...
+<span class="o">[</span>INFO] <span class="o">[</span>HttpListener] Bound to localhost/127.0.0.1:8000
+<span class="o">[</span>INFO] <span class="o">[</span>MasterActor] Bind successful. Ready to serve.
+</pre></td></tr></tbody></table> </div> <p>At this point, we have successfully deployed the best engine variant we found through the evaluation process.</p><h2 id='detailed-explanation' class='header-anchors'>Detailed Explanation</h2><p>An engine often depends on a number of parameters, for example, the naive bayesian classification algorithm has a smoothing parameter to make the model more adaptive to unseen data. Compared with parameters which are <em>learnt</em> by the machine learning algorithm, this smoothing parameter <em>teaches</em> the algorithm how to work. Therefore, such parameters are usually called <em>hyperparameters</em>.</p><p>In PredictionIO, we always take a holistic view of an engine. An engine is comprised of a set of <strong><em>DAS</em></strong> controllers, as well as the necessary parameters for the controllers themselves. In the evaluation, we attempt to find out the best hyperparameters for an <em>engine</em>, which we call <strong><em>engine params</em></
 strong>. Using engine params we can deploy a complete engine.</p><p>This section demonstrates how to select the optimal engine params whilst ensuring the model doesn&#39;t overfit using PredictionIO&#39;s evaluation module.</p><h2 id='the-evaluation-design' class='header-anchors'>The Evaluation Design</h2><p>The PredictionIO evaluation module tests for the best engine params for an engine.</p><p>Given a set of engine params, we instantiate an engine and evaluate it with existing data. The data is split into two sets, a training set and a validation set. The training set is used to train the engine, which is deployed using the same steps described in earlier sections. We query the engine with the test set data, and compare the predicted values in the response with the actual data contained in the validation set. We define a <strong><em>metric</em></strong> to compare <strong><em>predicted result</em></strong> returned from the engine with the <strong><em>actual result</em></strong> w
 hich we obtained from the test data. The goal is to maximize the metric score.</p><p>This process is repeated many times with a series of engine params. At the end, PredictionIO returns the best engine params.</p><p>We demonstrate the evaluation with <a href="/templates/classification/quickstart/">the classification template</a>.</p><h2 id='evaluation-data-generation' class='header-anchors'>Evaluation Data Generation</h2><p>In evaluation data generation, the goal is to generate a sequence of (training, validation) data tuple. A common way is to use a <em>k-fold</em> generation process. The data set is split into <em>k folds</em>. We generate k tuples of training and validation sets, for each tuple, the training set takes <em>k - 1</em> of the folds and the validation set takes the remaining fold.</p><p>To enable evaluation data generation, we need to define the <strong><em>actual result</em></strong> and implement the method for generating the (training, validation) data tuple.</p><
 h3 id='actual-result' class='header-anchors'>Actual Result</h3><p>In MyClassification/src/main/scala/<strong><em>Engine.scala</em></strong>, the <code>ActualResult</code> class defines the <strong><em>actual result</em></strong>:</p><div class="highlight scala"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
+2
+3</pre></td><td class="code"><pre><span class="k">class</span> <span class="nc">ActualResult</span><span class="o">(</span>
+  <span class="k">val</span> <span class="n">label</span><span class="k">:</span> <span class="kt">Double</span>
+<span class="o">)</span> <span class="k">extends</span> <span class="nc">Serializable</span>
+</pre></td></tr></tbody></table> </div> <p>This class is used to store the actual label of the data (contrast to <code>PredictedResult</code> which is output of the engine).</p><h3 id='implement-data-generation-method-in-datasource' class='header-anchors'>Implement Data Generation Method in DataSource</h3><p>In MyClassification/src/main/scala/<strong><em>DataSource.scala</em></strong>, the method <code>readEval</code> reads and selects data from datastore and returns a sequence of (training, validation) data.</p><div class="highlight scala"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60</pre></td><td class="code"><pre><span class="k">class</span> <span class="nc">DataSource</span><span class="o">(</span><span class="k">val</span> <span class="n">dsp</span><span class="k">:</span> <span class="kt">DataSourceParams</span><span class="o">)</span>
+  <span class="k">extends</span> <span class="nc">PDataSource</span><span class="o">[</span><span class="kt">TrainingData</span>, <span class="kt">EmptyEvaluationInfo</span>, <span class="kt">Query</span>, <span class="kt">ActualResult</span><span class="o">]</span> <span class="o">{</span>
+
+  <span class="o">...</span>
+
+  <span class="k">override</span>
+  <span class="k">def</span> <span class="n">readEval</span><span class="o">(</span><span class="n">sc</span><span class="k">:</span> <span class="kt">SparkContext</span><span class="o">)</span>
+  <span class="k">:</span> <span class="kt">Seq</span><span class="o">[(</span><span class="kt">TrainingData</span>, <span class="kt">EmptyEvaluationInfo</span>, <span class="kt">RDD</span><span class="o">[(</span><span class="kt">Query</span>, <span class="kt">ActualResult</span><span class="o">)])]</span> <span class="k">=</span> <span class="o">{</span>
+    <span class="n">require</span><span class="o">(!</span><span class="n">dsp</span><span class="o">.</span><span class="n">evalK</span><span class="o">.</span><span class="n">isEmpty</span><span class="o">,</span> <span class="s">"DataSourceParams.evalK must not be None"</span><span class="o">)</span>
+
+    <span class="c1">// The following code reads the data from data store. It is equivalent to
+</span>    <span class="c1">// the readTraining method. We copy-and-paste the exact code here for
+</span>    <span class="c1">// illustration purpose, a recommended approach is to factor out this logic
+</span>    <span class="c1">// into a helper function and have both readTraining and readEval call the
+</span>    <span class="c1">// helper.
+</span>    <span class="k">val</span> <span class="n">eventsDb</span> <span class="k">=</span> <span class="nc">Storage</span><span class="o">.</span><span class="n">getPEvents</span><span class="o">()</span>
+    <span class="k">val</span> <span class="n">labeledPoints</span><span class="k">:</span> <span class="kt">RDD</span><span class="o">[</span><span class="kt">LabeledPoint</span><span class="o">]</span> <span class="k">=</span> <span class="n">eventsDb</span><span class="o">.</span><span class="n">aggregateProperties</span><span class="o">(</span>
+      <span class="n">appId</span> <span class="k">=</span> <span class="n">dsp</span><span class="o">.</span><span class="n">appId</span><span class="o">,</span>
+      <span class="n">entityType</span> <span class="k">=</span> <span class="s">"user"</span><span class="o">,</span>
+      <span class="c1">// only keep entities with these required properties defined
+</span>      <span class="n">required</span> <span class="k">=</span> <span class="nc">Some</span><span class="o">(</span><span class="nc">List</span><span class="o">(</span><span class="s">"plan"</span><span class="o">,</span> <span class="s">"attr0"</span><span class="o">,</span> <span class="s">"attr1"</span><span class="o">,</span> <span class="s">"attr2"</span><span class="o">)))(</span><span class="n">sc</span><span class="o">)</span>
+      <span class="c1">// aggregateProperties() returns RDD pair of
+</span>      <span class="c1">// entity ID and its aggregated properties
+</span>      <span class="o">.</span><span class="n">map</span> <span class="o">{</span> <span class="k">case</span> <span class="o">(</span><span class="n">entityId</span><span class="o">,</span> <span class="n">properties</span><span class="o">)</span> <span class="k">=&gt;</span>
+        <span class="k">try</span> <span class="o">{</span>
+          <span class="nc">LabeledPoint</span><span class="o">(</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="o">[</span><span class="kt">Double</span><span class="o">](</span><span class="s">"plan"</span><span class="o">),</span>
+            <span class="nc">Vectors</span><span class="o">.</span><span class="n">dense</span><span class="o">(</span><span class="nc">Array</span><span class="o">(</span>
+              <span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="o">[</span><span class="kt">Double</span><span class="o">](</span><span class="s">"attr0"</span><span class="o">),</span>
+              <span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="o">[</span><span class="kt">Double</span><span class="o">](</span><span class="s">"attr1"</span><span class="o">),</span>
+              <span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="o">[</span><span class="kt">Double</span><span class="o">](</span><span class="s">"attr2"</span><span class="o">)</span>
+            <span class="o">))</span>
+          <span class="o">)</span>
+        <span class="o">}</span> <span class="k">catch</span> <span class="o">{</span>
+          <span class="k">case</span> <span class="n">e</span><span class="k">:</span> <span class="kt">Exception</span> <span class="o">=&gt;</span> <span class="o">{</span>
+            <span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="o">(</span><span class="n">s</span><span class="s">"Failed to get properties ${properties} of"</span> <span class="o">+</span>
+              <span class="n">s</span><span class="s">" ${entityId}. Exception: ${e}."</span><span class="o">)</span>
+            <span class="k">throw</span> <span class="n">e</span>
+          <span class="o">}</span>
+        <span class="o">}</span>
+      <span class="o">}.</span><span class="n">cache</span><span class="o">()</span>
+    <span class="c1">// End of reading from data store
+</span>
+    <span class="c1">// K-fold splitting
+</span>    <span class="k">val</span> <span class="n">evalK</span> <span class="k">=</span> <span class="n">dsp</span><span class="o">.</span><span class="n">evalK</span><span class="o">.</span><span class="n">get</span>
+    <span class="k">val</span> <span class="n">indexedPoints</span><span class="k">:</span> <span class="kt">RDD</span><span class="o">[(</span><span class="kt">LabeledPoint</span>, <span class="kt">Long</span><span class="o">)]</span> <span class="k">=</span> <span class="n">labeledPoints</span><span class="o">.</span><span class="n">zipWithIndex</span>
+
+    <span class="o">(</span><span class="mi">0</span> <span class="n">until</span> <span class="n">evalK</span><span class="o">).</span><span class="n">map</span> <span class="o">{</span> <span class="n">idx</span> <span class="k">=&gt;</span> 
+      <span class="k">val</span> <span class="n">trainingPoints</span> <span class="k">=</span> <span class="n">indexedPoints</span><span class="o">.</span><span class="n">filter</span><span class="o">(</span><span class="k">_</span><span class="o">.</span><span class="n">_2</span> <span class="o">%</span> <span class="n">evalK</span> <span class="o">!=</span> <span class="n">idx</span><span class="o">).</span><span class="n">map</span><span class="o">(</span><span class="k">_</span><span class="o">.</span><span class="n">_1</span><span class="o">)</span>
+      <span class="k">val</span> <span class="n">testingPoints</span> <span class="k">=</span> <span class="n">indexedPoints</span><span class="o">.</span><span class="n">filter</span><span class="o">(</span><span class="k">_</span><span class="o">.</span><span class="n">_2</span> <span class="o">%</span> <span class="n">evalK</span> <span class="o">==</span> <span class="n">idx</span><span class="o">).</span><span class="n">map</span><span class="o">(</span><span class="k">_</span><span class="o">.</span><span class="n">_1</span><span class="o">)</span>
+
+      <span class="o">(</span>
+        <span class="k">new</span> <span class="nc">TrainingData</span><span class="o">(</span><span class="n">trainingPoints</span><span class="o">),</span>
+        <span class="k">new</span> <span class="nc">EmptyEvaluationInfo</span><span class="o">(),</span>
+        <span class="n">testingPoints</span><span class="o">.</span><span class="n">map</span> <span class="o">{</span> 
+          <span class="n">p</span> <span class="k">=&gt;</span> <span class="o">(</span><span class="k">new</span> <span class="nc">Query</span><span class="o">(</span><span class="n">p</span><span class="o">.</span><span class="n">features</span><span class="o">.</span><span class="n">toArray</span><span class="o">),</span> <span class="k">new</span> <span class="nc">ActualResult</span><span class="o">(</span><span class="n">p</span><span class="o">.</span><span class="n">label</span><span class="o">))</span> 
+        <span class="o">}</span>
+      <span class="o">)</span>
+    <span class="o">}</span>
+  <span class="o">}</span>
+<span class="o">}</span>
+</pre></td></tr></tbody></table> </div> <p>The <code>readEval</code> method returns a sequence of (<code>TrainingData</code>, <code>EvaluationInfo</code>, <code>RDD[(Query, ActualResult)]</code>. <code>TrainingData</code> is the same class we use for deploy, <code>RDD[(Query, ActualResult)]</code> is the validation set, <code>EvaluationInfo</code> can be used to hold some global evaluation data ; it is not used in the current example.</p><p>Lines 11 to 41 is the logic of reading and transforming data from the datastore; it is equvialent to the existing <code>readTraining</code> method. After line 41, the variable <code>labeledPoints</code> contains the complete dataset with which we use to generate the (training, validation) sequence.</p><p>Lines 43 to 57 is the <em>k-fold</em> logic. Line 45 gives each data point a unique id, and we decide whether the point belongs to the training or validation set depends on the <em>mod</em> of the id (lines 48 to 49). For each point in the valida
 tion set, we construct the <code>Query</code> and <code>ActualResult</code> (line 55) which is used validate the engine.</p><h2 id='evaluation-metrics' class='header-anchors'>Evaluation Metrics</h2><p>We define a <code>Metric</code> which gives a <em>score</em> to engine params. The higher the score, the better the engine params are. In this template, we use accuray score which measures the portion of correct prediction among all data points.</p><p>In MyClassification/src/main/scala/<strong>Evaluation.scala</strong>, the class <code>Accuracy</code> implements the <em>accuracy</em> score. It extends a base helper class <code>AverageMetric</code> which calculates the average score overall <em>(Query, PredictionResult, ActualResult)</em> tuple.</p><div class="highlight scala"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
+2
+3
+4
+5</pre></td><td class="code"><pre><span class="k">case</span> <span class="k">class</span> <span class="nc">Accuracy</span>
+  <span class="k">extends</span> <span class="nc">AverageMetric</span><span class="o">[</span><span class="kt">EmptyEvaluationInfo</span>, <span class="kt">Query</span>, <span class="kt">PredictedResult</span>, <span class="kt">ActualResult</span><span class="o">]</span> <span class="o">{</span>
+  <span class="k">def</span> <span class="n">calculate</span><span class="o">(</span><span class="n">query</span><span class="k">:</span> <span class="kt">Query</span><span class="o">,</span> <span class="n">predicted</span><span class="k">:</span> <span class="kt">PredictedResult</span><span class="o">,</span> <span class="n">actual</span><span class="k">:</span> <span class="kt">ActualResult</span><span class="o">)</span>
+  <span class="k">:</span> <span class="kt">Double</span> <span class="o">=</span> <span class="o">(</span><span class="k">if</span> <span class="o">(</span><span class="n">predicted</span><span class="o">.</span><span class="n">label</span> <span class="o">==</span> <span class="n">actual</span><span class="o">.</span><span class="n">label</span><span class="o">)</span> <span class="mf">1.0</span> <span class="k">else</span> <span class="mf">0.0</span><span class="o">)</span>
+<span class="o">}</span>
+</pre></td></tr></tbody></table> </div> <p>Then, implement a <code>Evaluation</code> object to define the engine and metric used in this evaluation.</p><div class="highlight scala"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
+2
+3</pre></td><td class="code"><pre><span class="k">object</span> <span class="nc">AccuracyEvaluation</span> <span class="k">extends</span> <span class="nc">Evaluation</span> <span class="o">{</span>
+  <span class="n">engineMetric</span> <span class="k">=</span> <span class="o">(</span><span class="nc">ClassificationEngine</span><span class="o">(),</span> <span class="k">new</span> <span class="nc">Accuracy</span><span class="o">())</span>
+<span class="o">}</span>
+</pre></td></tr></tbody></table> </div> <h2 id='parameters-generation' class='header-anchors'>Parameters Generation</h2><p>The last component is to specify the list of engine params we want to evaluate. In this guide, we discuss the simplest method. We specify an explicit list of engine params to be evaluated. </p><p>In MyClassification/src/main/scala/<strong>Evaluation.scala</strong>, the object <code>EngineParamsList</code> specifies the engine params list to be used.</p><div class="highlight scala"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17</pre></td><td class="code"><pre><span class="k">object</span> <span class="nc">EngineParamsList</span> <span class="k">extends</span> <span class="nc">EngineParamsGenerator</span> <span class="o">{</span>
+  <span class="c1">// Define list of EngineParams used in Evaluation
+</span>
+  <span class="c1">// First, we define the base engine params. It specifies the appId from which
+</span>  <span class="c1">// the data is read, and a evalK parameter is used to define the
+</span>  <span class="c1">// cross-validation.
+</span>  <span class="k">private</span><span class="o">[</span><span class="kt">this</span><span class="o">]</span> <span class="k">val</span> <span class="n">baseEP</span> <span class="k">=</span> <span class="nc">EngineParams</span><span class="o">(</span>
+    <span class="n">dataSourceParams</span> <span class="k">=</span> <span class="nc">DataSourceParams</span><span class="o">(</span><span class="n">appId</span> <span class="k">=</span> <span class="mi">18</span><span class="o">,</span> <span class="n">evalK</span> <span class="k">=</span> <span class="nc">Some</span><span class="o">(</span><span class="mi">5</span><span class="o">)))</span>
+
+  <span class="c1">// Second, we specify the engine params list by explicitly listing all
+</span>  <span class="c1">// algorithm parameters. In this case, we evaluate 3 engine params, each with
+</span>  <span class="c1">// a different algorithm params value.
+</span>  <span class="n">engineParamsList</span> <span class="k">=</span> <span class="nc">Seq</span><span class="o">(</span>
+    <span class="n">baseEP</span><span class="o">.</span><span class="n">copy</span><span class="o">(</span><span class="n">algorithmParamsList</span> <span class="k">=</span> <span class="nc">Seq</span><span class="o">((</span><span class="s">"naive"</span><span class="o">,</span> <span class="nc">AlgorithmParams</span><span class="o">(</span><span class="mf">10.0</span><span class="o">)))),</span>
+    <span class="n">baseEP</span><span class="o">.</span><span class="n">copy</span><span class="o">(</span><span class="n">algorithmParamsList</span> <span class="k">=</span> <span class="nc">Seq</span><span class="o">((</span><span class="s">"naive"</span><span class="o">,</span> <span class="nc">AlgorithmParams</span><span class="o">(</span><span class="mf">100.0</span><span class="o">)))),</span>
+    <span class="n">baseEP</span><span class="o">.</span><span class="n">copy</span><span class="o">(</span><span class="n">algorithmParamsList</span> <span class="k">=</span> <span class="nc">Seq</span><span class="o">((</span><span class="s">"naive"</span><span class="o">,</span> <span class="nc">AlgorithmParams</span><span class="o">(</span><span class="mf">1000.0</span><span class="o">)))))</span>
+<span class="o">}</span>
+</pre></td></tr></tbody></table> </div> <p>A good practise is to first define a base engine params, it contains the common parameters used in all evaluations (lines 7 to 8). With the base params, we construct the list of engine params we want to evaluation by adding or replacing the controller parameter. Lines 13 to 16 generate 3 engine parameters, each has a different smoothing parameters.</p><h2 id='running-the-evaluation' class='header-anchors'>Running the Evaluation</h2><p>It remains to run the evaluation. Let&#39;s recap the quick start section above. The <code>pio eval</code> command kick starts the evaluation, and the result can be seen from the console.</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
+2
+3
+4</pre></td><td class="code"><pre><span class="gp">$ </span>pio build
+...
+<span class="gp">$ </span>pio <span class="nb">eval </span>org.template.classification.AccuracyEvaluation <span class="se">\</span>
+    org.template.classification.EngineParamsList 
+</pre></td></tr></tbody></table> </div> <p>You will see the following output:</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45</pre></td><td class="code"><pre>...
+<span class="o">[</span>INFO] <span class="o">[</span>CoreWorkflow<span class="nv">$]</span> runEvaluation started
+...
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Iteration 0
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] EngineParams: <span class="o">{</span><span class="s2">"dataSourceParams"</span>:<span class="o">{</span><span class="s2">""</span>:<span class="o">{</span><span class="s2">"appId"</span>:19,<span class="s2">"evalK"</span>:5<span class="o">}}</span>,<span class="s2">"preparatorParams"</span>:<span class="o">{</span><span class="s2">""</span>:<span class="o">{}}</span>,<span class="s2">"algorithmParamsList"</span>:[<span class="o">{</span><span class="s2">"naive"</span>:<span class="o">{</span><span class="s2">"lambda"</span>:10.0<span class="o">}}]</span>,<span class="s2">"servingParams"</span>:<span class="o">{</span><span class="s2">""</span>:<span class="o">{}}}</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Result: MetricScores<span class="o">(</span>0.9281045751633987,List<span class="o">())</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Iteration 1
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] EngineParams: <span class="o">{</span><span class="s2">"dataSourceParams"</span>:<span class="o">{</span><span class="s2">""</span>:<span class="o">{</span><span class="s2">"appId"</span>:19,<span class="s2">"evalK"</span>:5<span class="o">}}</span>,<span class="s2">"preparatorParams"</span>:<span class="o">{</span><span class="s2">""</span>:<span class="o">{}}</span>,<span class="s2">"algorithmParamsList"</span>:[<span class="o">{</span><span class="s2">"naive"</span>:<span class="o">{</span><span class="s2">"lambda"</span>:100.0<span class="o">}}]</span>,<span class="s2">"servingParams"</span>:<span class="o">{</span><span class="s2">""</span>:<span class="o">{}}}</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Result: MetricScores<span class="o">(</span>0.9150326797385621,List<span class="o">())</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Iteration 2
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] EngineParams: <span class="o">{</span><span class="s2">"dataSourceParams"</span>:<span class="o">{</span><span class="s2">""</span>:<span class="o">{</span><span class="s2">"appId"</span>:19,<span class="s2">"evalK"</span>:5<span class="o">}}</span>,<span class="s2">"preparatorParams"</span>:<span class="o">{</span><span class="s2">""</span>:<span class="o">{}}</span>,<span class="s2">"algorithmParamsList"</span>:[<span class="o">{</span><span class="s2">"naive"</span>:<span class="o">{</span><span class="s2">"lambda"</span>:1000.0<span class="o">}}]</span>,<span class="s2">"servingParams"</span>:<span class="o">{</span><span class="s2">""</span>:<span class="o">{}}}</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Result: MetricScores<span class="o">(</span>0.4444444444444444,List<span class="o">())</span>
+<span class="o">[</span>INFO] <span class="o">[</span>MetricEvaluator] Writing best variant params to disk...
+<span class="o">[</span>INFO] <span class="o">[</span>CoreWorkflow<span class="nv">$]</span> Updating evaluation instance with result: MetricEvaluatorResult:
+  <span class="c"># engine params evaluated: 3</span>
+Optimal Engine Params:
+  <span class="o">{</span>
+  <span class="s2">"dataSourceParams"</span>:<span class="o">{</span>
+    <span class="s2">""</span>:<span class="o">{</span>
+      <span class="s2">"appId"</span>:19,
+      <span class="s2">"evalK"</span>:5
+    <span class="o">}</span>
+  <span class="o">}</span>,
+  <span class="s2">"preparatorParams"</span>:<span class="o">{</span>
+    <span class="s2">""</span>:<span class="o">{</span>
+
+    <span class="o">}</span>
+  <span class="o">}</span>,
+  <span class="s2">"algorithmParamsList"</span>:[
+    <span class="o">{</span>
+      <span class="s2">"naive"</span>:<span class="o">{</span>
+        <span class="s2">"lambda"</span>:10.0
+      <span class="o">}</span>
+    <span class="o">}</span>
+  <span class="o">]</span>,
+  <span class="s2">"servingParams"</span>:<span class="o">{</span>
+    <span class="s2">""</span>:<span class="o">{</span>
+
+    <span class="o">}</span>
+  <span class="o">}</span>
+<span class="o">}</span>
+Metrics:
+  org.template.classification.Accuracy: 0.9281045751633987
+The best variant params can be found <span class="k">in </span>best.json
+<span class="o">[</span>INFO] <span class="o">[</span>CoreWorkflow<span class="nv">$]</span> runEvaluation completed
+</pre></td></tr></tbody></table> </div> <h2 id='notes' class='header-anchors'>Notes</h2> <ul> <li>We deliberately not metion <strong><em>test set</em></strong> in this hyperparameter tuning guide. In machine learning literature, the <strong><em>test set</em></strong> is a separate piece of data which is used to evaluate the final engine params outputted by the evaluation process. This guarantees that no information in the training / validation set is <em>leaked</em> into the engine params and yields a biased outcome. With PredictionIO, there are multiple ways of conducting robust tuning, we will cover this topic in the coming sections.</li> </ul> </div></div></div></div><footer><div class="container"><div class="seperator"></div><div class="row"><div class="col-md-6 col-xs-6 footer-link-column"><div class="footer-link-column-row"><h4>Community</h4><ul><li><a href="//docs.prediction.io/install/" target="blank">Download</a></li><li><a href="//docs.prediction.io/" target="blank">Docs</
 a></li><li><a href="//github.com/apache/incubator-predictionio" target="blank">GitHub</a></li><li><a href="mailto:user-subscribe@predictionio.incubator.apache.org" target="blank">Subscribe to User Mailing List</a></li><li><a href="//stackoverflow.com/questions/tagged/predictionio" target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 col-xs-6 footer-link-column"><div class="footer-link-column-row"><h4>Contribute</h4><ul><li><a href="//predictionio.incubator.apache.org/community/contribute-code/" target="blank">Contribute</a></li><li><a href="//github.com/apache/incubator-predictionio" target="blank">Source Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" target="blank">Bug Tracker</a></li><li><a href="mailto:dev-subscribe@predictionio.incubator.apache.org" target="blank">Subscribe to Development Mailing List</a></li></ul></div></div></div></div><div id="footer-bottom"><div class="container"><div class="row"><div class="col-md-12"><div id="footer-lo
 go-wrapper"><img alt="PredictionIO" src="/images/logos/logo-white-d1e9c6e6.png"/></div><div id="social-icons-wrapper"><a class="github-button" href="https://github.com/apache/incubator-predictionio" data-style="mega" data-count-href="/apache/incubator-predictionio/stargazers" data-count-api="/repos/apache/incubator-predictionio#stargazers_count" data-count-aria-label="# stargazers on GitHub" aria-label="Star apache/incubator-predictionio on GitHub">Star</a> <a class="github-button" href="https://github.com/apache/incubator-predictionio/fork" data-icon="octicon-git-branch" data-style="mega" data-count-href="/apache/incubator-predictionio/network" data-count-api="/repos/apache/incubator-predictionio#forks_count" data-count-aria-label="# forks on GitHub" aria-label="Fork apache/incubator-predictionio on GitHub">Fork</a> <script id="github-bjs" async="" defer="" src="https://buttons.github.io/buttons.js"></script><a href="//www.facebook.com/predictionio" target="blank"><img alt="Predict
 ionIO on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a href="//twitter.com/predictionio" target="blank"><img alt="PredictionIO on Facebook" src="/images/icons/facebook-5c57939c.png"/></a> </div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){
+(w[n].q=w[n].q||[]).push(arguments);};s=d.createElement(t);
+e=d.getElementsByTagName(t)[0];s.async=1;s.src=u;e.parentNode.insertBefore(s,e);
+})(window,document,'script','//s.swiftypecdn.com/install/v1/st.js','_st');
+
+_st('install','HaUfpXXV87xoB_zzCQ45');</script><script src="/javascripts/application-280db181.js"></script></body></html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/64c98d37/evaluation/paramtuning/index.html.gz
----------------------------------------------------------------------
diff --git a/evaluation/paramtuning/index.html.gz b/evaluation/paramtuning/index.html.gz
new file mode 100644
index 0000000..35bf461
Binary files /dev/null and b/evaluation/paramtuning/index.html.gz differ

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/64c98d37/favicon.ico
----------------------------------------------------------------------
diff --git a/favicon.ico b/favicon.ico
new file mode 100644
index 0000000..92d6430
Binary files /dev/null and b/favicon.ico differ

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/64c98d37/fonts/bootstrap/glyphicons-halflings-regular-278e49a8.woff
----------------------------------------------------------------------
diff --git a/fonts/bootstrap/glyphicons-halflings-regular-278e49a8.woff b/fonts/bootstrap/glyphicons-halflings-regular-278e49a8.woff
new file mode 100644
index 0000000..9e61285
Binary files /dev/null and b/fonts/bootstrap/glyphicons-halflings-regular-278e49a8.woff differ

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/64c98d37/fonts/bootstrap/glyphicons-halflings-regular-44bc1850.ttf
----------------------------------------------------------------------
diff --git a/fonts/bootstrap/glyphicons-halflings-regular-44bc1850.ttf b/fonts/bootstrap/glyphicons-halflings-regular-44bc1850.ttf
new file mode 100644
index 0000000..1413fc6
Binary files /dev/null and b/fonts/bootstrap/glyphicons-halflings-regular-44bc1850.ttf differ

http://git-wip-us.apache.org/repos/asf/incubator-predictionio-site/blob/64c98d37/fonts/bootstrap/glyphicons-halflings-regular-86b6f62b.eot
----------------------------------------------------------------------
diff --git a/fonts/bootstrap/glyphicons-halflings-regular-86b6f62b.eot b/fonts/bootstrap/glyphicons-halflings-regular-86b6f62b.eot
new file mode 100644
index 0000000..b93a495
Binary files /dev/null and b/fonts/bootstrap/glyphicons-halflings-regular-86b6f62b.eot differ