You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@predictionio.apache.org by gi...@apache.org on 2018/08/11 00:24:09 UTC
[10/11] predictionio-site git commit: Documentation based on apache/predictionio#54415e1066ae2d646eef62ebfdf801ace1de2097

http://git-wip-us.apache.org/repos/asf/predictionio-site/blob/c17b9607/evaluation/metricchoose/index.html
----------------------------------------------------------------------
diff --git a/evaluation/metricchoose/index.html b/evaluation/metricchoose/index.html
index 69c04ba..3b8707c 100644
--- a/evaluation/metricchoose/index.html
+++ b/evaluation/metricchoose/index.html
@@ -1,4 +1,4 @@
-<!DOCTYPE html><html><head><title>Choosing Evaluation Metrics</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><meta class="swiftype" name="title" data-type="string" content="Choosing Evaluation Metrics"/><link rel="canonical" href="https://predictionio.apache.org/evaluation/metricchoose/"/><link href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800" rel="stylesheet"/><link href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" rel="stylesheet"/><link href="/stylesheets/application-eccfc6cb.css" rel="stylesheet" type="text/css"/><script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js"></script><script src="//cdn.mathjax.o
 rg/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: true });}catch(e){}</script></head><body><div id="global"><header><div class="container" id="header-wrapper"><div class="row"><div class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a href="#"></a><a href="http://predictionio.apache.org/"><img alt="Apache PredictionIO" id="logo" src="/images/logos/logo-ee2b9bb3.png"/></a><span>®</span></div><div id="menu-wrapper"><div id="pill-wrapper"><a class="pill left" href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" href="//github.com/apache/predictionio/">OPEN SOURCE</a></div></div><img class="mobile-search-bar-toggler hidden-md hidden-lg" src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div id="search-bar-row-wrapper"><div class="container-fluid" id="search-bar-row"><div class="row"><div class="col-md-9 col-sm-11 col-xs-11
 "><div class="hidden-md hidden-lg" id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Choosing Evaluation Metrics</h4></div><h4 class="hidden-sm hidden-xs">PredictionIO Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 hidden-md hidden-lg"><img id="left-menu-indicator" src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img src="/images/icons/search-glass-704bd4ff.png"/><input type="text" id="st-search-input" class="st-search-input" placeholder="Search Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div class="mobile-left-menu-toggler hidden-md hidden-lg"></div></div></div></div><div id="page" class="container-fluid"><div class="row"><div id="left-men
 u-wrapper" class="col-md-3"><nav id="nav-main"><ul><li class="level-1"><a class="expandible" href="/"><span>Apache PredictionIO® Documentation</span></a><ul><li class="level-2"><a class="final" href="/"><span>Welcome to Apache PredictionIO®</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Started</span></a><ul><li class="level-2"><a class="final" href="/start/"><span>A Quick Intro</span></a></li><li class="level-2"><a class="final" href="/install/"><span>Installing Apache PredictionIO</span></a></li><li class="level-2"><a class="final" href="/start/download/"><span>Downloading an Engine Template</span></a></li><li class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your First Engine</span></a></li><li class="level-2"><a class="final" href="/start/customize/"><span>Customizing the Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Integrating with Your App</span></a><ul><li class="leve
 l-2"><a class="final" href="/appintegration/"><span>App Integration Overview</span></a></li><li class="level-2"><a class="expandible" href="/sdk/"><span>List of SDKs</span></a><ul><li class="level-3"><a class="final" href="/sdk/java/"><span>Java & Android SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/php/"><span>PHP SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/community/"><span>Community Powered SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web Service</span></a></li><li class="level-2"><a class="final" href="/batchpredict/"><span>Batch Predictions</span></a></li><li class="level-2"><a class="final" href="/deploy/
 monitoring/"><span>Monitoring Engine</span></a></li><li class="level-2"><a class="final" href="/deploy/engineparams/"><span>Setting Engine Parameters</span></a></li><li class="level-2"><a class="final" href="/deploy/enginevariants/"><span>Deploying Multiple Engine Variants</span></a></li><li class="level-2"><a class="final" href="/deploy/plugin/"><span>Engine Server Plugin</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Customizing an Engine</span></a><ul><li class="level-2"><a class="final" href="/customize/"><span>Learning DASE</span></a></li><li class="level-2"><a class="final" href="/customize/dase/"><span>Implement DASE</span></a></li><li class="level-2"><a class="final" href="/customize/troubleshooting/"><span>Troubleshooting Engine Development</span></a></li><li class="level-2"><a class="final" href="/api/current/#package"><span>Engine Scala APIs</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Collecting and 
 Analyzing Data</span></a><ul><li class="level-2"><a class="final" href="/datacollection/"><span>Event Server Overview</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventapi/"><span>Collecting Data with REST/SDKs</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li class="level-2"><a class="final" href="/datacollection/webhooks/"><span>Unifying Multichannel Data with Webhooks</span></a></li><li class="level-2"><a class="final" href="/datacollection/channel/"><span>Channel</span></a></li><li class="level-2"><a class="final" href="/datacollection/batchimport/"><span>Importing Data in Batch</span></a></li><li class="level-2"><a class="final" href="/datacollection/analytics/"><span>Using Analytics Tools</span></a></li><li class="level-2"><a class="final" href="/datacollection/plugin/"><span>Event Server Plugin</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><
 span>Choosing an Algorithm(s)</span></a><ul><li class="level-2"><a class="final" href="/algorithm/"><span>Built-in Algorithm Libraries</span></a></li><li class="level-2"><a class="final" href="/algorithm/switch/"><span>Switching to Another Algorithm</span></a></li><li class="level-2"><a class="final" href="/algorithm/multiple/"><span>Combining Multiple Algorithms</span></a></li><li class="level-2"><a class="final" href="/algorithm/custom/"><span>Adding Your Own Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a class="final" href="/evaluation/"><span>Overview</span></a></li><li class="level-2"><a class="final" href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li class="level-2"><a class="final" href="/evaluation/evaluationdashboard/"><span>Evaluation Dashboard</span></a></li><li class="level-2"><a class="final active" href="/evaluation/metricchoose/"><spa
 n>Choosing Evaluation Metrics</span></a></li><li class="level-2"><a class="final" href="/evaluation/metricbuild/"><span>Building Evaluation Metrics</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>System Architecture</span></a><ul><li class="level-2"><a class="final" href="/system/"><span>Architecture Overview</span></a></li><li class="level-2"><a class="final" href="/system/anotherdatastore/"><span>Using Another Data Store</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>PredictionIO® Official Templates</span></a><ul><li class="level-2"><a class="final" href="/templates/"><span>Intro</span></a></li><li class="level-2"><a class="expandible" href="#"><span>Recommendation</span></a><ul><li class="level-3"><a class="final" href="/templates/recommendation/quickstart/"><span>Quick Start</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/dase/"><span>DASE</span></a></li><li class="level-3">
 <a class="final" href="/templates/recommendation/evaluation/"><span>Evaluation Explained</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/reading-custom-events/"><span>Read Custom Events</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/customize-data-prep/"><span>Customize Data Preparator</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/customize-serving/"><span>Customize Serving</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/training-with-implicit-preference/"><span>Train with Implicit Preference</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/blacklist-items/"><span>Filter Recommended Items by Blacklist in Query</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/batch-e
 valuator/"><span>Batch Persistable Evaluator</span></a></li></ul></li><li class="level-2"><a class="expandible" href="#"><span>E-Commerce Recommendation</span></a><ul><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/quickstart/"><span>Quick Start</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/dase/"><span>DASE</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/train-with-rate-event/"><span>Train with Rate Event</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/adjust-score/"><span>Adjust Score</span></a></li></ul></li><li class="level-2"><a class="expandible" href="#"><span>Similar Product</span></a><ul><li class="level-3"><a class="final" href="/templates/similarproduct/quickstart/"><span>Quick Start</span></a><
 /li><li class="level-3"><a class="final" href="/templates/similarproduct/dase/"><span>DASE</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/multi-events-multi-algos/"><span>Multiple Events and Multiple Algorithms</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/return-item-properties/"><span>Returns Item Properties</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/train-with-rate-event/"><span>Train with Rate Event</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/rid-user-set-event/"><span>Get Rid of Events for Users</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/recommended-user/"><span>Recommend Users</span></a></li></ul></li><li class="level-2"><a class="expandible" href="#"><span>Classification</span><
 /a><ul><li class="level-3"><a class="final" href="/templates/classification/quickstart/"><span>Quick Start</span></a></li><li class="level-3"><a class="final" href="/templates/classification/dase/"><span>DASE</span></a></li><li class="level-3"><a class="final" href="/templates/classification/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/classification/add-algorithm/"><span>Use Alternative Algorithm</span></a></li><li class="level-3"><a class="final" href="/templates/classification/reading-custom-properties/"><span>Read Custom Properties</span></a></li></ul></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li class="level-2"><a class="final" href="/gallery/template-gallery/"><span>Browse</span></a></li><li class="level-2"><a class="final" href="/community/submit-template/"><span>Submit your Engine as a Template</span></a></li></ul></li><li class="level-1"><a class="expandible" hr
 ef="#"><span>Demo Tutorials</span></a><ul><li class="level-2"><a class="final" href="/demo/tapster/"><span>Comics Recommendation Demo</span></a></li><li class="level-2"><a class="final" href="/demo/community/"><span>Community Contributed Demo</span></a></li><li class="level-2"><a class="final" href="/demo/textclassification/"><span>Text Classification Engine Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" href="/community/"><span>Getting Involved</span></a><ul><li class="level-2"><a class="final" href="/community/contribute-code/"><span>Contribute Code</span></a></li><li class="level-2"><a class="final" href="/community/contribute-documentation/"><span>Contribute Documentation</span></a></li><li class="level-2"><a class="final" href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li class="level-2"><a class="final" href="/community/contribute-webhook/"><span>Contribute a Webhook</span></a></li><li class="level-2"><a class="final" href=
 "/community/projects/"><span>Community Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Help</span></a><ul><li class="level-2"><a class="final" href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a class="final" href="/support/"><span>Support</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" href="/cli/"><span>Command-line Interface</span></a></li><li class="level-2"><a class="final" href="/resources/release/"><span>Release Cadence</span></a></li><li class="level-2"><a class="final" href="/resources/intellij/"><span>Developing Engines with IntelliJ IDEA</span></a></li><li class="level-2"><a class="final" href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li class="level-2"><a class="final" href="/resources/glossary/"><span>Glossary</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"
 ><span>Apache Software Foundation</span></a><ul><li class="level-2"><a class="final" href="https://www.apache.org/"><span>Apache Homepage</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/licenses/"><span>License</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/foundation/sponsorship.html"><span>Sponsorship</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/foundation/thanks.html"><span>Thanks</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/security/"><span>Security</span></a></li></ul></li></ul></nav></div><div class="col-md-9 col-sm-12"><div class="content-header hidden-md hidden-lg"><div id="breadcrumbs" class="hidden-sm hidden xs"><ul><li><a href="#">ML Tuning and Evaluation</a><span class="spacer">&gt;</span></li><li><span class="last">Choosing Evaluation Metrics</span></li></ul></div><div id="page-title"><h1>Choosing Evaluation Metrics</h1></div></div><
 div id="table-of-content-wrapper"><h5>On this page</h5><aside id="table-of-contents"><ul> <li> <a href="#defining-metric">Defining Metric</a> </li> <li> <a href="#common-metrics">Common Metrics</a> </li> </ul> </aside><hr/><a id="edit-page-link" href="https://github.com/apache/predictionio/tree/livedoc/docs/manual/source/evaluation/metricchoose.html.md"><img src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div class="content-header hidden-sm hidden-xs"><div id="breadcrumbs" class="hidden-sm hidden xs"><ul><li><a href="#">ML Tuning and Evaluation</a><span class="spacer">&gt;</span></li><li><span class="last">Choosing Evaluation Metrics</span></li></ul></div><div id="page-title"><h1>Choosing Evaluation Metrics</h1></div></div><div class="content"> <p>The <a href="/evaluation/paramtuning/">hyperparameter tuning module</a> allows us to select the optimal engine parameter defined by a <code>Metric</code>. <code>Metric</code> determines the quality of an engine varia
 nt. We have skimmmed through the process of choosing the right <code>Metric</code> in previous sections.</p><p>This secion discusses basic evaluation metrics commonly used for classification problems. If you are more interested in knowing how to <em>implement</em> a custom metric, please skip to <a href="/evaluation/metricbuild/">the next section</a>.</p><h2 id='defining-metric' class='header-anchors'>Defining Metric</h2><p>Metric evaluates the quality of an engine by comparing engine&#39;s output (predicted result) with the original label (actual result). A engine serving better prediction should yield a higher metric score, the tuning module returns the engine parameter with the highest score. It is sometimes called <a href="http://en.wikipedia.org/wiki/Loss_function"><em>loss function</em></a> in literature, where the goal is to minimize the loss function.</p><p>During tuning, it is important for us to understand the definition of the metric, to make sure it is aligned with the p
 rediction engine&#39;s goal.</p><p>In the classificaiton template, we use <em>Accuracy</em> as our metric. <em>Accuracy</em> is defined as: the percentage of queries which the engine is able to predict the correct label.</p><h2 id='common-metrics' class='header-anchors'>Common Metrics</h2><p>We illustrate the choice of metric with the following confusion matrix. Row represents the engine predicted label, column represents the acutal label. The second row means that of the 200 testing data points, the engine predicted 60 (15 + 35 + 10) of them as label 2.0, among which 35 are correct prediction (i.e. actual label is 2.0, matches with the prediction), and 25 are wrong.</p> <table><thead> <tr> <th style="text-align: center"></th> <th style="text-align: center">Actual = 1.0</th> <th style="text-align: center">Actual = 2.0</th> <th style="text-align: center">Actual = 3.0</th> </tr> </thead><tbody> <tr> <td style="text-align: center"><strong>Predicted = 1.0</strong></td> <td style="text-a
 lign: center">30</td> <td style="text-align: center">0</td> <td style="text-align: center">60</td> </tr> <tr> <td style="text-align: center"><strong>Predicted = 2.0</strong></td> <td style="text-align: center">15</td> <td style="text-align: center">35</td> <td style="text-align: center">10</td> </tr> <tr> <td style="text-align: center"><strong>Predicted = 3.0</strong></td> <td style="text-align: center">0</td> <td style="text-align: center">0</td> <td style="text-align: center">50</td> </tr> </tbody></table> <h3 id='accuracy' class='header-anchors'>Accuracy</h3><p>Accuracy means that how many data points are predicted correctly. It is one of the simplest form of evaluation metrics. The accuracy score is # of correct points / # total = (30 + 35 + 50) / 200 = 0.575.</p><h3 id='precision' class='header-anchors'>Precision</h3><p>Precision is a metric for binary classifier which measures the correctness among all positive labels. A binary classifier gives only two output values (i.e. pos
 itive and negative). For problem where there are multiple values (3 in our example), we first have to tranform our problem into a binary classification problem. For example, we can have problem whether label = 1.0. The confusion matrix now becomes:</p> <table><thead> <tr> <th style="text-align: center"></th> <th style="text-align: center">Actual = 1.0</th> <th style="text-align: center">Actual != 1.0</th> </tr> </thead><tbody> <tr> <td style="text-align: center"><strong>Predicted = 1.0</strong></td> <td style="text-align: center">30</td> <td style="text-align: center">60</td> </tr> <tr> <td style="text-align: center"><strong>Predicted != 1.0</strong></td> <td style="text-align: center">15</td> <td style="text-align: center">95</td> </tr> </tbody></table> <p>Precision is the ratio between the number of correct positive answer (true positive) and the sum of correct positive answer (true positive) and wrong but positively labeled answer (false positive). In this case, the precision is 
 30 / (30 + 60) = ~0.3333.</p><h3 id='recall' class='header-anchors'>Recall</h3><p>Recall is a metric for binary classifier which measures how many positive labels are successfully predicted amongst all positive labels. Formally, it is the ratio between the number of correct positive answer (true positive) and the sum of correct positive answer (true positive) and wrongly negatively labeled asnwer (false negative). In this case, the recall is 30 / (30 + 15) = ~0.6667.</p><p>As we have discussed several common metrics for classification problem, we can implement them using the <code>Metric</code> class in <a href="/evaluation/metricbuild">the next section</a>.</p></div></div></div></div><footer><div class="container"><div class="seperator"></div><div class="row"><div class="col-md-6 footer-link-column"><div class="footer-link-column-row"><h4>Community</h4><ul><li><a href="//predictionio.apache.org/install/" target="blank">Download</a></li><li><a href="//predictionio.apache.org/" targe
 t="blank">Docs</a></li><li><a href="//github.com/apache/predictionio" target="blank">GitHub</a></li><li><a href="mailto:user-subscribe@predictionio.apache.org" target="blank">Subscribe to User Mailing List</a></li><li><a href="//stackoverflow.com/questions/tagged/predictionio" target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 footer-link-column"><div class="footer-link-column-row"><h4>Contribute</h4><ul><li><a href="//predictionio.apache.org/community/contribute-code/" target="blank">Contribute</a></li><li><a href="//github.com/apache/predictionio" target="blank">Source Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" target="blank">Bug Tracker</a></li><li><a href="mailto:dev-subscribe@predictionio.apache.org" target="blank">Subscribe to Development Mailing List</a></li></ul></div></div></div><div class="row"><div class="col-md-12 footer-link-column"><p>Apache PredictionIO, PredictionIO, Apache, the Apache feather logo, and the Apache Predictio
 nIO project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</p><p>All other marks mentioned may be trademarks or registered trademarks of their respective owners.</p></div></div></div><div id="footer-bottom"><div class="container"><div class="row"><div class="col-md-12"><div id="footer-logo-wrapper"><img alt="PredictionIO" src="/images/logos/logo-white-d1e9c6e6.png"/><span>®</span></div><div id="social-icons-wrapper"><a class="github-button" href="https://github.com/apache/predictionio" data-style="mega" data-count-href="/apache/predictionio/stargazers" data-count-api="/repos/apache/predictionio#stargazers_count" data-count-aria-label="# stargazers on GitHub" aria-label="Star apache/predictionio on GitHub">Star</a> <a class="github-button" href="https://github.com/apache/predictionio/fork" data-icon="octicon-git-branch" data-style="mega" data-count-href="/apache/predictionio/network" data-count-api="/re
 pos/apache/predictionio#forks_count" data-count-aria-label="# forks on GitHub" aria-label="Fork apache/predictionio on GitHub">Fork</a> <script id="github-bjs" async="" defer="" src="https://buttons.github.io/buttons.js"></script><a href="https://twitter.com/predictionio" target="blank"><img alt="PredictionIO on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a href="https://www.facebook.com/predictionio" target="blank"><img alt="PredictionIO on Facebook" src="/images/icons/facebook-5c57939c.png"/></a> </div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){
+<!DOCTYPE html><html><head><title>Choosing Evaluation Metrics</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><meta class="swiftype" name="title" data-type="string" content="Choosing Evaluation Metrics"/><link rel="canonical" href="https://predictionio.apache.org/evaluation/metricchoose/"/><link href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800" rel="stylesheet"/><link href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" rel="stylesheet"/><link href="/stylesheets/application-eccfc6cb.css" rel="stylesheet" type="text/css"/><script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js"></script><script src="//cdn.mathjax.o
 rg/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: true });}catch(e){}</script></head><body><div id="global"><header><div class="container" id="header-wrapper"><div class="row"><div class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a href="#"></a><a href="http://predictionio.apache.org/"><img alt="Apache PredictionIO" id="logo" src="/images/logos/logo-ee2b9bb3.png"/></a><span>®</span></div><div id="menu-wrapper"><div id="pill-wrapper"><a class="pill left" href="/gallery/template-gallery">TEMPLATES</a> <a class="pill right" href="//github.com/apache/predictionio/">OPEN SOURCE</a></div></div><img class="mobile-search-bar-toggler hidden-md hidden-lg" src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div id="search-bar-row-wrapper"><div class="container-fluid" id="search-bar-row"><div class="row"><div class="col-md-9 col-sm-11 col-xs-11
 "><div class="hidden-md hidden-lg" id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Choosing Evaluation Metrics</h4></div><h4 class="hidden-sm hidden-xs">PredictionIO Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 hidden-md hidden-lg"><img id="left-menu-indicator" src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img src="/images/icons/search-glass-704bd4ff.png"/><input type="text" id="st-search-input" class="st-search-input" placeholder="Search Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div class="mobile-left-menu-toggler hidden-md hidden-lg"></div></div></div></div><div id="page" class="container-fluid"><div class="row"><div id="left-men
 u-wrapper" class="col-md-3"><nav id="nav-main"><ul><li class="level-1"><a class="expandible" href="/"><span>Apache PredictionIO® Documentation</span></a><ul><li class="level-2"><a class="final" href="/"><span>Welcome to Apache PredictionIO®</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Started</span></a><ul><li class="level-2"><a class="final" href="/start/"><span>A Quick Intro</span></a></li><li class="level-2"><a class="final" href="/install/"><span>Installing Apache PredictionIO</span></a></li><li class="level-2"><a class="final" href="/start/download/"><span>Downloading an Engine Template</span></a></li><li class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your First Engine</span></a></li><li class="level-2"><a class="final" href="/start/customize/"><span>Customizing the Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Integrating with Your App</span></a><ul><li class="leve
 l-2"><a class="final" href="/appintegration/"><span>App Integration Overview</span></a></li><li class="level-2"><a class="expandible" href="/sdk/"><span>List of SDKs</span></a><ul><li class="level-3"><a class="final" href="/sdk/java/"><span>Java & Android SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/php/"><span>PHP SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/community/"><span>Community Powered SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web Service</span></a></li><li class="level-2"><a class="final" href="/batchpredict/"><span>Batch Predictions</span></a></li><li class="level-2"><a class="final" href="/deploy/
 monitoring/"><span>Monitoring Engine</span></a></li><li class="level-2"><a class="final" href="/deploy/engineparams/"><span>Setting Engine Parameters</span></a></li><li class="level-2"><a class="final" href="/deploy/enginevariants/"><span>Deploying Multiple Engine Variants</span></a></li><li class="level-2"><a class="final" href="/deploy/plugin/"><span>Engine Server Plugin</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Customizing an Engine</span></a><ul><li class="level-2"><a class="final" href="/customize/"><span>Learning DASE</span></a></li><li class="level-2"><a class="final" href="/customize/dase/"><span>Implement DASE</span></a></li><li class="level-2"><a class="final" href="/customize/troubleshooting/"><span>Troubleshooting Engine Development</span></a></li><li class="level-2"><a class="final" href="/api/current/#package"><span>Engine Scala APIs</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Collecting and 
 Analyzing Data</span></a><ul><li class="level-2"><a class="final" href="/datacollection/"><span>Event Server Overview</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventapi/"><span>Collecting Data with REST/SDKs</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li class="level-2"><a class="final" href="/datacollection/webhooks/"><span>Unifying Multichannel Data with Webhooks</span></a></li><li class="level-2"><a class="final" href="/datacollection/channel/"><span>Channel</span></a></li><li class="level-2"><a class="final" href="/datacollection/batchimport/"><span>Importing Data in Batch</span></a></li><li class="level-2"><a class="final" href="/datacollection/analytics/"><span>Using Analytics Tools</span></a></li><li class="level-2"><a class="final" href="/datacollection/plugin/"><span>Event Server Plugin</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><
 span>Choosing an Algorithm(s)</span></a><ul><li class="level-2"><a class="final" href="/algorithm/"><span>Built-in Algorithm Libraries</span></a></li><li class="level-2"><a class="final" href="/algorithm/switch/"><span>Switching to Another Algorithm</span></a></li><li class="level-2"><a class="final" href="/algorithm/multiple/"><span>Combining Multiple Algorithms</span></a></li><li class="level-2"><a class="final" href="/algorithm/custom/"><span>Adding Your Own Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a class="final" href="/evaluation/"><span>Overview</span></a></li><li class="level-2"><a class="final" href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li class="level-2"><a class="final" href="/evaluation/evaluationdashboard/"><span>Evaluation Dashboard</span></a></li><li class="level-2"><a class="final active" href="/evaluation/metricchoose/"><spa
 n>Choosing Evaluation Metrics</span></a></li><li class="level-2"><a class="final" href="/evaluation/metricbuild/"><span>Building Evaluation Metrics</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>System Architecture</span></a><ul><li class="level-2"><a class="final" href="/system/"><span>Architecture Overview</span></a></li><li class="level-2"><a class="final" href="/system/anotherdatastore/"><span>Using Another Data Store</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>PredictionIO® Official Templates</span></a><ul><li class="level-2"><a class="final" href="/templates/"><span>Intro</span></a></li><li class="level-2"><a class="expandible" href="#"><span>Recommendation</span></a><ul><li class="level-3"><a class="final" href="/templates/recommendation/quickstart/"><span>Quick Start</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/dase/"><span>DASE</span></a></li><li class="level-3">
 <a class="final" href="/templates/recommendation/evaluation/"><span>Evaluation Explained</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/reading-custom-events/"><span>Read Custom Events</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/customize-data-prep/"><span>Customize Data Preparator</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/customize-serving/"><span>Customize Serving</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/training-with-implicit-preference/"><span>Train with Implicit Preference</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/blacklist-items/"><span>Filter Recommended Items by Blacklist in Query</span></a></li><li class="level-3"><a class="final" href="/templates/recommendation/batch-e
 valuator/"><span>Batch Persistable Evaluator</span></a></li></ul></li><li class="level-2"><a class="expandible" href="#"><span>E-Commerce Recommendation</span></a><ul><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/quickstart/"><span>Quick Start</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/dase/"><span>DASE</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/train-with-rate-event/"><span>Train with Rate Event</span></a></li><li class="level-3"><a class="final" href="/templates/ecommercerecommendation/adjust-score/"><span>Adjust Score</span></a></li></ul></li><li class="level-2"><a class="expandible" href="#"><span>Similar Product</span></a><ul><li class="level-3"><a class="final" href="/templates/similarproduct/quickstart/"><span>Quick Start</span></a><
 /li><li class="level-3"><a class="final" href="/templates/similarproduct/dase/"><span>DASE</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/multi-events-multi-algos/"><span>Multiple Events and Multiple Algorithms</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/return-item-properties/"><span>Returns Item Properties</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/train-with-rate-event/"><span>Train with Rate Event</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/rid-user-set-event/"><span>Get Rid of Events for Users</span></a></li><li class="level-3"><a class="final" href="/templates/similarproduct/recommended-user/"><span>Recommend Users</span></a></li></ul></li><li class="level-2"><a class="expandible" href="#"><span>Classification</span><
 /a><ul><li class="level-3"><a class="final" href="/templates/classification/quickstart/"><span>Quick Start</span></a></li><li class="level-3"><a class="final" href="/templates/classification/dase/"><span>DASE</span></a></li><li class="level-3"><a class="final" href="/templates/classification/how-to/"><span>How-To</span></a></li><li class="level-3"><a class="final" href="/templates/classification/add-algorithm/"><span>Use Alternative Algorithm</span></a></li><li class="level-3"><a class="final" href="/templates/classification/reading-custom-properties/"><span>Read Custom Properties</span></a></li></ul></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li class="level-2"><a class="final" href="/gallery/template-gallery/"><span>Browse</span></a></li><li class="level-2"><a class="final" href="/community/submit-template/"><span>Submit your Engine as a Template</span></a></li></ul></li><li class="level-1"><a class="expandible" hr
 ef="#"><span>Demo Tutorials</span></a><ul><li class="level-2"><a class="final" href="/demo/tapster/"><span>Comics Recommendation Demo</span></a></li><li class="level-2"><a class="final" href="/demo/community/"><span>Community Contributed Demo</span></a></li><li class="level-2"><a class="final" href="/demo/textclassification/"><span>Text Classification Engine Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" href="/community/"><span>Getting Involved</span></a><ul><li class="level-2"><a class="final" href="/community/contribute-code/"><span>Contribute Code</span></a></li><li class="level-2"><a class="final" href="/community/contribute-documentation/"><span>Contribute Documentation</span></a></li><li class="level-2"><a class="final" href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li class="level-2"><a class="final" href="/community/contribute-webhook/"><span>Contribute a Webhook</span></a></li><li class="level-2"><a class="final" href=
 "/community/projects/"><span>Community Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Help</span></a><ul><li class="level-2"><a class="final" href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a class="final" href="/support/"><span>Support</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" href="/cli/"><span>Command-line Interface</span></a></li><li class="level-2"><a class="final" href="/resources/release/"><span>Release Cadence</span></a></li><li class="level-2"><a class="final" href="/resources/intellij/"><span>Developing Engines with IntelliJ IDEA</span></a></li><li class="level-2"><a class="final" href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li class="level-2"><a class="final" href="/resources/glossary/"><span>Glossary</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"
 ><span>Apache Software Foundation</span></a><ul><li class="level-2"><a class="final" href="https://www.apache.org/"><span>Apache Homepage</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/licenses/"><span>License</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/foundation/sponsorship.html"><span>Sponsorship</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/foundation/thanks.html"><span>Thanks</span></a></li><li class="level-2"><a class="final" href="https://www.apache.org/security/"><span>Security</span></a></li></ul></li></ul></nav></div><div class="col-md-9 col-sm-12"><div class="content-header hidden-md hidden-lg"><div id="breadcrumbs" class="hidden-sm hidden xs"><ul><li><a href="#">ML Tuning and Evaluation</a><span class="spacer">&gt;</span></li><li><span class="last">Choosing Evaluation Metrics</span></li></ul></div><div id="page-title"><h1>Choosing Evaluation Metrics</h1></div></div><
 div id="table-of-content-wrapper"><h5>On this page</h5><aside id="table-of-contents"><ul> <li> <a href="#defining-metric">Defining Metric</a> </li> <li> <a href="#common-metrics">Common Metrics</a> </li> </ul> </aside><hr/><a id="edit-page-link" href="https://github.com/apache/predictionio/tree/livedoc/docs/manual/source/evaluation/metricchoose.html.md"><img src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div class="content-header hidden-sm hidden-xs"><div id="breadcrumbs" class="hidden-sm hidden xs"><ul><li><a href="#">ML Tuning and Evaluation</a><span class="spacer">&gt;</span></li><li><span class="last">Choosing Evaluation Metrics</span></li></ul></div><div id="page-title"><h1>Choosing Evaluation Metrics</h1></div></div><div class="content"> <p>The <a href="/evaluation/paramtuning/">hyperparameter tuning module</a> allows us to select the optimal engine parameter defined by a <code>Metric</code>. <code>Metric</code> determines the quality of an engine varia
 nt. We have skimmed through the process of choosing the right <code>Metric</code> in previous sections.</p><p>This section discusses basic evaluation metrics commonly used for classification problems. If you are more interested in knowing how to <em>implement</em> a custom metric, please skip to <a href="/evaluation/metricbuild/">the next section</a>.</p><h2 id='defining-metric' class='header-anchors'>Defining Metric</h2><p>Metric evaluates the quality of an engine by comparing engine&#39;s output (predicted result) with the original label (actual result). A engine serving better prediction should yield a higher metric score, the tuning module returns the engine parameter with the highest score. It is sometimes called <a href="http://en.wikipedia.org/wiki/Loss_function"><em>loss function</em></a> in literature, where the goal is to minimize the loss function.</p><p>During tuning, it is important for us to understand the definition of the metric, to make sure it is aligned with the p
 rediction engine&#39;s goal.</p><p>In the classification template, we use <em>Accuracy</em> as our metric. <em>Accuracy</em> is defined as: the percentage of queries which the engine is able to predict the correct label.</p><h2 id='common-metrics' class='header-anchors'>Common Metrics</h2><p>We illustrate the choice of metric with the following confusion matrix. Row represents the engine predicted label, column represents the actual label. The second row means that of the 200 testing data points, the engine predicted 60 (15 + 35 + 10) of them as label 2.0, among which 35 are correct prediction (i.e. actual label is 2.0, matches with the prediction), and 25 are wrong.</p> <table><thead> <tr> <th style="text-align: center"></th> <th style="text-align: center">Actual = 1.0</th> <th style="text-align: center">Actual = 2.0</th> <th style="text-align: center">Actual = 3.0</th> </tr> </thead><tbody> <tr> <td style="text-align: center"><strong>Predicted = 1.0</strong></td> <td style="text-a
 lign: center">30</td> <td style="text-align: center">0</td> <td style="text-align: center">60</td> </tr> <tr> <td style="text-align: center"><strong>Predicted = 2.0</strong></td> <td style="text-align: center">15</td> <td style="text-align: center">35</td> <td style="text-align: center">10</td> </tr> <tr> <td style="text-align: center"><strong>Predicted = 3.0</strong></td> <td style="text-align: center">0</td> <td style="text-align: center">0</td> <td style="text-align: center">50</td> </tr> </tbody></table> <h3 id='accuracy' class='header-anchors'>Accuracy</h3><p>Accuracy means that how many data points are predicted correctly. It is one of the simplest form of evaluation metrics. The accuracy score is # of correct points / # total = (30 + 35 + 50) / 200 = 0.575.</p><h3 id='precision' class='header-anchors'>Precision</h3><p>Precision is a metric for binary classifier which measures the correctness among all positive labels. A binary classifier gives only two output values (i.e. pos
 itive and negative). For problem where there are multiple values (3 in our example), we first have to transform our problem into a binary classification problem. For example, we can have problem whether label = 1.0. The confusion matrix now becomes:</p> <table><thead> <tr> <th style="text-align: center"></th> <th style="text-align: center">Actual = 1.0</th> <th style="text-align: center">Actual != 1.0</th> </tr> </thead><tbody> <tr> <td style="text-align: center"><strong>Predicted = 1.0</strong></td> <td style="text-align: center">30</td> <td style="text-align: center">60</td> </tr> <tr> <td style="text-align: center"><strong>Predicted != 1.0</strong></td> <td style="text-align: center">15</td> <td style="text-align: center">95</td> </tr> </tbody></table> <p>Precision is the ratio between the number of correct positive answer (true positive) and the sum of correct positive answer (true positive) and wrong but positively labeled answer (false positive). In this case, the precision is
  30 / (30 + 60) = ~0.3333.</p><h3 id='recall' class='header-anchors'>Recall</h3><p>Recall is a metric for binary classifier which measures how many positive labels are successfully predicted amongst all positive labels. Formally, it is the ratio between the number of correct positive answer (true positive) and the sum of correct positive answer (true positive) and wrongly negatively labeled answer (false negative). In this case, the recall is 30 / (30 + 15) = ~0.6667.</p><p>As we have discussed several common metrics for classification problem, we can implement them using the <code>Metric</code> class in <a href="/evaluation/metricbuild">the next section</a>.</p></div></div></div></div><footer><div class="container"><div class="seperator"></div><div class="row"><div class="col-md-6 footer-link-column"><div class="footer-link-column-row"><h4>Community</h4><ul><li><a href="//predictionio.apache.org/install/" target="blank">Download</a></li><li><a href="//predictionio.apache.org/" targ
 et="blank">Docs</a></li><li><a href="//github.com/apache/predictionio" target="blank">GitHub</a></li><li><a href="mailto:user-subscribe@predictionio.apache.org" target="blank">Subscribe to User Mailing List</a></li><li><a href="//stackoverflow.com/questions/tagged/predictionio" target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 footer-link-column"><div class="footer-link-column-row"><h4>Contribute</h4><ul><li><a href="//predictionio.apache.org/community/contribute-code/" target="blank">Contribute</a></li><li><a href="//github.com/apache/predictionio" target="blank">Source Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" target="blank">Bug Tracker</a></li><li><a href="mailto:dev-subscribe@predictionio.apache.org" target="blank">Subscribe to Development Mailing List</a></li></ul></div></div></div><div class="row"><div class="col-md-12 footer-link-column"><p>Apache PredictionIO, PredictionIO, Apache, the Apache feather logo, and the Apache Predicti
 onIO project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</p><p>All other marks mentioned may be trademarks or registered trademarks of their respective owners.</p></div></div></div><div id="footer-bottom"><div class="container"><div class="row"><div class="col-md-12"><div id="footer-logo-wrapper"><img alt="PredictionIO" src="/images/logos/logo-white-d1e9c6e6.png"/><span>®</span></div><div id="social-icons-wrapper"><a class="github-button" href="https://github.com/apache/predictionio" data-style="mega" data-count-href="/apache/predictionio/stargazers" data-count-api="/repos/apache/predictionio#stargazers_count" data-count-aria-label="# stargazers on GitHub" aria-label="Star apache/predictionio on GitHub">Star</a> <a class="github-button" href="https://github.com/apache/predictionio/fork" data-icon="octicon-git-branch" data-style="mega" data-count-href="/apache/predictionio/network" data-count-api="/r
 epos/apache/predictionio#forks_count" data-count-aria-label="# forks on GitHub" aria-label="Fork apache/predictionio on GitHub">Fork</a> <script id="github-bjs" async="" defer="" src="https://buttons.github.io/buttons.js"></script><a href="https://twitter.com/predictionio" target="blank"><img alt="PredictionIO on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a href="https://www.facebook.com/predictionio" target="blank"><img alt="PredictionIO on Facebook" src="/images/icons/facebook-5c57939c.png"/></a> </div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){
 (w[n].q=w[n].q||[]).push(arguments);};s=d.createElement(t);
 e=d.getElementsByTagName(t)[0];s.async=1;s.src=u;e.parentNode.insertBefore(s,e);
 })(window,document,'script','//s.swiftypecdn.com/install/v1/st.js','_st');

http://git-wip-us.apache.org/repos/asf/predictionio-site/blob/c17b9607/evaluation/paramtuning/index.html
----------------------------------------------------------------------
diff --git a/evaluation/paramtuning/index.html b/evaluation/paramtuning/index.html
index 0a99ab6..f7c21ec 100644
--- a/evaluation/paramtuning/index.html
+++ b/evaluation/paramtuning/index.html
@@ -240,7 +240,7 @@ The best variant params can be found <span class="k">in </span>best.json
     <span class="o">}</span>
   <span class="o">}</span>
 <span class="o">}</span>
-</pre></td></tr></tbody></table> </div> <p>The <code>readEval</code> method returns a sequence of (<code>TrainingData</code>, <code>EvaluationInfo</code>, <code>RDD[(Query, ActualResult)]</code>. <code>TrainingData</code> is the same class we use for deploy, <code>RDD[(Query, ActualResult)]</code> is the validation set, <code>EvaluationInfo</code> can be used to hold some global evaluation data ; it is not used in the current example.</p><p>Lines 11 to 41 is the logic of reading and transforming data from the datastore; it is equvialent to the existing <code>readTraining</code> method. After line 41, the variable <code>labeledPoints</code> contains the complete dataset with which we use to generate the (training, validation) sequence.</p><p>Lines 43 to 57 is the <em>k-fold</em> logic. Line 45 gives each data point a unique id, and we decide whether the point belongs to the training or validation set depends on the <em>mod</em> of the id (lines 48 to 49). For each point in the valida
 tion set, we construct the <code>Query</code> and <code>ActualResult</code> (line 55) which is used validate the engine.</p><h2 id='evaluation-metrics' class='header-anchors'>Evaluation Metrics</h2><p>We define a <code>Metric</code> which gives a <em>score</em> to engine params. The higher the score, the better the engine params are. In this template, we use accuray score which measures the portion of correct prediction among all data points.</p><p>In MyClassification/src/main/scala/<strong>Evaluation.scala</strong>, the class <code>Accuracy</code> implements the <em>accuracy</em> score. It extends a base helper class <code>AverageMetric</code> which calculates the average score overall <em>(Query, PredictionResult, ActualResult)</em> tuple.</p><div class="highlight scala"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
+</pre></td></tr></tbody></table> </div> <p>The <code>readEval</code> method returns a sequence of (<code>TrainingData</code>, <code>EvaluationInfo</code>, <code>RDD[(Query, ActualResult)]</code>. <code>TrainingData</code> is the same class we use for deploy, <code>RDD[(Query, ActualResult)]</code> is the validation set, <code>EvaluationInfo</code> can be used to hold some global evaluation data ; it is not used in the current example.</p><p>Lines 11 to 41 is the logic of reading and transforming data from the datastore; it is equvialent to the existing <code>readTraining</code> method. After line 41, the variable <code>labeledPoints</code> contains the complete dataset with which we use to generate the (training, validation) sequence.</p><p>Lines 43 to 57 is the <em>k-fold</em> logic. Line 45 gives each data point a unique id, and we decide whether the point belongs to the training or validation set depends on the <em>mod</em> of the id (lines 48 to 49). For each point in the valida
 tion set, we construct the <code>Query</code> and <code>ActualResult</code> (line 55) which is used validate the engine.</p><h2 id='evaluation-metrics' class='header-anchors'>Evaluation Metrics</h2><p>We define a <code>Metric</code> which gives a <em>score</em> to engine params. The higher the score, the better the engine params are. In this template, we use accuracy score which measures the portion of correct prediction among all data points.</p><p>In MyClassification/src/main/scala/<strong>Evaluation.scala</strong>, the class <code>Accuracy</code> implements the <em>accuracy</em> score. It extends a base helper class <code>AverageMetric</code> which calculates the average score overall <em>(Query, PredictionResult, ActualResult)</em> tuple.</p><div class="highlight scala"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
 2
 3
 4
@@ -287,7 +287,7 @@ The best variant params can be found <span class="k">in </span>best.json
     <span class="n">baseEP</span><span class="o">.</span><span class="n">copy</span><span class="o">(</span><span class="n">algorithmParamsList</span> <span class="k">=</span> <span class="nc">Seq</span><span class="o">((</span><span class="s">"naive"</span><span class="o">,</span> <span class="nc">AlgorithmParams</span><span class="o">(</span><span class="mf">100.0</span><span class="o">)))),</span>
     <span class="n">baseEP</span><span class="o">.</span><span class="n">copy</span><span class="o">(</span><span class="n">algorithmParamsList</span> <span class="k">=</span> <span class="nc">Seq</span><span class="o">((</span><span class="s">"naive"</span><span class="o">,</span> <span class="nc">AlgorithmParams</span><span class="o">(</span><span class="mf">1000.0</span><span class="o">)))))</span>
 <span class="o">}</span>
-</pre></td></tr></tbody></table> </div> <p>A good practise is to first define a base engine params, it contains the common parameters used in all evaluations (lines 7 to 8). With the base params, we construct the list of engine params we want to evaluation by adding or replacing the controller parameter. Lines 13 to 16 generate 3 engine parameters, each has a different smoothing parameters.</p><h2 id='running-the-evaluation' class='header-anchors'>Running the Evaluation</h2><p>It remains to run the evaluation. Let&#39;s recap the quick start section above. The <code>pio eval</code> command kick starts the evaluation, and the result can be seen from the console.</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
+</pre></td></tr></tbody></table> </div> <p>A good practice is to first define a base engine params, it contains the common parameters used in all evaluations (lines 7 to 8). With the base params, we construct the list of engine params we want to evaluation by adding or replacing the controller parameter. Lines 13 to 16 generate 3 engine parameters, each has a different smoothing parameters.</p><h2 id='running-the-evaluation' class='header-anchors'>Running the Evaluation</h2><p>It remains to run the evaluation. Let&#39;s recap the quick start section above. The <code>pio eval</code> command kick starts the evaluation, and the result can be seen from the console.</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
 2
 3</pre></td><td class="code"><pre><span class="gp">$ </span>pio build
 ...
@@ -381,7 +381,7 @@ Metrics:
   org.template.classification.Accuracy: 0.9281045751633987
 The best variant params can be found <span class="k">in </span>best.json
 <span class="o">[</span>INFO] <span class="o">[</span>CoreWorkflow<span class="nv">$]</span> runEvaluation completed
-</pre></td></tr></tbody></table> </div> <h2 id='notes' class='header-anchors'>Notes</h2> <ul> <li>We deliberately not metion <strong><em>test set</em></strong> in this hyperparameter tuning guide. In machine learning literature, the <strong><em>test set</em></strong> is a separate piece of data which is used to evaluate the final engine params outputted by the evaluation process. This guarantees that no information in the training / validation set is <em>leaked</em> into the engine params and yields a biased outcome. With PredictionIO, there are multiple ways of conducting robust tuning, we will cover this topic in the coming sections.</li> </ul> </div></div></div></div><footer><div class="container"><div class="seperator"></div><div class="row"><div class="col-md-6 footer-link-column"><div class="footer-link-column-row"><h4>Community</h4><ul><li><a href="//predictionio.apache.org/install/" target="blank">Download</a></li><li><a href="//predictionio.apache.org/" target="blank">Docs<
 /a></li><li><a href="//github.com/apache/predictionio" target="blank">GitHub</a></li><li><a href="mailto:user-subscribe@predictionio.apache.org" target="blank">Subscribe to User Mailing List</a></li><li><a href="//stackoverflow.com/questions/tagged/predictionio" target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 footer-link-column"><div class="footer-link-column-row"><h4>Contribute</h4><ul><li><a href="//predictionio.apache.org/community/contribute-code/" target="blank">Contribute</a></li><li><a href="//github.com/apache/predictionio" target="blank">Source Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" target="blank">Bug Tracker</a></li><li><a href="mailto:dev-subscribe@predictionio.apache.org" target="blank">Subscribe to Development Mailing List</a></li></ul></div></div></div><div class="row"><div class="col-md-12 footer-link-column"><p>Apache PredictionIO, PredictionIO, Apache, the Apache feather logo, and the Apache PredictionIO project log
 o are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</p><p>All other marks mentioned may be trademarks or registered trademarks of their respective owners.</p></div></div></div><div id="footer-bottom"><div class="container"><div class="row"><div class="col-md-12"><div id="footer-logo-wrapper"><img alt="PredictionIO" src="/images/logos/logo-white-d1e9c6e6.png"/><span>®</span></div><div id="social-icons-wrapper"><a class="github-button" href="https://github.com/apache/predictionio" data-style="mega" data-count-href="/apache/predictionio/stargazers" data-count-api="/repos/apache/predictionio#stargazers_count" data-count-aria-label="# stargazers on GitHub" aria-label="Star apache/predictionio on GitHub">Star</a> <a class="github-button" href="https://github.com/apache/predictionio/fork" data-icon="octicon-git-branch" data-style="mega" data-count-href="/apache/predictionio/network" data-count-api="/repos/apache/pred
 ictionio#forks_count" data-count-aria-label="# forks on GitHub" aria-label="Fork apache/predictionio on GitHub">Fork</a> <script id="github-bjs" async="" defer="" src="https://buttons.github.io/buttons.js"></script><a href="https://twitter.com/predictionio" target="blank"><img alt="PredictionIO on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a href="https://www.facebook.com/predictionio" target="blank"><img alt="PredictionIO on Facebook" src="/images/icons/facebook-5c57939c.png"/></a> </div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){
+</pre></td></tr></tbody></table> </div> <h2 id='notes' class='header-anchors'>Notes</h2> <ul> <li>We deliberately not mention <strong><em>test set</em></strong> in this hyperparameter tuning guide. In machine learning literature, the <strong><em>test set</em></strong> is a separate piece of data which is used to evaluate the final engine params outputted by the evaluation process. This guarantees that no information in the training / validation set is <em>leaked</em> into the engine params and yields a biased outcome. With PredictionIO, there are multiple ways of conducting robust tuning, we will cover this topic in the coming sections.</li> </ul> </div></div></div></div><footer><div class="container"><div class="seperator"></div><div class="row"><div class="col-md-6 footer-link-column"><div class="footer-link-column-row"><h4>Community</h4><ul><li><a href="//predictionio.apache.org/install/" target="blank">Download</a></li><li><a href="//predictionio.apache.org/" target="blank">Docs
 </a></li><li><a href="//github.com/apache/predictionio" target="blank">GitHub</a></li><li><a href="mailto:user-subscribe@predictionio.apache.org" target="blank">Subscribe to User Mailing List</a></li><li><a href="//stackoverflow.com/questions/tagged/predictionio" target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 footer-link-column"><div class="footer-link-column-row"><h4>Contribute</h4><ul><li><a href="//predictionio.apache.org/community/contribute-code/" target="blank">Contribute</a></li><li><a href="//github.com/apache/predictionio" target="blank">Source Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" target="blank">Bug Tracker</a></li><li><a href="mailto:dev-subscribe@predictionio.apache.org" target="blank">Subscribe to Development Mailing List</a></li></ul></div></div></div><div class="row"><div class="col-md-12 footer-link-column"><p>Apache PredictionIO, PredictionIO, Apache, the Apache feather logo, and the Apache PredictionIO project lo
 go are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</p><p>All other marks mentioned may be trademarks or registered trademarks of their respective owners.</p></div></div></div><div id="footer-bottom"><div class="container"><div class="row"><div class="col-md-12"><div id="footer-logo-wrapper"><img alt="PredictionIO" src="/images/logos/logo-white-d1e9c6e6.png"/><span>®</span></div><div id="social-icons-wrapper"><a class="github-button" href="https://github.com/apache/predictionio" data-style="mega" data-count-href="/apache/predictionio/stargazers" data-count-api="/repos/apache/predictionio#stargazers_count" data-count-aria-label="# stargazers on GitHub" aria-label="Star apache/predictionio on GitHub">Star</a> <a class="github-button" href="https://github.com/apache/predictionio/fork" data-icon="octicon-git-branch" data-style="mega" data-count-href="/apache/predictionio/network" data-count-api="/repos/apache/pre
 dictionio#forks_count" data-count-aria-label="# forks on GitHub" aria-label="Fork apache/predictionio on GitHub">Fork</a> <script id="github-bjs" async="" defer="" src="https://buttons.github.io/buttons.js"></script><a href="https://twitter.com/predictionio" target="blank"><img alt="PredictionIO on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a href="https://www.facebook.com/predictionio" target="blank"><img alt="PredictionIO on Facebook" src="/images/icons/facebook-5c57939c.png"/></a> </div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){
 (w[n].q=w[n].q||[]).push(arguments);};s=d.createElement(t);
 e=d.getElementsByTagName(t)[0];s.async=1;s.src=u;e.parentNode.insertBefore(s,e);
 })(window,document,'script','//s.swiftypecdn.com/install/v1/st.js','_st');