You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hivemall.apache.org by my...@apache.org on 2018/12/26 10:22:59 UTC
[11/33] incubator-hivemall-site git commit: Update tutorial for
general classifier/regressor
http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/d9012d92/userguide/regression/e2006_arow.html
----------------------------------------------------------------------
diff --git a/userguide/regression/e2006_arow.html b/userguide/regression/e2006_arow.html
index c95408e..473cbce 100644
--- a/userguide/regression/e2006_arow.html
+++ b/userguide/regression/e2006_arow.html
@@ -100,7 +100,7 @@
<link rel="next" href="kddcup12tr2.html" />
- <link rel="prev" href="e2006_dataset.html" />
+ <link rel="prev" href="e2006_generic.html" />
</head>
@@ -972,7 +972,7 @@
<b>6.2.1.</b>
- Data preparation
+ Data Preparation
</a>
@@ -980,13 +980,28 @@
</li>
- <li class="chapter " data-level="6.2.2" data-path="../binaryclass/a9a_lr.html">
+ <li class="chapter " data-level="6.2.2" data-path="../binaryclass/a9a_generic.html">
- <a href="../binaryclass/a9a_lr.html">
+ <a href="../binaryclass/a9a_generic.html">
<b>6.2.2.</b>
+ General Binary Classifier
+
+ </a>
+
+
+
+ </li>
+
+ <li class="chapter " data-level="6.2.3" data-path="../binaryclass/a9a_lr.html">
+
+ <a href="../binaryclass/a9a_lr.html">
+
+
+ <b>6.2.3.</b>
+
Logistic Regression
</a>
@@ -995,14 +1010,14 @@
</li>
- <li class="chapter " data-level="6.2.3" data-path="../binaryclass/a9a_minibatch.html">
+ <li class="chapter " data-level="6.2.4" data-path="../binaryclass/a9a_minibatch.html">
<a href="../binaryclass/a9a_minibatch.html">
- <b>6.2.3.</b>
+ <b>6.2.4.</b>
- Mini-batch gradient descent
+ Mini-batch Gradient Descent
</a>
@@ -1038,7 +1053,7 @@
<b>6.3.1.</b>
- Data preparation
+ Data Preparation
</a>
@@ -1076,13 +1091,28 @@
</li>
- <li class="chapter " data-level="6.3.4" data-path="../binaryclass/news20_adagrad.html">
+ <li class="chapter " data-level="6.3.4" data-path="../binaryclass/news20_generic.html">
- <a href="../binaryclass/news20_adagrad.html">
+ <a href="../binaryclass/news20_generic.html">
<b>6.3.4.</b>
+ General Binary Classifier
+
+ </a>
+
+
+
+ </li>
+
+ <li class="chapter " data-level="6.3.5" data-path="../binaryclass/news20_adagrad.html">
+
+ <a href="../binaryclass/news20_adagrad.html">
+
+
+ <b>6.3.5.</b>
+
AdaGradRDA, AdaGrad, AdaDelta
</a>
@@ -1091,12 +1121,12 @@
</li>
- <li class="chapter " data-level="6.3.5" data-path="../binaryclass/news20_rf.html">
+ <li class="chapter " data-level="6.3.6" data-path="../binaryclass/news20_rf.html">
<a href="../binaryclass/news20_rf.html">
- <b>6.3.5.</b>
+ <b>6.3.6.</b>
Random Forest
@@ -1134,7 +1164,7 @@
<b>6.4.1.</b>
- Data preparation
+ Data Preparation
</a>
@@ -1185,7 +1215,7 @@
<b>6.5.1.</b>
- Data preparation
+ Data Preparation
</a>
@@ -1236,7 +1266,7 @@
<b>6.6.1.</b>
- Data pareparation
+ Data Preparation
</a>
@@ -1302,7 +1332,7 @@
<b>6.8.1.</b>
- Data preparation
+ Data Preparation
</a>
@@ -1360,7 +1390,7 @@
<b>7.1.1.</b>
- Data preparation
+ Data Preparation
</a>
@@ -1375,7 +1405,7 @@
<b>7.1.2.</b>
- Data preparation for one-vs-the-rest classifiers
+ Data Preparation for one-vs-the-rest classifiers
</a>
@@ -1435,7 +1465,7 @@
<b>7.1.6.</b>
- one-vs-the-rest classifier
+ one-vs-the-rest Classifier
</a>
@@ -1559,7 +1589,7 @@
<b>8.2.1.</b>
- Data preparation
+ Data Preparation
</a>
@@ -1567,13 +1597,28 @@
</li>
- <li class="chapter active" data-level="8.2.2" data-path="e2006_arow.html">
+ <li class="chapter " data-level="8.2.2" data-path="e2006_generic.html">
- <a href="e2006_arow.html">
+ <a href="e2006_generic.html">
<b>8.2.2.</b>
+ General Regressor
+
+ </a>
+
+
+
+ </li>
+
+ <li class="chapter active" data-level="8.2.3" data-path="e2006_arow.html">
+
+ <a href="e2006_arow.html">
+
+
+ <b>8.2.3.</b>
+
Passive Aggressive, AROW
</a>
@@ -1610,7 +1655,7 @@
<b>8.3.1.</b>
- Data preparation
+ Data Preparation
</a>
@@ -1698,7 +1743,7 @@
<b>9.1.1.</b>
- Item-based collaborative filtering
+ Item-based Collaborative Filtering
</a>
@@ -1734,7 +1779,7 @@
<b>9.2.1.</b>
- Data preparation
+ Data Preparation
</a>
@@ -1749,7 +1794,7 @@
<b>9.2.2.</b>
- LSH/MinHash and Jaccard similarity
+ LSH/MinHash and Jaccard Similarity
</a>
@@ -1764,7 +1809,7 @@
<b>9.2.3.</b>
- LSH/MinHash and brute-force search
+ LSH/MinHash and Brute-force Search
</a>
@@ -1815,7 +1860,7 @@
<b>9.3.1.</b>
- Data preparation
+ Data Preparation
</a>
@@ -1830,7 +1875,7 @@
<b>9.3.2.</b>
- Item-based collaborative filtering
+ Item-based Collaborative Filtering
</a>
@@ -1875,7 +1920,7 @@
<b>9.3.5.</b>
- SLIM for fast top-k recommendation
+ SLIM for fast top-k Recommendation
</a>
@@ -1890,7 +1935,7 @@
<b>9.3.6.</b>
- 10-fold cross validation (Matrix Factorization)
+ 10-fold Cross Validation (Matrix Factorization)
</a>
@@ -2080,7 +2125,7 @@
<b>13.2.1.</b>
- a9a tutorial for DataFrame
+ a9a Tutorial for DataFrame
</a>
@@ -2095,7 +2140,7 @@
<b>13.2.2.</b>
- a9a tutorial for SQL
+ a9a Tutorial for SQL
</a>
@@ -2131,7 +2176,7 @@
<b>13.3.1.</b>
- E2006-tfidf regression tutorial for DataFrame
+ E2006-tfidf Regression Tutorial for DataFrame
</a>
@@ -2146,7 +2191,7 @@
<b>13.3.2.</b>
- E2006-tfidf regression tutorial for SQL
+ E2006-tfidf Regression Tutorial for SQL
</a>
@@ -2166,7 +2211,7 @@
<b>13.4.</b>
- Generic features
+ Generic Features
</a>
@@ -2182,7 +2227,7 @@
<b>13.4.1.</b>
- Top-k join processing
+ Top-k Join Processing
</a>
@@ -2197,7 +2242,7 @@
<b>13.4.2.</b>
- Other utility functions
+ Other Utility Functions
</a>
@@ -2317,11 +2362,40 @@
specific language governing permissions and limitations
under the License.
-->
-<p><a href="https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression.html#E2006-tfidf" target="_blank">https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression.html#E2006-tfidf</a></p>
-<hr>
-<h1 id="pa1a">[PA1a]</h1>
+<!-- toc --><div id="toc" class="toc">
+
+<ul>
+<li><a href="#pa1a">PA1a</a><ul>
+<li><a href="#training">Training</a></li>
+<li><a href="#prediction">prediction</a></li>
+<li><a href="#evaluation">evaluation</a></li>
+</ul>
+</li>
+<li><a href="#pa2a">PA2a</a><ul>
+<li><a href="#training-1">Training</a></li>
+<li><a href="#prediction-1">prediction</a></li>
+<li><a href="#evaluation-1">evaluation</a></li>
+</ul>
+</li>
+<li><a href="#arow">AROW</a><ul>
+<li><a href="#training-2">Training</a></li>
+<li><a href="#prediction-2">prediction</a></li>
+<li><a href="#evaluation-2">evaluation</a></li>
+</ul>
+</li>
+<li><a href="#arowe">AROWe</a><ul>
+<li><a href="#training-3">Training</a></li>
+<li><a href="#prediction-3">prediction</a></li>
+<li><a href="#evaluation-3">evaluation</a></li>
+</ul>
+</li>
+</ul>
+
+</div><!-- tocstop -->
+<h1 id="pa1a">PA1a</h1>
<h2 id="training">Training</h2>
<pre><code class="lang-sql"><span class="hljs-keyword">set</span> mapred.reduce.tasks=<span class="hljs-number">64</span>;
+
<span class="hljs-keyword">drop</span> <span class="hljs-keyword">table</span> e2006tfidf_pa1a_model ;
<span class="hljs-keyword">create</span> <span class="hljs-keyword">table</span> e2006tfidf_pa1a_model <span class="hljs-keyword">as</span>
<span class="hljs-keyword">select</span>
@@ -2334,9 +2408,11 @@
e2006tfidf_train_x3
) t
<span class="hljs-keyword">group</span> <span class="hljs-keyword">by</span> feature;
+
+<span class="hljs-comment">-- reset to the default setting</span>
<span class="hljs-keyword">set</span> mapred.reduce.tasks=<span class="hljs-number">-1</span>;
</code></pre>
-<p><em>Caution: Do not use voted_avg() for regression. voted_avg() is for classification.</em></p>
+<div class="panel panel-warning"><div class="panel-heading"><h3 class="panel-title" id="caution"><i class="fa fa-exclamation-triangle"></i> Caution</h3></div><div class="panel-body"><p>Do not use <code>voted_avg()</code> for regression. <code>voted_avg()</code> is for classification.</p></div></div>
<h2 id="prediction">prediction</h2>
<pre><code class="lang-sql"><span class="hljs-keyword">create</span> <span class="hljs-keyword">or</span> <span class="hljs-keyword">replace</span> <span class="hljs-keyword">view</span> e2006tfidf_pa1a_predict
<span class="hljs-keyword">as</span>
@@ -2350,35 +2426,42 @@
t.<span class="hljs-keyword">rowid</span>;
</code></pre>
<h2 id="evaluation">evaluation</h2>
-<pre><code class="lang-sql"><span class="hljs-keyword">drop</span> <span class="hljs-keyword">table</span> e2006tfidf_pa1a_submit;
-<span class="hljs-keyword">create</span> <span class="hljs-keyword">table</span> e2006tfidf_pa1a_submit <span class="hljs-keyword">as</span>
-<span class="hljs-keyword">select</span>
- t.target <span class="hljs-keyword">as</span> actual,
- p.predicted <span class="hljs-keyword">as</span> predicted
-<span class="hljs-keyword">from</span>
- e2006tfidf_test t <span class="hljs-keyword">JOIN</span> e2006tfidf_pa1a_predict p
- <span class="hljs-keyword">on</span> (t.<span class="hljs-keyword">rowid</span> = p.<span class="hljs-keyword">rowid</span>);
-
-<span class="hljs-keyword">select</span> <span class="hljs-keyword">avg</span>(actual), <span class="hljs-keyword">avg</span>(predicted) <span class="hljs-keyword">from</span> e2006tfidf_pa1a_submit;
-</code></pre>
-<blockquote>
-<p>-3.8200363760415414 -3.8869923258589476</p>
-</blockquote>
-<pre><code class="lang-sql"><span class="hljs-keyword">set</span> hivevar:mean_actual=<span class="hljs-number">-3.8200363760415414</span>;
-
+<pre><code class="lang-sql">WITH submit as (
+ <span class="hljs-keyword">select</span>
+ t.target <span class="hljs-keyword">as</span> actual,
+ p.predicted <span class="hljs-keyword">as</span> predicted
+ <span class="hljs-keyword">from</span>
+ e2006tfidf_test t
+ <span class="hljs-keyword">JOIN</span> e2006tfidf_pa1a_predict p
+ <span class="hljs-keyword">on</span> (t.<span class="hljs-keyword">rowid</span> = p.<span class="hljs-keyword">rowid</span>)
+)
<span class="hljs-keyword">select</span>
- <span class="hljs-keyword">sqrt</span>(<span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(predicted - actual,<span class="hljs-number">2.0</span>))/<span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>)) <span class="hljs-keyword">as</span> RMSE,
- <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(predicted - actual,<span class="hljs-number">2.0</span>))/<span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>) <span class="hljs-keyword">as</span> MSE,
- <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">abs</span>(predicted - actual))/<span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>) <span class="hljs-keyword">as</span> MAE,
- <span class="hljs-number">1</span> - <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(actual - predicted,<span class="hljs-number">2.0</span>)) / <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(actual - ${mean_actual},<span class="hljs-number">2.0</span>)) <span class="hljs-keyword">as</span> R2
+ rmse(predicted, actual) <span class="hljs-keyword">as</span> RMSE,
+ mse(predicted, actual) <span class="hljs-keyword">as</span> MSE,
+ mae(predicted, actual) <span class="hljs-keyword">as</span> MAE,
+ r2(predicted, actual) <span class="hljs-keyword">as</span> R2
<span class="hljs-keyword">from</span>
- e2006tfidf_pa1a_submit;
+ submit;
</code></pre>
-<blockquote>
-<p>0.3797959864675519 0.14424499133686086 0.23846059576113587 0.5010367946980386</p>
-</blockquote>
-<hr>
-<h1 id="pa2a">[PA2a]</h1>
+<table>
+<thead>
+<tr>
+<th style="text-align:center">rmse</th>
+<th style="text-align:center">mse</th>
+<th style="text-align:center">mae</th>
+<th style="text-align:center">r2</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td style="text-align:center">0.3797959864675519</td>
+<td style="text-align:center">0.14424499133686086</td>
+<td style="text-align:center">0.23846059576113587</td>
+<td style="text-align:center">0.5010367946980386</td>
+</tr>
+</tbody>
+</table>
+<h1 id="pa2a">PA2a</h1>
<h2 id="training-1">Training</h2>
<pre><code class="lang-sql"><span class="hljs-keyword">set</span> mapred.reduce.tasks=<span class="hljs-number">64</span>;
<span class="hljs-keyword">drop</span> <span class="hljs-keyword">table</span> e2006tfidf_pa2a_model;
@@ -2408,35 +2491,42 @@
t.<span class="hljs-keyword">rowid</span>;
</code></pre>
<h2 id="evaluation-1">evaluation</h2>
-<pre><code class="lang-sql"><span class="hljs-keyword">drop</span> <span class="hljs-keyword">table</span> e2006tfidf_pa2a_submit;
-<span class="hljs-keyword">create</span> <span class="hljs-keyword">table</span> e2006tfidf_pa2a_submit <span class="hljs-keyword">as</span>
-<span class="hljs-keyword">select</span>
- t.target <span class="hljs-keyword">as</span> actual,
- pd.predicted <span class="hljs-keyword">as</span> predicted
-<span class="hljs-keyword">from</span>
- e2006tfidf_test t <span class="hljs-keyword">JOIN</span> e2006tfidf_pa2a_predict pd
- <span class="hljs-keyword">on</span> (t.<span class="hljs-keyword">rowid</span> = pd.<span class="hljs-keyword">rowid</span>);
-
-<span class="hljs-keyword">select</span> <span class="hljs-keyword">avg</span>(actual), <span class="hljs-keyword">avg</span>(predicted) <span class="hljs-keyword">from</span> e2006tfidf_pa2a_submit;
-</code></pre>
-<blockquote>
-<p>-3.8200363760415414 -3.9124877451612488</p>
-</blockquote>
-<pre><code class="lang-sql"><span class="hljs-keyword">set</span> hivevar:mean_actual=<span class="hljs-number">-3.8200363760415414</span>;
-
+<pre><code class="lang-sql">WITH submit as (
+ <span class="hljs-keyword">select</span>
+ t.target <span class="hljs-keyword">as</span> actual,
+ p.predicted <span class="hljs-keyword">as</span> predicted
+ <span class="hljs-keyword">from</span>
+ e2006tfidf_test t
+ <span class="hljs-keyword">JOIN</span> e2006tfidf_pa2a_predict p
+ <span class="hljs-keyword">on</span> (t.<span class="hljs-keyword">rowid</span> = p.<span class="hljs-keyword">rowid</span>)
+)
<span class="hljs-keyword">select</span>
- <span class="hljs-keyword">sqrt</span>(<span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(predicted - actual,<span class="hljs-number">2.0</span>))/<span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>)) <span class="hljs-keyword">as</span> RMSE,
- <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(predicted - actual,<span class="hljs-number">2.0</span>))/<span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>) <span class="hljs-keyword">as</span> MSE,
- <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">abs</span>(predicted - actual))/<span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>) <span class="hljs-keyword">as</span> MAE,
- <span class="hljs-number">1</span> - <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(actual - predicted,<span class="hljs-number">2.0</span>)) / <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(actual - ${mean_actual},<span class="hljs-number">2.0</span>)) <span class="hljs-keyword">as</span> R2
+ rmse(predicted, actual) <span class="hljs-keyword">as</span> RMSE,
+ mse(predicted, actual) <span class="hljs-keyword">as</span> MSE,
+ mae(predicted, actual) <span class="hljs-keyword">as</span> MAE,
+ r2(predicted, actual) <span class="hljs-keyword">as</span> R2
<span class="hljs-keyword">from</span>
- e2006tfidf_pa2a_submit;
+ submit;
</code></pre>
-<blockquote>
-<p>0.38538660838804495 0.14852283792484033 0.2466732002711477 0.48623913673053565</p>
-</blockquote>
-<hr>
-<h1 id="arow">[AROW]</h1>
+<table>
+<thead>
+<tr>
+<th style="text-align:center">rmse</th>
+<th style="text-align:center">mse</th>
+<th style="text-align:center">mae</th>
+<th style="text-align:center">r2</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td style="text-align:center">0.38538660838804495</td>
+<td style="text-align:center">0.14852283792484033</td>
+<td style="text-align:center">0.2466732002711477</td>
+<td style="text-align:center">0.48623913673053565</td>
+</tr>
+</tbody>
+</table>
+<h1 id="arow">AROW</h1>
<h2 id="training-2">Training</h2>
<pre><code class="lang-sql"><span class="hljs-keyword">set</span> mapred.reduce.tasks=<span class="hljs-number">64</span>;
<span class="hljs-keyword">drop</span> <span class="hljs-keyword">table</span> e2006tfidf_arow_model ;
@@ -2468,35 +2558,42 @@
t.<span class="hljs-keyword">rowid</span>;
</code></pre>
<h2 id="evaluation-2">evaluation</h2>
-<pre><code class="lang-sql"><span class="hljs-keyword">drop</span> <span class="hljs-keyword">table</span> e2006tfidf_arow_submit;
-<span class="hljs-keyword">create</span> <span class="hljs-keyword">table</span> e2006tfidf_arow_submit <span class="hljs-keyword">as</span>
-<span class="hljs-keyword">select</span>
- t.target <span class="hljs-keyword">as</span> actual,
- p.predicted <span class="hljs-keyword">as</span> predicted
-<span class="hljs-keyword">from</span>
- e2006tfidf_test t <span class="hljs-keyword">JOIN</span> e2006tfidf_arow_predict p
- <span class="hljs-keyword">on</span> (t.<span class="hljs-keyword">rowid</span> = p.<span class="hljs-keyword">rowid</span>);
-
-<span class="hljs-keyword">select</span> <span class="hljs-keyword">avg</span>(actual), <span class="hljs-keyword">avg</span>(predicted) <span class="hljs-keyword">from</span> e2006tfidf_arow_submit;
-</code></pre>
-<blockquote>
-<p>-3.8200363760415414 -3.8692518911517433</p>
-</blockquote>
-<pre><code class="lang-sql"><span class="hljs-keyword">set</span> hivevar:mean_actual=<span class="hljs-number">-3.8200363760415414</span>;
-
+<pre><code class="lang-sql">WITH submit as (
+ <span class="hljs-keyword">select</span>
+ t.target <span class="hljs-keyword">as</span> actual,
+ p.predicted <span class="hljs-keyword">as</span> predicted
+ <span class="hljs-keyword">from</span>
+ e2006tfidf_test t
+ <span class="hljs-keyword">JOIN</span> e2006tfidf_arow_predict p
+ <span class="hljs-keyword">on</span> (t.<span class="hljs-keyword">rowid</span> = p.<span class="hljs-keyword">rowid</span>)
+)
<span class="hljs-keyword">select</span>
- <span class="hljs-keyword">sqrt</span>(<span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(predicted - actual,<span class="hljs-number">2.0</span>))/<span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>)) <span class="hljs-keyword">as</span> RMSE,
- <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(predicted - actual,<span class="hljs-number">2.0</span>))/<span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>) <span class="hljs-keyword">as</span> MSE,
- <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">abs</span>(predicted - actual))/<span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>) <span class="hljs-keyword">as</span> MAE,
- <span class="hljs-number">1</span> - <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(actual - predicted,<span class="hljs-number">2.0</span>)) / <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(actual - ${mean_actual},<span class="hljs-number">2.0</span>)) <span class="hljs-keyword">as</span> R2
+ rmse(predicted, actual) <span class="hljs-keyword">as</span> RMSE,
+ mse(predicted, actual) <span class="hljs-keyword">as</span> MSE,
+ mae(predicted, actual) <span class="hljs-keyword">as</span> MAE,
+ r2(predicted, actual) <span class="hljs-keyword">as</span> R2
<span class="hljs-keyword">from</span>
- e2006tfidf_arow_submit;
+ submit;
</code></pre>
-<blockquote>
-<p>0.37862513029019407 0.14335698928726642 0.2368787001269389 0.5041085155590119</p>
-</blockquote>
-<hr>
-<h1 id="arowe">[AROWe]</h1>
+<table>
+<thead>
+<tr>
+<th style="text-align:center">rmse</th>
+<th style="text-align:center">mse</th>
+<th style="text-align:center">mae</th>
+<th style="text-align:center">r2</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td style="text-align:center">0.37862513029019407</td>
+<td style="text-align:center">0.14335698928726642</td>
+<td style="text-align:center">0.2368787001269389</td>
+<td style="text-align:center">0.5041085155590119</td>
+</tr>
+</tbody>
+</table>
+<h1 id="arowe">AROWe</h1>
<p>AROWe is a modified version of AROW that uses Hinge loss (epsilon = 0.1)</p>
<h2 id="training-3">Training</h2>
<pre><code class="lang-sql"><span class="hljs-keyword">set</span> mapred.reduce.tasks=<span class="hljs-number">64</span>;
@@ -2529,33 +2626,41 @@
t.<span class="hljs-keyword">rowid</span>;
</code></pre>
<h2 id="evaluation-3">evaluation</h2>
-<pre><code class="lang-sql"><span class="hljs-keyword">drop</span> <span class="hljs-keyword">table</span> e2006tfidf_arowe_submit;
-<span class="hljs-keyword">create</span> <span class="hljs-keyword">table</span> e2006tfidf_arowe_submit <span class="hljs-keyword">as</span>
-<span class="hljs-keyword">select</span>
- t.target <span class="hljs-keyword">as</span> actual,
- p.predicted <span class="hljs-keyword">as</span> predicted
-<span class="hljs-keyword">from</span>
- e2006tfidf_test t <span class="hljs-keyword">JOIN</span> e2006tfidf_arowe_predict p
- <span class="hljs-keyword">on</span> (t.<span class="hljs-keyword">rowid</span> = p.<span class="hljs-keyword">rowid</span>);
-
-<span class="hljs-keyword">select</span> <span class="hljs-keyword">avg</span>(actual), <span class="hljs-keyword">avg</span>(predicted) <span class="hljs-keyword">from</span> e2006tfidf_arowe_submit;
-</code></pre>
-<blockquote>
-<p>-3.8200363760415414 -3.86494905688414</p>
-</blockquote>
-<pre><code class="lang-sql"><span class="hljs-keyword">set</span> hivevar:mean_actual=<span class="hljs-number">-3.8200363760415414</span>;
-
+<pre><code class="lang-sql">WITH submit as (
+ <span class="hljs-keyword">select</span>
+ t.target <span class="hljs-keyword">as</span> actual,
+ p.predicted <span class="hljs-keyword">as</span> predicted
+ <span class="hljs-keyword">from</span>
+ e2006tfidf_test t
+ <span class="hljs-keyword">JOIN</span> e2006tfidf_arowe_predict p
+ <span class="hljs-keyword">on</span> (t.<span class="hljs-keyword">rowid</span> = p.<span class="hljs-keyword">rowid</span>)
+)
<span class="hljs-keyword">select</span>
- <span class="hljs-keyword">sqrt</span>(<span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(predicted - actual,<span class="hljs-number">2.0</span>))/<span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>)) <span class="hljs-keyword">as</span> RMSE,
- <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(predicted - actual,<span class="hljs-number">2.0</span>))/<span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>) <span class="hljs-keyword">as</span> MSE,
- <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">abs</span>(predicted - actual))/<span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>) <span class="hljs-keyword">as</span> MAE,
- <span class="hljs-number">1</span> - <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(actual - predicted,<span class="hljs-number">2.0</span>)) / <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(actual - ${mean_actual},<span class="hljs-number">2.0</span>)) <span class="hljs-keyword">as</span> R2
+ rmse(predicted, actual) <span class="hljs-keyword">as</span> RMSE,
+ mse(predicted, actual) <span class="hljs-keyword">as</span> MSE,
+ mae(predicted, actual) <span class="hljs-keyword">as</span> MAE,
+ r2(predicted, actual) <span class="hljs-keyword">as</span> R2
<span class="hljs-keyword">from</span>
- e2006tfidf_arowe_submit;
+ submit;
</code></pre>
-<blockquote>
-<p>0.37789148212861856 0.14280197226536404 0.2357339155291536 0.5060283955470721</p>
-</blockquote>
+<table>
+<thead>
+<tr>
+<th style="text-align:center">rmse</th>
+<th style="text-align:center">mse</th>
+<th style="text-align:center">mae</th>
+<th style="text-align:center">r2</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td style="text-align:center">0.37789148212861856</td>
+<td style="text-align:center">0.14280197226536404</td>
+<td style="text-align:center">0.2357339155291536</td>
+<td style="text-align:center">0.5060283955470721</td>
+</tr>
+</tbody>
+</table>
<p><div id="page-footer" class="localized-footer"><hr><!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
@@ -2611,7 +2716,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda
<script>
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"Passive Aggressive, AROW","level":"8.2.2","depth":2,"next":{"title":"KDDCup 2012 Track 2 CTR Prediction Tutorial","level":"8.3","depth":1,"path":"regression/kddcup12tr2.md","ref":"regression/kddcup12tr2.md","articles":[{"title":"Data preparation","level":"8.3.1","depth":2,"path":"regression/kddcup12tr2_dataset.md","ref":"regression/kddcup12tr2_dataset.md","articles":[]},{"title":"Logistic Regression, Passive Aggressive","level":"8.3.2","depth":2,"path":"regression/kddcup12tr2_lr.md","ref":"regression/kddcup12tr2_lr.md","articles":[]},{"title":"Logistic Regression with amplifier","level":"8.3.3","depth":2,"path":"regression/kddcup12tr2_lr_amplify.md","ref":"regression/kddcup12tr2_lr_amplify.md","articles":[]},{"title":"AdaGrad, AdaDelta","level":"8.3.4","depth":2,"path":"regression/kddcup12tr2_adagrad.md","ref":"regression/kddcup12tr2_adagrad.md","articles":[]}]},"previous":{"title":"Data preparation","level":"8.2.1","depth":2,"pa
th":"regression/e2006_dataset.md","ref":"regression/e2006_dataset.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"the
me":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"https://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/tree/master/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"struc
ture":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"https://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"regression/e2006_arow.md","mtime":"2018-11-02T10:33:52.973Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2018-11-13T09:32:29.643Z"},"basePath":"..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"Passive Aggressive, AROW","level":"8.2.3","depth":2,"next":{"title":"KDDCup 2012 Track 2 CTR Prediction Tutorial","level":"8.3","depth":1,"path":"regression/kddcup12tr2.md","ref":"regression/kddcup12tr2.md","articles":[{"title":"Data Preparation","level":"8.3.1","depth":2,"path":"regression/kddcup12tr2_dataset.md","ref":"regression/kddcup12tr2_dataset.md","articles":[]},{"title":"Logistic Regression, Passive Aggressive","level":"8.3.2","depth":2,"path":"regression/kddcup12tr2_lr.md","ref":"regression/kddcup12tr2_lr.md","articles":[]},{"title":"Logistic Regression with amplifier","level":"8.3.3","depth":2,"path":"regression/kddcup12tr2_lr_amplify.md","ref":"regression/kddcup12tr2_lr_amplify.md","articles":[]},{"title":"AdaGrad, AdaDelta","level":"8.3.4","depth":2,"path":"regression/kddcup12tr2_adagrad.md","ref":"regression/kddcup12tr2_adagrad.md","articles":[]}]},"previous":{"title":"General Regressor","level":"8.2.2","depth":2,"pa
th":"regression/e2006_generic.md","ref":"regression/e2006_generic.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"the
me":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"https://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/tree/master/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"struc
ture":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"https://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"regression/e2006_arow.md","mtime":"2018-12-26T10:16:03.081Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2018-12-26T10:20:07.153Z"},"basePath":"..","book":{"language":""}});
});
</script>
</div>
@@ -2641,7 +2746,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda
- <script src="https://cdnjs.cloudflare.com/ajax/libs/anchor-js/4.1.1/anchor.min.js"></script>
+ <script src="../gitbook/gitbook-plugin-anchorjs/anchor.min.js"></script>
http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/d9012d92/userguide/regression/e2006_dataset.html
----------------------------------------------------------------------
diff --git a/userguide/regression/e2006_dataset.html b/userguide/regression/e2006_dataset.html
index 7305822..6215927 100644
--- a/userguide/regression/e2006_dataset.html
+++ b/userguide/regression/e2006_dataset.html
@@ -4,7 +4,7 @@
<head>
<meta charset="UTF-8">
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
- <title>Data preparation · Hivemall User Manual</title>
+ <title>Data Preparation · Hivemall User Manual</title>
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="description" content="">
<meta name="generator" content="GitBook 3.2.3">
@@ -97,7 +97,7 @@
<link rel="shortcut icon" href="../gitbook/images/favicon.ico" type="image/x-icon">
- <link rel="next" href="e2006_arow.html" />
+ <link rel="next" href="e2006_generic.html" />
<link rel="prev" href="e2006.html" />
@@ -972,7 +972,7 @@
<b>6.2.1.</b>
- Data preparation
+ Data Preparation
</a>
@@ -980,13 +980,28 @@
</li>
- <li class="chapter " data-level="6.2.2" data-path="../binaryclass/a9a_lr.html">
+ <li class="chapter " data-level="6.2.2" data-path="../binaryclass/a9a_generic.html">
- <a href="../binaryclass/a9a_lr.html">
+ <a href="../binaryclass/a9a_generic.html">
<b>6.2.2.</b>
+ General Binary Classifier
+
+ </a>
+
+
+
+ </li>
+
+ <li class="chapter " data-level="6.2.3" data-path="../binaryclass/a9a_lr.html">
+
+ <a href="../binaryclass/a9a_lr.html">
+
+
+ <b>6.2.3.</b>
+
Logistic Regression
</a>
@@ -995,14 +1010,14 @@
</li>
- <li class="chapter " data-level="6.2.3" data-path="../binaryclass/a9a_minibatch.html">
+ <li class="chapter " data-level="6.2.4" data-path="../binaryclass/a9a_minibatch.html">
<a href="../binaryclass/a9a_minibatch.html">
- <b>6.2.3.</b>
+ <b>6.2.4.</b>
- Mini-batch gradient descent
+ Mini-batch Gradient Descent
</a>
@@ -1038,7 +1053,7 @@
<b>6.3.1.</b>
- Data preparation
+ Data Preparation
</a>
@@ -1076,13 +1091,28 @@
</li>
- <li class="chapter " data-level="6.3.4" data-path="../binaryclass/news20_adagrad.html">
+ <li class="chapter " data-level="6.3.4" data-path="../binaryclass/news20_generic.html">
- <a href="../binaryclass/news20_adagrad.html">
+ <a href="../binaryclass/news20_generic.html">
<b>6.3.4.</b>
+ General Binary Classifier
+
+ </a>
+
+
+
+ </li>
+
+ <li class="chapter " data-level="6.3.5" data-path="../binaryclass/news20_adagrad.html">
+
+ <a href="../binaryclass/news20_adagrad.html">
+
+
+ <b>6.3.5.</b>
+
AdaGradRDA, AdaGrad, AdaDelta
</a>
@@ -1091,12 +1121,12 @@
</li>
- <li class="chapter " data-level="6.3.5" data-path="../binaryclass/news20_rf.html">
+ <li class="chapter " data-level="6.3.6" data-path="../binaryclass/news20_rf.html">
<a href="../binaryclass/news20_rf.html">
- <b>6.3.5.</b>
+ <b>6.3.6.</b>
Random Forest
@@ -1134,7 +1164,7 @@
<b>6.4.1.</b>
- Data preparation
+ Data Preparation
</a>
@@ -1185,7 +1215,7 @@
<b>6.5.1.</b>
- Data preparation
+ Data Preparation
</a>
@@ -1236,7 +1266,7 @@
<b>6.6.1.</b>
- Data pareparation
+ Data Preparation
</a>
@@ -1302,7 +1332,7 @@
<b>6.8.1.</b>
- Data preparation
+ Data Preparation
</a>
@@ -1360,7 +1390,7 @@
<b>7.1.1.</b>
- Data preparation
+ Data Preparation
</a>
@@ -1375,7 +1405,7 @@
<b>7.1.2.</b>
- Data preparation for one-vs-the-rest classifiers
+ Data Preparation for one-vs-the-rest classifiers
</a>
@@ -1435,7 +1465,7 @@
<b>7.1.6.</b>
- one-vs-the-rest classifier
+ one-vs-the-rest Classifier
</a>
@@ -1559,7 +1589,7 @@
<b>8.2.1.</b>
- Data preparation
+ Data Preparation
</a>
@@ -1567,13 +1597,28 @@
</li>
- <li class="chapter " data-level="8.2.2" data-path="e2006_arow.html">
+ <li class="chapter " data-level="8.2.2" data-path="e2006_generic.html">
- <a href="e2006_arow.html">
+ <a href="e2006_generic.html">
<b>8.2.2.</b>
+ General Regressor
+
+ </a>
+
+
+
+ </li>
+
+ <li class="chapter " data-level="8.2.3" data-path="e2006_arow.html">
+
+ <a href="e2006_arow.html">
+
+
+ <b>8.2.3.</b>
+
Passive Aggressive, AROW
</a>
@@ -1610,7 +1655,7 @@
<b>8.3.1.</b>
- Data preparation
+ Data Preparation
</a>
@@ -1698,7 +1743,7 @@
<b>9.1.1.</b>
- Item-based collaborative filtering
+ Item-based Collaborative Filtering
</a>
@@ -1734,7 +1779,7 @@
<b>9.2.1.</b>
- Data preparation
+ Data Preparation
</a>
@@ -1749,7 +1794,7 @@
<b>9.2.2.</b>
- LSH/MinHash and Jaccard similarity
+ LSH/MinHash and Jaccard Similarity
</a>
@@ -1764,7 +1809,7 @@
<b>9.2.3.</b>
- LSH/MinHash and brute-force search
+ LSH/MinHash and Brute-force Search
</a>
@@ -1815,7 +1860,7 @@
<b>9.3.1.</b>
- Data preparation
+ Data Preparation
</a>
@@ -1830,7 +1875,7 @@
<b>9.3.2.</b>
- Item-based collaborative filtering
+ Item-based Collaborative Filtering
</a>
@@ -1875,7 +1920,7 @@
<b>9.3.5.</b>
- SLIM for fast top-k recommendation
+ SLIM for fast top-k Recommendation
</a>
@@ -1890,7 +1935,7 @@
<b>9.3.6.</b>
- 10-fold cross validation (Matrix Factorization)
+ 10-fold Cross Validation (Matrix Factorization)
</a>
@@ -2080,7 +2125,7 @@
<b>13.2.1.</b>
- a9a tutorial for DataFrame
+ a9a Tutorial for DataFrame
</a>
@@ -2095,7 +2140,7 @@
<b>13.2.2.</b>
- a9a tutorial for SQL
+ a9a Tutorial for SQL
</a>
@@ -2131,7 +2176,7 @@
<b>13.3.1.</b>
- E2006-tfidf regression tutorial for DataFrame
+ E2006-tfidf Regression Tutorial for DataFrame
</a>
@@ -2146,7 +2191,7 @@
<b>13.3.2.</b>
- E2006-tfidf regression tutorial for SQL
+ E2006-tfidf Regression Tutorial for SQL
</a>
@@ -2166,7 +2211,7 @@
<b>13.4.</b>
- Generic features
+ Generic Features
</a>
@@ -2182,7 +2227,7 @@
<b>13.4.1.</b>
- Top-k join processing
+ Top-k Join Processing
</a>
@@ -2197,7 +2242,7 @@
<b>13.4.2.</b>
- Other utility functions
+ Other Utility Functions
</a>
@@ -2284,7 +2329,7 @@
<!-- Title -->
<h1>
<i class="fa fa-circle-o-notch fa-spin"></i>
- <a href=".." >Data preparation</a>
+ <a href=".." >Data Preparation</a>
</h1>
</div>
@@ -2432,7 +2477,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda
<script>
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"Data preparation","level":"8.2.1","depth":2,"next":{"title":"Passive Aggressive, AROW","level":"8.2.2","depth":2,"path":"regression/e2006_arow.md","ref":"regression/e2006_arow.md","articles":[]},"previous":{"title":"E2006-tfidf Regression Tutorial","level":"8.2","depth":1,"path":"regression/e2006.md","ref":"regression/e2006.md","articles":[{"title":"Data preparation","level":"8.2.1","depth":2,"path":"regression/e2006_dataset.md","ref":"regression/e2006_dataset.md","articles":[]},{"title":"Passive Aggressive, AROW","level":"8.2.2","depth":2,"path":"regression/e2006_arow.md","ref":"regression/e2006_arow.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css
","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"https://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Ed
it","base":"https://github.com/apache/incubator-hivemall/tree/master/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"https://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"regression/e2006_dataset.md","mtime":"2018-11-02T10:33:52.973Z","type":"ma
rkdown"},"gitbook":{"version":"3.2.3","time":"2018-11-13T09:32:29.643Z"},"basePath":"..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"Data Preparation","level":"8.2.1","depth":2,"next":{"title":"General Regressor","level":"8.2.2","depth":2,"path":"regression/e2006_generic.md","ref":"regression/e2006_generic.md","articles":[]},"previous":{"title":"E2006-tfidf Regression Tutorial","level":"8.2","depth":1,"path":"regression/e2006.md","ref":"regression/e2006.md","articles":[{"title":"Data Preparation","level":"8.2.1","depth":2,"path":"regression/e2006_dataset.md","ref":"regression/e2006_dataset.md","articles":[]},{"title":"General Regressor","level":"8.2.2","depth":2,"path":"regression/e2006_generic.md","ref":"regression/e2006_generic.md","articles":[]},{"title":"Passive Aggressive, AROW","level":"8.2.3","depth":2,"path":"regression/e2006_arow.md","ref":"regression/e2006_arow.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapt
ers","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"https://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"go
ogle":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/tree/master/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"https://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x"
,"description":"User Manual for Apache Hivemall"},"file":{"path":"regression/e2006_dataset.md","mtime":"2018-12-25T07:50:37.548Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2018-12-26T10:20:07.153Z"},"basePath":"..","book":{"language":""}});
});
</script>
</div>
@@ -2462,7 +2507,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda
- <script src="https://cdnjs.cloudflare.com/ajax/libs/anchor-js/4.1.1/anchor.min.js"></script>
+ <script src="../gitbook/gitbook-plugin-anchorjs/anchor.min.js"></script>