You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hivemall.apache.org by my...@apache.org on 2018/12/26 10:22:59 UTC

[11/33] incubator-hivemall-site git commit: Update tutorial for general classifier/regressor

http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/d9012d92/userguide/regression/e2006_arow.html
----------------------------------------------------------------------
diff --git a/userguide/regression/e2006_arow.html b/userguide/regression/e2006_arow.html
index c95408e..473cbce 100644
--- a/userguide/regression/e2006_arow.html
+++ b/userguide/regression/e2006_arow.html
@@ -100,7 +100,7 @@
     <link rel="next" href="kddcup12tr2.html" />
     
     
-    <link rel="prev" href="e2006_dataset.html" />
+    <link rel="prev" href="e2006_generic.html" />
     
 
     </head>
@@ -972,7 +972,7 @@
                     
                         <b>6.2.1.</b>
                     
-                    Data preparation
+                    Data Preparation
             
                 </a>
             
@@ -980,13 +980,28 @@
             
         </li>
     
-        <li class="chapter " data-level="6.2.2" data-path="../binaryclass/a9a_lr.html">
+        <li class="chapter " data-level="6.2.2" data-path="../binaryclass/a9a_generic.html">
             
-                <a href="../binaryclass/a9a_lr.html">
+                <a href="../binaryclass/a9a_generic.html">
             
                     
                         <b>6.2.2.</b>
                     
+                    General Binary Classifier
+            
+                </a>
+            
+
+            
+        </li>
+    
+        <li class="chapter " data-level="6.2.3" data-path="../binaryclass/a9a_lr.html">
+            
+                <a href="../binaryclass/a9a_lr.html">
+            
+                    
+                        <b>6.2.3.</b>
+                    
                     Logistic Regression
             
                 </a>
@@ -995,14 +1010,14 @@
             
         </li>
     
-        <li class="chapter " data-level="6.2.3" data-path="../binaryclass/a9a_minibatch.html">
+        <li class="chapter " data-level="6.2.4" data-path="../binaryclass/a9a_minibatch.html">
             
                 <a href="../binaryclass/a9a_minibatch.html">
             
                     
-                        <b>6.2.3.</b>
+                        <b>6.2.4.</b>
                     
-                    Mini-batch gradient descent
+                    Mini-batch Gradient Descent
             
                 </a>
             
@@ -1038,7 +1053,7 @@
                     
                         <b>6.3.1.</b>
                     
-                    Data preparation
+                    Data Preparation
             
                 </a>
             
@@ -1076,13 +1091,28 @@
             
         </li>
     
-        <li class="chapter " data-level="6.3.4" data-path="../binaryclass/news20_adagrad.html">
+        <li class="chapter " data-level="6.3.4" data-path="../binaryclass/news20_generic.html">
             
-                <a href="../binaryclass/news20_adagrad.html">
+                <a href="../binaryclass/news20_generic.html">
             
                     
                         <b>6.3.4.</b>
                     
+                    General Binary Classifier
+            
+                </a>
+            
+
+            
+        </li>
+    
+        <li class="chapter " data-level="6.3.5" data-path="../binaryclass/news20_adagrad.html">
+            
+                <a href="../binaryclass/news20_adagrad.html">
+            
+                    
+                        <b>6.3.5.</b>
+                    
                     AdaGradRDA, AdaGrad, AdaDelta
             
                 </a>
@@ -1091,12 +1121,12 @@
             
         </li>
     
-        <li class="chapter " data-level="6.3.5" data-path="../binaryclass/news20_rf.html">
+        <li class="chapter " data-level="6.3.6" data-path="../binaryclass/news20_rf.html">
             
                 <a href="../binaryclass/news20_rf.html">
             
                     
-                        <b>6.3.5.</b>
+                        <b>6.3.6.</b>
                     
                     Random Forest
             
@@ -1134,7 +1164,7 @@
                     
                         <b>6.4.1.</b>
                     
-                    Data preparation
+                    Data Preparation
             
                 </a>
             
@@ -1185,7 +1215,7 @@
                     
                         <b>6.5.1.</b>
                     
-                    Data preparation
+                    Data Preparation
             
                 </a>
             
@@ -1236,7 +1266,7 @@
                     
                         <b>6.6.1.</b>
                     
-                    Data pareparation
+                    Data Preparation
             
                 </a>
             
@@ -1302,7 +1332,7 @@
                     
                         <b>6.8.1.</b>
                     
-                    Data preparation
+                    Data Preparation
             
                 </a>
             
@@ -1360,7 +1390,7 @@
                     
                         <b>7.1.1.</b>
                     
-                    Data preparation
+                    Data Preparation
             
                 </a>
             
@@ -1375,7 +1405,7 @@
                     
                         <b>7.1.2.</b>
                     
-                    Data preparation for one-vs-the-rest classifiers
+                    Data Preparation for one-vs-the-rest classifiers
             
                 </a>
             
@@ -1435,7 +1465,7 @@
                     
                         <b>7.1.6.</b>
                     
-                    one-vs-the-rest classifier
+                    one-vs-the-rest Classifier
             
                 </a>
             
@@ -1559,7 +1589,7 @@
                     
                         <b>8.2.1.</b>
                     
-                    Data preparation
+                    Data Preparation
             
                 </a>
             
@@ -1567,13 +1597,28 @@
             
         </li>
     
-        <li class="chapter active" data-level="8.2.2" data-path="e2006_arow.html">
+        <li class="chapter " data-level="8.2.2" data-path="e2006_generic.html">
             
-                <a href="e2006_arow.html">
+                <a href="e2006_generic.html">
             
                     
                         <b>8.2.2.</b>
                     
+                    General Regressor
+            
+                </a>
+            
+
+            
+        </li>
+    
+        <li class="chapter active" data-level="8.2.3" data-path="e2006_arow.html">
+            
+                <a href="e2006_arow.html">
+            
+                    
+                        <b>8.2.3.</b>
+                    
                     Passive Aggressive, AROW
             
                 </a>
@@ -1610,7 +1655,7 @@
                     
                         <b>8.3.1.</b>
                     
-                    Data preparation
+                    Data Preparation
             
                 </a>
             
@@ -1698,7 +1743,7 @@
                     
                         <b>9.1.1.</b>
                     
-                    Item-based collaborative filtering
+                    Item-based Collaborative Filtering
             
                 </a>
             
@@ -1734,7 +1779,7 @@
                     
                         <b>9.2.1.</b>
                     
-                    Data preparation
+                    Data Preparation
             
                 </a>
             
@@ -1749,7 +1794,7 @@
                     
                         <b>9.2.2.</b>
                     
-                    LSH/MinHash and Jaccard similarity
+                    LSH/MinHash and Jaccard Similarity
             
                 </a>
             
@@ -1764,7 +1809,7 @@
                     
                         <b>9.2.3.</b>
                     
-                    LSH/MinHash and brute-force search
+                    LSH/MinHash and Brute-force Search
             
                 </a>
             
@@ -1815,7 +1860,7 @@
                     
                         <b>9.3.1.</b>
                     
-                    Data preparation
+                    Data Preparation
             
                 </a>
             
@@ -1830,7 +1875,7 @@
                     
                         <b>9.3.2.</b>
                     
-                    Item-based collaborative filtering
+                    Item-based Collaborative Filtering
             
                 </a>
             
@@ -1875,7 +1920,7 @@
                     
                         <b>9.3.5.</b>
                     
-                    SLIM for fast top-k recommendation
+                    SLIM for fast top-k Recommendation
             
                 </a>
             
@@ -1890,7 +1935,7 @@
                     
                         <b>9.3.6.</b>
                     
-                    10-fold cross validation (Matrix Factorization)
+                    10-fold Cross Validation (Matrix Factorization)
             
                 </a>
             
@@ -2080,7 +2125,7 @@
                     
                         <b>13.2.1.</b>
                     
-                    a9a tutorial for DataFrame
+                    a9a Tutorial for DataFrame
             
                 </a>
             
@@ -2095,7 +2140,7 @@
                     
                         <b>13.2.2.</b>
                     
-                    a9a tutorial for SQL
+                    a9a Tutorial for SQL
             
                 </a>
             
@@ -2131,7 +2176,7 @@
                     
                         <b>13.3.1.</b>
                     
-                    E2006-tfidf regression tutorial for DataFrame
+                    E2006-tfidf Regression Tutorial for DataFrame
             
                 </a>
             
@@ -2146,7 +2191,7 @@
                     
                         <b>13.3.2.</b>
                     
-                    E2006-tfidf regression tutorial for SQL
+                    E2006-tfidf Regression Tutorial for SQL
             
                 </a>
             
@@ -2166,7 +2211,7 @@
                     
                         <b>13.4.</b>
                     
-                    Generic features
+                    Generic Features
             
                 </a>
             
@@ -2182,7 +2227,7 @@
                     
                         <b>13.4.1.</b>
                     
-                    Top-k join processing
+                    Top-k Join Processing
             
                 </a>
             
@@ -2197,7 +2242,7 @@
                     
                         <b>13.4.2.</b>
                     
-                    Other utility functions
+                    Other Utility Functions
             
                 </a>
             
@@ -2317,11 +2362,40 @@
   specific language governing permissions and limitations
   under the License.
 -->
-<p><a href="https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression.html#E2006-tfidf" target="_blank">https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression.html#E2006-tfidf</a></p>
-<hr>
-<h1 id="pa1a">[PA1a]</h1>
+<!-- toc --><div id="toc" class="toc">
+
+<ul>
+<li><a href="#pa1a">PA1a</a><ul>
+<li><a href="#training">Training</a></li>
+<li><a href="#prediction">prediction</a></li>
+<li><a href="#evaluation">evaluation</a></li>
+</ul>
+</li>
+<li><a href="#pa2a">PA2a</a><ul>
+<li><a href="#training-1">Training</a></li>
+<li><a href="#prediction-1">prediction</a></li>
+<li><a href="#evaluation-1">evaluation</a></li>
+</ul>
+</li>
+<li><a href="#arow">AROW</a><ul>
+<li><a href="#training-2">Training</a></li>
+<li><a href="#prediction-2">prediction</a></li>
+<li><a href="#evaluation-2">evaluation</a></li>
+</ul>
+</li>
+<li><a href="#arowe">AROWe</a><ul>
+<li><a href="#training-3">Training</a></li>
+<li><a href="#prediction-3">prediction</a></li>
+<li><a href="#evaluation-3">evaluation</a></li>
+</ul>
+</li>
+</ul>
+
+</div><!-- tocstop -->
+<h1 id="pa1a">PA1a</h1>
 <h2 id="training">Training</h2>
 <pre><code class="lang-sql"><span class="hljs-keyword">set</span> mapred.reduce.tasks=<span class="hljs-number">64</span>;
+
 <span class="hljs-keyword">drop</span> <span class="hljs-keyword">table</span> e2006tfidf_pa1a_model ;
 <span class="hljs-keyword">create</span> <span class="hljs-keyword">table</span> e2006tfidf_pa1a_model <span class="hljs-keyword">as</span>
 <span class="hljs-keyword">select</span> 
@@ -2334,9 +2408,11 @@
      e2006tfidf_train_x3
  ) t 
 <span class="hljs-keyword">group</span> <span class="hljs-keyword">by</span> feature;
+
+<span class="hljs-comment">-- reset to the default setting</span>
 <span class="hljs-keyword">set</span> mapred.reduce.tasks=<span class="hljs-number">-1</span>;
 </code></pre>
-<p><em>Caution: Do not use voted_avg() for regression. voted_avg() is for classification.</em></p>
+<div class="panel panel-warning"><div class="panel-heading"><h3 class="panel-title" id="caution"><i class="fa fa-exclamation-triangle"></i> Caution</h3></div><div class="panel-body"><p>Do not use <code>voted_avg()</code> for regression. <code>voted_avg()</code> is for classification.</p></div></div>
 <h2 id="prediction">prediction</h2>
 <pre><code class="lang-sql"><span class="hljs-keyword">create</span> <span class="hljs-keyword">or</span> <span class="hljs-keyword">replace</span> <span class="hljs-keyword">view</span> e2006tfidf_pa1a_predict
 <span class="hljs-keyword">as</span>
@@ -2350,35 +2426,42 @@
   t.<span class="hljs-keyword">rowid</span>;
 </code></pre>
 <h2 id="evaluation">evaluation</h2>
-<pre><code class="lang-sql"><span class="hljs-keyword">drop</span> <span class="hljs-keyword">table</span> e2006tfidf_pa1a_submit;
-<span class="hljs-keyword">create</span> <span class="hljs-keyword">table</span> e2006tfidf_pa1a_submit <span class="hljs-keyword">as</span>
-<span class="hljs-keyword">select</span> 
-  t.target <span class="hljs-keyword">as</span> actual, 
-  p.predicted <span class="hljs-keyword">as</span> predicted
-<span class="hljs-keyword">from</span> 
-  e2006tfidf_test t <span class="hljs-keyword">JOIN</span> e2006tfidf_pa1a_predict p 
-    <span class="hljs-keyword">on</span> (t.<span class="hljs-keyword">rowid</span> = p.<span class="hljs-keyword">rowid</span>);
-
-<span class="hljs-keyword">select</span> <span class="hljs-keyword">avg</span>(actual), <span class="hljs-keyword">avg</span>(predicted) <span class="hljs-keyword">from</span> e2006tfidf_pa1a_submit;
-</code></pre>
-<blockquote>
-<p>-3.8200363760415414     -3.8869923258589476</p>
-</blockquote>
-<pre><code class="lang-sql"><span class="hljs-keyword">set</span> hivevar:mean_actual=<span class="hljs-number">-3.8200363760415414</span>;
-
+<pre><code class="lang-sql">WITH submit as (
+  <span class="hljs-keyword">select</span> 
+    t.target <span class="hljs-keyword">as</span> actual, 
+    p.predicted <span class="hljs-keyword">as</span> predicted
+  <span class="hljs-keyword">from</span> 
+    e2006tfidf_test t
+    <span class="hljs-keyword">JOIN</span> e2006tfidf_pa1a_predict p 
+      <span class="hljs-keyword">on</span> (t.<span class="hljs-keyword">rowid</span> = p.<span class="hljs-keyword">rowid</span>)
+)
 <span class="hljs-keyword">select</span> 
-   <span class="hljs-keyword">sqrt</span>(<span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(predicted - actual,<span class="hljs-number">2.0</span>))/<span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>)) <span class="hljs-keyword">as</span> RMSE, 
-   <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(predicted - actual,<span class="hljs-number">2.0</span>))/<span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>) <span class="hljs-keyword">as</span> MSE, 
-   <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">abs</span>(predicted - actual))/<span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>) <span class="hljs-keyword">as</span> MAE,
-   <span class="hljs-number">1</span> - <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(actual - predicted,<span class="hljs-number">2.0</span>)) / <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(actual - ${mean_actual},<span class="hljs-number">2.0</span>)) <span class="hljs-keyword">as</span> R2
+   rmse(predicted, actual) <span class="hljs-keyword">as</span> RMSE,
+   mse(predicted, actual) <span class="hljs-keyword">as</span> MSE, 
+   mae(predicted, actual) <span class="hljs-keyword">as</span> MAE,
+   r2(predicted, actual) <span class="hljs-keyword">as</span> R2
 <span class="hljs-keyword">from</span> 
-   e2006tfidf_pa1a_submit;
+   submit;
 </code></pre>
-<blockquote>
-<p>0.3797959864675519      0.14424499133686086     0.23846059576113587     0.5010367946980386</p>
-</blockquote>
-<hr>
-<h1 id="pa2a">[PA2a]</h1>
+<table>
+<thead>
+<tr>
+<th style="text-align:center">rmse</th>
+<th style="text-align:center">mse</th>
+<th style="text-align:center">mae</th>
+<th style="text-align:center">r2</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td style="text-align:center">0.3797959864675519</td>
+<td style="text-align:center">0.14424499133686086</td>
+<td style="text-align:center">0.23846059576113587</td>
+<td style="text-align:center">0.5010367946980386</td>
+</tr>
+</tbody>
+</table>
+<h1 id="pa2a">PA2a</h1>
 <h2 id="training-1">Training</h2>
 <pre><code class="lang-sql"><span class="hljs-keyword">set</span> mapred.reduce.tasks=<span class="hljs-number">64</span>;
 <span class="hljs-keyword">drop</span> <span class="hljs-keyword">table</span> e2006tfidf_pa2a_model;
@@ -2408,35 +2491,42 @@
   t.<span class="hljs-keyword">rowid</span>;
 </code></pre>
 <h2 id="evaluation-1">evaluation</h2>
-<pre><code class="lang-sql"><span class="hljs-keyword">drop</span> <span class="hljs-keyword">table</span> e2006tfidf_pa2a_submit;
-<span class="hljs-keyword">create</span> <span class="hljs-keyword">table</span> e2006tfidf_pa2a_submit <span class="hljs-keyword">as</span>
-<span class="hljs-keyword">select</span> 
-  t.target <span class="hljs-keyword">as</span> actual, 
-  pd.predicted <span class="hljs-keyword">as</span> predicted
-<span class="hljs-keyword">from</span> 
-  e2006tfidf_test t <span class="hljs-keyword">JOIN</span> e2006tfidf_pa2a_predict pd 
-    <span class="hljs-keyword">on</span> (t.<span class="hljs-keyword">rowid</span> = pd.<span class="hljs-keyword">rowid</span>);
-
-<span class="hljs-keyword">select</span> <span class="hljs-keyword">avg</span>(actual), <span class="hljs-keyword">avg</span>(predicted) <span class="hljs-keyword">from</span> e2006tfidf_pa2a_submit;
-</code></pre>
-<blockquote>
-<p>-3.8200363760415414     -3.9124877451612488</p>
-</blockquote>
-<pre><code class="lang-sql"><span class="hljs-keyword">set</span> hivevar:mean_actual=<span class="hljs-number">-3.8200363760415414</span>;
-
+<pre><code class="lang-sql">WITH submit as (
+  <span class="hljs-keyword">select</span> 
+    t.target <span class="hljs-keyword">as</span> actual, 
+    p.predicted <span class="hljs-keyword">as</span> predicted
+  <span class="hljs-keyword">from</span> 
+    e2006tfidf_test t
+    <span class="hljs-keyword">JOIN</span> e2006tfidf_pa2a_predict p 
+      <span class="hljs-keyword">on</span> (t.<span class="hljs-keyword">rowid</span> = p.<span class="hljs-keyword">rowid</span>)
+)
 <span class="hljs-keyword">select</span> 
-   <span class="hljs-keyword">sqrt</span>(<span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(predicted - actual,<span class="hljs-number">2.0</span>))/<span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>)) <span class="hljs-keyword">as</span> RMSE, 
-   <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(predicted - actual,<span class="hljs-number">2.0</span>))/<span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>) <span class="hljs-keyword">as</span> MSE, 
-   <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">abs</span>(predicted - actual))/<span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>) <span class="hljs-keyword">as</span> MAE,
-   <span class="hljs-number">1</span> - <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(actual - predicted,<span class="hljs-number">2.0</span>)) / <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(actual - ${mean_actual},<span class="hljs-number">2.0</span>)) <span class="hljs-keyword">as</span> R2
+   rmse(predicted, actual) <span class="hljs-keyword">as</span> RMSE,
+   mse(predicted, actual) <span class="hljs-keyword">as</span> MSE, 
+   mae(predicted, actual) <span class="hljs-keyword">as</span> MAE,
+   r2(predicted, actual) <span class="hljs-keyword">as</span> R2
 <span class="hljs-keyword">from</span> 
-   e2006tfidf_pa2a_submit;
+   submit;
 </code></pre>
-<blockquote>
-<p>0.38538660838804495     0.14852283792484033     0.2466732002711477      0.48623913673053565</p>
-</blockquote>
-<hr>
-<h1 id="arow">[AROW]</h1>
+<table>
+<thead>
+<tr>
+<th style="text-align:center">rmse</th>
+<th style="text-align:center">mse</th>
+<th style="text-align:center">mae</th>
+<th style="text-align:center">r2</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td style="text-align:center">0.38538660838804495</td>
+<td style="text-align:center">0.14852283792484033</td>
+<td style="text-align:center">0.2466732002711477</td>
+<td style="text-align:center">0.48623913673053565</td>
+</tr>
+</tbody>
+</table>
+<h1 id="arow">AROW</h1>
 <h2 id="training-2">Training</h2>
 <pre><code class="lang-sql"><span class="hljs-keyword">set</span> mapred.reduce.tasks=<span class="hljs-number">64</span>;
 <span class="hljs-keyword">drop</span> <span class="hljs-keyword">table</span> e2006tfidf_arow_model ;
@@ -2468,35 +2558,42 @@
   t.<span class="hljs-keyword">rowid</span>;
 </code></pre>
 <h2 id="evaluation-2">evaluation</h2>
-<pre><code class="lang-sql"><span class="hljs-keyword">drop</span> <span class="hljs-keyword">table</span> e2006tfidf_arow_submit;
-<span class="hljs-keyword">create</span> <span class="hljs-keyword">table</span> e2006tfidf_arow_submit <span class="hljs-keyword">as</span>
-<span class="hljs-keyword">select</span> 
-  t.target <span class="hljs-keyword">as</span> actual, 
-  p.predicted <span class="hljs-keyword">as</span> predicted
-<span class="hljs-keyword">from</span> 
-  e2006tfidf_test t <span class="hljs-keyword">JOIN</span> e2006tfidf_arow_predict p
-    <span class="hljs-keyword">on</span> (t.<span class="hljs-keyword">rowid</span> = p.<span class="hljs-keyword">rowid</span>);
-
-<span class="hljs-keyword">select</span> <span class="hljs-keyword">avg</span>(actual), <span class="hljs-keyword">avg</span>(predicted) <span class="hljs-keyword">from</span> e2006tfidf_arow_submit;
-</code></pre>
-<blockquote>
-<p>-3.8200363760415414     -3.8692518911517433</p>
-</blockquote>
-<pre><code class="lang-sql"><span class="hljs-keyword">set</span> hivevar:mean_actual=<span class="hljs-number">-3.8200363760415414</span>;
-
+<pre><code class="lang-sql">WITH submit as (
+  <span class="hljs-keyword">select</span> 
+    t.target <span class="hljs-keyword">as</span> actual, 
+    p.predicted <span class="hljs-keyword">as</span> predicted
+  <span class="hljs-keyword">from</span> 
+    e2006tfidf_test t
+    <span class="hljs-keyword">JOIN</span> e2006tfidf_arow_predict p 
+      <span class="hljs-keyword">on</span> (t.<span class="hljs-keyword">rowid</span> = p.<span class="hljs-keyword">rowid</span>)
+)
 <span class="hljs-keyword">select</span> 
-   <span class="hljs-keyword">sqrt</span>(<span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(predicted - actual,<span class="hljs-number">2.0</span>))/<span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>)) <span class="hljs-keyword">as</span> RMSE, 
-   <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(predicted - actual,<span class="hljs-number">2.0</span>))/<span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>) <span class="hljs-keyword">as</span> MSE, 
-   <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">abs</span>(predicted - actual))/<span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>) <span class="hljs-keyword">as</span> MAE,
-   <span class="hljs-number">1</span> - <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(actual - predicted,<span class="hljs-number">2.0</span>)) / <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(actual - ${mean_actual},<span class="hljs-number">2.0</span>)) <span class="hljs-keyword">as</span> R2
+   rmse(predicted, actual) <span class="hljs-keyword">as</span> RMSE,
+   mse(predicted, actual) <span class="hljs-keyword">as</span> MSE, 
+   mae(predicted, actual) <span class="hljs-keyword">as</span> MAE,
+   r2(predicted, actual) <span class="hljs-keyword">as</span> R2
 <span class="hljs-keyword">from</span> 
-   e2006tfidf_arow_submit;
+   submit;
 </code></pre>
-<blockquote>
-<p>0.37862513029019407     0.14335698928726642     0.2368787001269389      0.5041085155590119</p>
-</blockquote>
-<hr>
-<h1 id="arowe">[AROWe]</h1>
+<table>
+<thead>
+<tr>
+<th style="text-align:center">rmse</th>
+<th style="text-align:center">mse</th>
+<th style="text-align:center">mae</th>
+<th style="text-align:center">r2</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td style="text-align:center">0.37862513029019407</td>
+<td style="text-align:center">0.14335698928726642</td>
+<td style="text-align:center">0.2368787001269389</td>
+<td style="text-align:center">0.5041085155590119</td>
+</tr>
+</tbody>
+</table>
+<h1 id="arowe">AROWe</h1>
 <p>AROWe is a modified version of AROW that uses Hinge loss (epsilon = 0.1)</p>
 <h2 id="training-3">Training</h2>
 <pre><code class="lang-sql"><span class="hljs-keyword">set</span> mapred.reduce.tasks=<span class="hljs-number">64</span>;
@@ -2529,33 +2626,41 @@
   t.<span class="hljs-keyword">rowid</span>;
 </code></pre>
 <h2 id="evaluation-3">evaluation</h2>
-<pre><code class="lang-sql"><span class="hljs-keyword">drop</span> <span class="hljs-keyword">table</span> e2006tfidf_arowe_submit;
-<span class="hljs-keyword">create</span> <span class="hljs-keyword">table</span> e2006tfidf_arowe_submit <span class="hljs-keyword">as</span>
-<span class="hljs-keyword">select</span> 
-  t.target <span class="hljs-keyword">as</span> actual, 
-  p.predicted <span class="hljs-keyword">as</span> predicted
-<span class="hljs-keyword">from</span> 
-  e2006tfidf_test t <span class="hljs-keyword">JOIN</span> e2006tfidf_arowe_predict p
-    <span class="hljs-keyword">on</span> (t.<span class="hljs-keyword">rowid</span> = p.<span class="hljs-keyword">rowid</span>);
-
-<span class="hljs-keyword">select</span> <span class="hljs-keyword">avg</span>(actual), <span class="hljs-keyword">avg</span>(predicted) <span class="hljs-keyword">from</span> e2006tfidf_arowe_submit;
-</code></pre>
-<blockquote>
-<p>-3.8200363760415414     -3.86494905688414</p>
-</blockquote>
-<pre><code class="lang-sql"><span class="hljs-keyword">set</span> hivevar:mean_actual=<span class="hljs-number">-3.8200363760415414</span>;
-
+<pre><code class="lang-sql">WITH submit as (
+  <span class="hljs-keyword">select</span> 
+    t.target <span class="hljs-keyword">as</span> actual, 
+    p.predicted <span class="hljs-keyword">as</span> predicted
+  <span class="hljs-keyword">from</span> 
+    e2006tfidf_test t
+    <span class="hljs-keyword">JOIN</span> e2006tfidf_arowe_predict p 
+      <span class="hljs-keyword">on</span> (t.<span class="hljs-keyword">rowid</span> = p.<span class="hljs-keyword">rowid</span>)
+)
 <span class="hljs-keyword">select</span> 
-   <span class="hljs-keyword">sqrt</span>(<span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(predicted - actual,<span class="hljs-number">2.0</span>))/<span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>)) <span class="hljs-keyword">as</span> RMSE, 
-   <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(predicted - actual,<span class="hljs-number">2.0</span>))/<span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>) <span class="hljs-keyword">as</span> MSE, 
-   <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">abs</span>(predicted - actual))/<span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>) <span class="hljs-keyword">as</span> MAE,
-   <span class="hljs-number">1</span> - <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(actual - predicted,<span class="hljs-number">2.0</span>)) / <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">pow</span>(actual - ${mean_actual},<span class="hljs-number">2.0</span>)) <span class="hljs-keyword">as</span> R2
+   rmse(predicted, actual) <span class="hljs-keyword">as</span> RMSE,
+   mse(predicted, actual) <span class="hljs-keyword">as</span> MSE, 
+   mae(predicted, actual) <span class="hljs-keyword">as</span> MAE,
+   r2(predicted, actual) <span class="hljs-keyword">as</span> R2
 <span class="hljs-keyword">from</span> 
-   e2006tfidf_arowe_submit;
+   submit;
 </code></pre>
-<blockquote>
-<p>0.37789148212861856     0.14280197226536404     0.2357339155291536      0.5060283955470721</p>
-</blockquote>
+<table>
+<thead>
+<tr>
+<th style="text-align:center">rmse</th>
+<th style="text-align:center">mse</th>
+<th style="text-align:center">mae</th>
+<th style="text-align:center">r2</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td style="text-align:center">0.37789148212861856</td>
+<td style="text-align:center">0.14280197226536404</td>
+<td style="text-align:center">0.2357339155291536</td>
+<td style="text-align:center">0.5060283955470721</td>
+</tr>
+</tbody>
+</table>
 <p><div id="page-footer" class="localized-footer"><hr><!--
   Licensed to the Apache Software Foundation (ASF) under one
   or more contributor license agreements.  See the NOTICE file
@@ -2611,7 +2716,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda
     <script>
         var gitbook = gitbook || [];
         gitbook.push(function() {
-            gitbook.page.hasChanged({"page":{"title":"Passive Aggressive, AROW","level":"8.2.2","depth":2,"next":{"title":"KDDCup 2012 Track 2 CTR Prediction Tutorial","level":"8.3","depth":1,"path":"regression/kddcup12tr2.md","ref":"regression/kddcup12tr2.md","articles":[{"title":"Data preparation","level":"8.3.1","depth":2,"path":"regression/kddcup12tr2_dataset.md","ref":"regression/kddcup12tr2_dataset.md","articles":[]},{"title":"Logistic Regression, Passive Aggressive","level":"8.3.2","depth":2,"path":"regression/kddcup12tr2_lr.md","ref":"regression/kddcup12tr2_lr.md","articles":[]},{"title":"Logistic Regression with amplifier","level":"8.3.3","depth":2,"path":"regression/kddcup12tr2_lr_amplify.md","ref":"regression/kddcup12tr2_lr_amplify.md","articles":[]},{"title":"AdaGrad, AdaDelta","level":"8.3.4","depth":2,"path":"regression/kddcup12tr2_adagrad.md","ref":"regression/kddcup12tr2_adagrad.md","articles":[]}]},"previous":{"title":"Data preparation","level":"8.2.1","depth":2,"pa
 th":"regression/e2006_dataset.md","ref":"regression/e2006_dataset.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"the
 me":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"https://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/tree/master/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"struc
 ture":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"https://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"regression/e2006_arow.md","mtime":"2018-11-02T10:33:52.973Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2018-11-13T09:32:29.643Z"},"basePath":"..","book":{"language":""}});
+            gitbook.page.hasChanged({"page":{"title":"Passive Aggressive, AROW","level":"8.2.3","depth":2,"next":{"title":"KDDCup 2012 Track 2 CTR Prediction Tutorial","level":"8.3","depth":1,"path":"regression/kddcup12tr2.md","ref":"regression/kddcup12tr2.md","articles":[{"title":"Data Preparation","level":"8.3.1","depth":2,"path":"regression/kddcup12tr2_dataset.md","ref":"regression/kddcup12tr2_dataset.md","articles":[]},{"title":"Logistic Regression, Passive Aggressive","level":"8.3.2","depth":2,"path":"regression/kddcup12tr2_lr.md","ref":"regression/kddcup12tr2_lr.md","articles":[]},{"title":"Logistic Regression with amplifier","level":"8.3.3","depth":2,"path":"regression/kddcup12tr2_lr_amplify.md","ref":"regression/kddcup12tr2_lr_amplify.md","articles":[]},{"title":"AdaGrad, AdaDelta","level":"8.3.4","depth":2,"path":"regression/kddcup12tr2_adagrad.md","ref":"regression/kddcup12tr2_adagrad.md","articles":[]}]},"previous":{"title":"General Regressor","level":"8.2.2","depth":2,"pa
 th":"regression/e2006_generic.md","ref":"regression/e2006_generic.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"the
 me":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"https://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/tree/master/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"struc
 ture":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"https://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"regression/e2006_arow.md","mtime":"2018-12-26T10:16:03.081Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2018-12-26T10:20:07.153Z"},"basePath":"..","book":{"language":""}});
         });
     </script>
 </div>
@@ -2641,7 +2746,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda
         
     
         
-        <script src="https://cdnjs.cloudflare.com/ajax/libs/anchor-js/4.1.1/anchor.min.js"></script>
+        <script src="../gitbook/gitbook-plugin-anchorjs/anchor.min.js"></script>
         
     
         

http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/d9012d92/userguide/regression/e2006_dataset.html
----------------------------------------------------------------------
diff --git a/userguide/regression/e2006_dataset.html b/userguide/regression/e2006_dataset.html
index 7305822..6215927 100644
--- a/userguide/regression/e2006_dataset.html
+++ b/userguide/regression/e2006_dataset.html
@@ -4,7 +4,7 @@
     <head>
         <meta charset="UTF-8">
         <meta content="text/html; charset=utf-8" http-equiv="Content-Type">
-        <title>Data preparation · Hivemall User Manual</title>
+        <title>Data Preparation · Hivemall User Manual</title>
         <meta http-equiv="X-UA-Compatible" content="IE=edge" />
         <meta name="description" content="">
         <meta name="generator" content="GitBook 3.2.3">
@@ -97,7 +97,7 @@
     <link rel="shortcut icon" href="../gitbook/images/favicon.ico" type="image/x-icon">
 
     
-    <link rel="next" href="e2006_arow.html" />
+    <link rel="next" href="e2006_generic.html" />
     
     
     <link rel="prev" href="e2006.html" />
@@ -972,7 +972,7 @@
                     
                         <b>6.2.1.</b>
                     
-                    Data preparation
+                    Data Preparation
             
                 </a>
             
@@ -980,13 +980,28 @@
             
         </li>
     
-        <li class="chapter " data-level="6.2.2" data-path="../binaryclass/a9a_lr.html">
+        <li class="chapter " data-level="6.2.2" data-path="../binaryclass/a9a_generic.html">
             
-                <a href="../binaryclass/a9a_lr.html">
+                <a href="../binaryclass/a9a_generic.html">
             
                     
                         <b>6.2.2.</b>
                     
+                    General Binary Classifier
+            
+                </a>
+            
+
+            
+        </li>
+    
+        <li class="chapter " data-level="6.2.3" data-path="../binaryclass/a9a_lr.html">
+            
+                <a href="../binaryclass/a9a_lr.html">
+            
+                    
+                        <b>6.2.3.</b>
+                    
                     Logistic Regression
             
                 </a>
@@ -995,14 +1010,14 @@
             
         </li>
     
-        <li class="chapter " data-level="6.2.3" data-path="../binaryclass/a9a_minibatch.html">
+        <li class="chapter " data-level="6.2.4" data-path="../binaryclass/a9a_minibatch.html">
             
                 <a href="../binaryclass/a9a_minibatch.html">
             
                     
-                        <b>6.2.3.</b>
+                        <b>6.2.4.</b>
                     
-                    Mini-batch gradient descent
+                    Mini-batch Gradient Descent
             
                 </a>
             
@@ -1038,7 +1053,7 @@
                     
                         <b>6.3.1.</b>
                     
-                    Data preparation
+                    Data Preparation
             
                 </a>
             
@@ -1076,13 +1091,28 @@
             
         </li>
     
-        <li class="chapter " data-level="6.3.4" data-path="../binaryclass/news20_adagrad.html">
+        <li class="chapter " data-level="6.3.4" data-path="../binaryclass/news20_generic.html">
             
-                <a href="../binaryclass/news20_adagrad.html">
+                <a href="../binaryclass/news20_generic.html">
             
                     
                         <b>6.3.4.</b>
                     
+                    General Binary Classifier
+            
+                </a>
+            
+
+            
+        </li>
+    
+        <li class="chapter " data-level="6.3.5" data-path="../binaryclass/news20_adagrad.html">
+            
+                <a href="../binaryclass/news20_adagrad.html">
+            
+                    
+                        <b>6.3.5.</b>
+                    
                     AdaGradRDA, AdaGrad, AdaDelta
             
                 </a>
@@ -1091,12 +1121,12 @@
             
         </li>
     
-        <li class="chapter " data-level="6.3.5" data-path="../binaryclass/news20_rf.html">
+        <li class="chapter " data-level="6.3.6" data-path="../binaryclass/news20_rf.html">
             
                 <a href="../binaryclass/news20_rf.html">
             
                     
-                        <b>6.3.5.</b>
+                        <b>6.3.6.</b>
                     
                     Random Forest
             
@@ -1134,7 +1164,7 @@
                     
                         <b>6.4.1.</b>
                     
-                    Data preparation
+                    Data Preparation
             
                 </a>
             
@@ -1185,7 +1215,7 @@
                     
                         <b>6.5.1.</b>
                     
-                    Data preparation
+                    Data Preparation
             
                 </a>
             
@@ -1236,7 +1266,7 @@
                     
                         <b>6.6.1.</b>
                     
-                    Data pareparation
+                    Data Preparation
             
                 </a>
             
@@ -1302,7 +1332,7 @@
                     
                         <b>6.8.1.</b>
                     
-                    Data preparation
+                    Data Preparation
             
                 </a>
             
@@ -1360,7 +1390,7 @@
                     
                         <b>7.1.1.</b>
                     
-                    Data preparation
+                    Data Preparation
             
                 </a>
             
@@ -1375,7 +1405,7 @@
                     
                         <b>7.1.2.</b>
                     
-                    Data preparation for one-vs-the-rest classifiers
+                    Data Preparation for one-vs-the-rest classifiers
             
                 </a>
             
@@ -1435,7 +1465,7 @@
                     
                         <b>7.1.6.</b>
                     
-                    one-vs-the-rest classifier
+                    one-vs-the-rest Classifier
             
                 </a>
             
@@ -1559,7 +1589,7 @@
                     
                         <b>8.2.1.</b>
                     
-                    Data preparation
+                    Data Preparation
             
                 </a>
             
@@ -1567,13 +1597,28 @@
             
         </li>
     
-        <li class="chapter " data-level="8.2.2" data-path="e2006_arow.html">
+        <li class="chapter " data-level="8.2.2" data-path="e2006_generic.html">
             
-                <a href="e2006_arow.html">
+                <a href="e2006_generic.html">
             
                     
                         <b>8.2.2.</b>
                     
+                    General Regressor
+            
+                </a>
+            
+
+            
+        </li>
+    
+        <li class="chapter " data-level="8.2.3" data-path="e2006_arow.html">
+            
+                <a href="e2006_arow.html">
+            
+                    
+                        <b>8.2.3.</b>
+                    
                     Passive Aggressive, AROW
             
                 </a>
@@ -1610,7 +1655,7 @@
                     
                         <b>8.3.1.</b>
                     
-                    Data preparation
+                    Data Preparation
             
                 </a>
             
@@ -1698,7 +1743,7 @@
                     
                         <b>9.1.1.</b>
                     
-                    Item-based collaborative filtering
+                    Item-based Collaborative Filtering
             
                 </a>
             
@@ -1734,7 +1779,7 @@
                     
                         <b>9.2.1.</b>
                     
-                    Data preparation
+                    Data Preparation
             
                 </a>
             
@@ -1749,7 +1794,7 @@
                     
                         <b>9.2.2.</b>
                     
-                    LSH/MinHash and Jaccard similarity
+                    LSH/MinHash and Jaccard Similarity
             
                 </a>
             
@@ -1764,7 +1809,7 @@
                     
                         <b>9.2.3.</b>
                     
-                    LSH/MinHash and brute-force search
+                    LSH/MinHash and Brute-force Search
             
                 </a>
             
@@ -1815,7 +1860,7 @@
                     
                         <b>9.3.1.</b>
                     
-                    Data preparation
+                    Data Preparation
             
                 </a>
             
@@ -1830,7 +1875,7 @@
                     
                         <b>9.3.2.</b>
                     
-                    Item-based collaborative filtering
+                    Item-based Collaborative Filtering
             
                 </a>
             
@@ -1875,7 +1920,7 @@
                     
                         <b>9.3.5.</b>
                     
-                    SLIM for fast top-k recommendation
+                    SLIM for fast top-k Recommendation
             
                 </a>
             
@@ -1890,7 +1935,7 @@
                     
                         <b>9.3.6.</b>
                     
-                    10-fold cross validation (Matrix Factorization)
+                    10-fold Cross Validation (Matrix Factorization)
             
                 </a>
             
@@ -2080,7 +2125,7 @@
                     
                         <b>13.2.1.</b>
                     
-                    a9a tutorial for DataFrame
+                    a9a Tutorial for DataFrame
             
                 </a>
             
@@ -2095,7 +2140,7 @@
                     
                         <b>13.2.2.</b>
                     
-                    a9a tutorial for SQL
+                    a9a Tutorial for SQL
             
                 </a>
             
@@ -2131,7 +2176,7 @@
                     
                         <b>13.3.1.</b>
                     
-                    E2006-tfidf regression tutorial for DataFrame
+                    E2006-tfidf Regression Tutorial for DataFrame
             
                 </a>
             
@@ -2146,7 +2191,7 @@
                     
                         <b>13.3.2.</b>
                     
-                    E2006-tfidf regression tutorial for SQL
+                    E2006-tfidf Regression Tutorial for SQL
             
                 </a>
             
@@ -2166,7 +2211,7 @@
                     
                         <b>13.4.</b>
                     
-                    Generic features
+                    Generic Features
             
                 </a>
             
@@ -2182,7 +2227,7 @@
                     
                         <b>13.4.1.</b>
                     
-                    Top-k join processing
+                    Top-k Join Processing
             
                 </a>
             
@@ -2197,7 +2242,7 @@
                     
                         <b>13.4.2.</b>
                     
-                    Other utility functions
+                    Other Utility Functions
             
                 </a>
             
@@ -2284,7 +2329,7 @@
     <!-- Title -->
     <h1>
         <i class="fa fa-circle-o-notch fa-spin"></i>
-        <a href=".." >Data preparation</a>
+        <a href=".." >Data Preparation</a>
     </h1>
 </div>
 
@@ -2432,7 +2477,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda
     <script>
         var gitbook = gitbook || [];
         gitbook.push(function() {
-            gitbook.page.hasChanged({"page":{"title":"Data preparation","level":"8.2.1","depth":2,"next":{"title":"Passive Aggressive, AROW","level":"8.2.2","depth":2,"path":"regression/e2006_arow.md","ref":"regression/e2006_arow.md","articles":[]},"previous":{"title":"E2006-tfidf Regression Tutorial","level":"8.2","depth":1,"path":"regression/e2006.md","ref":"regression/e2006.md","articles":[{"title":"Data preparation","level":"8.2.1","depth":2,"path":"regression/e2006_dataset.md","ref":"regression/e2006_dataset.md","articles":[]},{"title":"Passive Aggressive, AROW","level":"8.2.2","depth":2,"path":"regression/e2006_arow.md","ref":"regression/e2006_arow.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css
 ","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"https://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Ed
 it","base":"https://github.com/apache/incubator-hivemall/tree/master/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"https://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"regression/e2006_dataset.md","mtime":"2018-11-02T10:33:52.973Z","type":"ma
 rkdown"},"gitbook":{"version":"3.2.3","time":"2018-11-13T09:32:29.643Z"},"basePath":"..","book":{"language":""}});
+            gitbook.page.hasChanged({"page":{"title":"Data Preparation","level":"8.2.1","depth":2,"next":{"title":"General Regressor","level":"8.2.2","depth":2,"path":"regression/e2006_generic.md","ref":"regression/e2006_generic.md","articles":[]},"previous":{"title":"E2006-tfidf Regression Tutorial","level":"8.2","depth":1,"path":"regression/e2006.md","ref":"regression/e2006.md","articles":[{"title":"Data Preparation","level":"8.2.1","depth":2,"path":"regression/e2006_dataset.md","ref":"regression/e2006_dataset.md","articles":[]},{"title":"General Regressor","level":"8.2.2","depth":2,"path":"regression/e2006_generic.md","ref":"regression/e2006_generic.md","articles":[]},{"title":"Passive Aggressive, AROW","level":"8.2.3","depth":2,"path":"regression/e2006_arow.md","ref":"regression/e2006_arow.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapt
 ers","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"https://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"go
 ogle":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/tree/master/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"https://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x"
 ,"description":"User Manual for Apache Hivemall"},"file":{"path":"regression/e2006_dataset.md","mtime":"2018-12-25T07:50:37.548Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2018-12-26T10:20:07.153Z"},"basePath":"..","book":{"language":""}});
         });
     </script>
 </div>
@@ -2462,7 +2507,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda
         
     
         
-        <script src="https://cdnjs.cloudflare.com/ajax/libs/anchor-js/4.1.1/anchor.min.js"></script>
+        <script src="../gitbook/gitbook-plugin-anchorjs/anchor.min.js"></script>