You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hivemall.apache.org by my...@apache.org on 2019/09/27 19:04:53 UTC

[incubator-hivemall-site] branch asf-site updated (527a548 -> d2a01e8)

This is an automated email from the ASF dual-hosted git repository.

myui pushed a change to branch asf-site
in repository https://gitbox.apache.org/repos/asf/incubator-hivemall-site.git.


 discard 527a548  Updated the project site
     new 26f41ed  Update entry about feature binning
     new eb4c16e  Added a usage of feature_binning UDF
     new e30fb6e  Fixed ToC
     new d2a01e8  Added decision_path usage

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (527a548)
            \
             N -- N -- N   refs/heads/asf-site (d2a01e8)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 4 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 404.html                                           |  2 +-
 ci-management.html                                 |  2 +-
 contributing.html                                  |  2 +-
 dependency-convergence.html                        |  8 ++---
 dependency-info.html                               |  2 +-
 distribution-management.html                       |  2 +-
 download.html                                      |  2 +-
 faq.html                                           |  2 +-
 index.html                                         |  2 +-
 issue-management.html                              |  2 +-
 licenses.html                                      |  2 +-
 mailing-lists.html                                 |  2 +-
 modules.html                                       |  2 +-
 overview.html                                      |  2 +-
 plugins.html                                       |  2 +-
 poweredby.html                                     |  2 +-
 project-info.html                                  |  2 +-
 release-guide.html                                 |  2 +-
 release-setup.html                                 |  2 +-
 repository.html                                    |  2 +-
 scm.html                                           |  2 +-
 summary.html                                       |  2 +-
 team.html                                          |  2 +-
 userguide.html                                     |  2 +-
 userguide/anomaly/changefinder.html                |  2 +-
 userguide/anomaly/lof.html                         |  2 +-
 userguide/anomaly/sst.html                         |  2 +-
 userguide/binaryclass/a9a.html                     |  2 +-
 userguide/binaryclass/a9a_dataset.html             |  2 +-
 userguide/binaryclass/a9a_generic.html             |  2 +-
 userguide/binaryclass/a9a_lr.html                  |  2 +-
 userguide/binaryclass/a9a_minibatch.html           |  2 +-
 userguide/binaryclass/criteo.html                  |  2 +-
 userguide/binaryclass/criteo_dataset.html          |  2 +-
 userguide/binaryclass/criteo_ffm.html              |  2 +-
 userguide/binaryclass/general.html                 |  2 +-
 userguide/binaryclass/kdd2010a.html                |  2 +-
 userguide/binaryclass/kdd2010a_dataset.html        |  2 +-
 userguide/binaryclass/kdd2010a_scw.html            |  2 +-
 userguide/binaryclass/kdd2010b.html                |  2 +-
 userguide/binaryclass/kdd2010b_arow.html           |  2 +-
 userguide/binaryclass/kdd2010b_dataset.html        |  2 +-
 userguide/binaryclass/news20.html                  |  2 +-
 userguide/binaryclass/news20_adagrad.html          |  2 +-
 userguide/binaryclass/news20_dataset.html          |  2 +-
 userguide/binaryclass/news20_generic.html          |  2 +-
 userguide/binaryclass/news20_pa.html               |  2 +-
 userguide/binaryclass/news20_rf.html               |  2 +-
 userguide/binaryclass/news20_scw.html              |  2 +-
 userguide/binaryclass/titanic_rf.html              |  2 +-
 userguide/binaryclass/webspam.html                 |  2 +-
 userguide/binaryclass/webspam_dataset.html         |  2 +-
 userguide/binaryclass/webspam_scw.html             |  2 +-
 userguide/clustering/lda.html                      |  2 +-
 userguide/clustering/plsa.html                     |  2 +-
 userguide/docker/getting_started.html              |  2 +-
 userguide/eval/auc.html                            |  2 +-
 userguide/eval/binary_classification_measures.html |  2 +-
 userguide/eval/datagen.html                        |  2 +-
 userguide/eval/lr_datagen.html                     |  2 +-
 .../eval/multilabel_classification_measures.html   |  2 +-
 userguide/eval/rank.html                           |  2 +-
 userguide/eval/regression.html                     |  4 +--
 userguide/ft_engineering/binarize.html             |  2 +-
 userguide/ft_engineering/binning.html              |  2 +-
 userguide/ft_engineering/bm25.html                 |  2 +-
 userguide/ft_engineering/ft_trans.html             |  2 +-
 userguide/ft_engineering/hashing.html              |  2 +-
 userguide/ft_engineering/pairing.html              |  2 +-
 userguide/ft_engineering/polynomial.html           |  2 +-
 userguide/ft_engineering/quantify.html             |  2 +-
 userguide/ft_engineering/scaling.html              |  2 +-
 userguide/ft_engineering/selection.html            |  2 +-
 userguide/ft_engineering/term_vector.html          |  2 +-
 userguide/ft_engineering/tfidf.html                |  2 +-
 userguide/ft_engineering/vectorization.html        |  2 +-
 userguide/geospatial/latlon.html                   |  2 +-
 userguide/getting_started/index.html               |  2 +-
 userguide/getting_started/input-format.html        |  2 +-
 userguide/getting_started/installation.html        |  2 +-
 userguide/getting_started/permanent-functions.html |  2 +-
 userguide/index.html                               |  2 +-
 userguide/misc/approx.html                         |  2 +-
 userguide/misc/funcs.html                          | 34 +++++++++++++++++++++-
 userguide/misc/generic_funcs.html                  |  2 +-
 userguide/misc/tokenizer.html                      |  2 +-
 userguide/misc/topk.html                           |  4 +--
 userguide/multiclass/iris.html                     |  2 +-
 userguide/multiclass/iris_dataset.html             |  2 +-
 userguide/multiclass/iris_randomforest.html        |  2 +-
 userguide/multiclass/iris_scw.html                 |  2 +-
 userguide/multiclass/news20.html                   |  2 +-
 userguide/multiclass/news20_dataset.html           |  2 +-
 userguide/multiclass/news20_ensemble.html          |  2 +-
 userguide/multiclass/news20_one-vs-the-rest.html   |  2 +-
 .../multiclass/news20_one-vs-the-rest_dataset.html |  2 +-
 userguide/multiclass/news20_pa.html                |  2 +-
 userguide/multiclass/news20_scw.html               |  2 +-
 userguide/recommend/cf.html                        |  2 +-
 userguide/recommend/item_based_cf.html             |  2 +-
 userguide/recommend/movielens.html                 |  2 +-
 userguide/recommend/movielens_cf.html              |  2 +-
 userguide/recommend/movielens_cv.html              |  2 +-
 userguide/recommend/movielens_dataset.html         |  2 +-
 userguide/recommend/movielens_fm.html              |  2 +-
 userguide/recommend/movielens_mf.html              |  2 +-
 userguide/recommend/movielens_slim.html            |  2 +-
 userguide/recommend/news20.html                    |  2 +-
 userguide/recommend/news20_bbit_minhash.html       |  2 +-
 userguide/recommend/news20_jaccard.html            |  2 +-
 userguide/recommend/news20_knn.html                |  2 +-
 userguide/regression/e2006.html                    |  2 +-
 userguide/regression/e2006_arow.html               |  2 +-
 userguide/regression/e2006_dataset.html            |  2 +-
 userguide/regression/e2006_generic.html            |  2 +-
 userguide/regression/general.html                  |  2 +-
 userguide/regression/kddcup12tr2.html              |  2 +-
 userguide/regression/kddcup12tr2_adagrad.html      |  2 +-
 userguide/regression/kddcup12tr2_dataset.html      |  2 +-
 userguide/regression/kddcup12tr2_lr.html           |  2 +-
 userguide/regression/kddcup12tr2_lr_amplify.html   |  2 +-
 userguide/search_index.json                        |  2 +-
 userguide/spark/binaryclass/a9a_df.html            |  2 +-
 userguide/spark/binaryclass/a9a_sql.html           |  2 +-
 userguide/spark/binaryclass/index.html             |  2 +-
 userguide/spark/getting_started/index.html         |  2 +-
 userguide/spark/getting_started/installation.html  |  2 +-
 userguide/spark/misc/functions.html                |  2 +-
 userguide/spark/misc/misc.html                     |  2 +-
 userguide/spark/misc/topk_join.html                |  2 +-
 userguide/spark/regression/e2006_df.html           |  2 +-
 userguide/spark/regression/e2006_sql.html          |  2 +-
 userguide/supervised_learning/prediction.html      |  2 +-
 userguide/supervised_learning/tutorial.html        |  2 +-
 userguide/tips/addbias.html                        |  2 +-
 userguide/tips/emr.html                            |  2 +-
 userguide/tips/ensemble_learning.html              |  2 +-
 userguide/tips/general_tips.html                   |  2 +-
 userguide/tips/hadoop_tuning.html                  |  2 +-
 userguide/tips/index.html                          |  2 +-
 userguide/tips/mixserver.html                      |  2 +-
 userguide/tips/rand_amplify.html                   |  2 +-
 userguide/tips/rowid.html                          |  2 +-
 userguide/tips/rt_prediction.html                  |  2 +-
 userguide/troubleshooting/asterisk.html            |  2 +-
 userguide/troubleshooting/index.html               |  2 +-
 userguide/troubleshooting/mapjoin_classcastex.html |  2 +-
 userguide/troubleshooting/mapjoin_task_error.html  |  2 +-
 userguide/troubleshooting/num_mappers.html         |  2 +-
 userguide/troubleshooting/oom.html                 |  2 +-
 verify_artifacts.html                              |  2 +-
 151 files changed, 188 insertions(+), 156 deletions(-)


[incubator-hivemall-site] 03/04: Fixed ToC

Posted by my...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

myui pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/incubator-hivemall-site.git

commit e30fb6e9c22c05e418a640616e1e5fe713598f49
Author: Makoto Yui <my...@apache.org>
AuthorDate: Sat Jun 29 01:58:49 2019 +0900

    Fixed ToC
---
 userguide/ft_engineering/binning.html | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/userguide/ft_engineering/binning.html b/userguide/ft_engineering/binning.html
index c0102b3..5608e84 100644
--- a/userguide/ft_engineering/binning.html
+++ b/userguide/ft_engineering/binning.html
@@ -2382,10 +2382,11 @@
 <!-- toc --><div id="toc" class="toc">
 
 <ul>
-<li><a href="#data-preparation">Data Preparation</a><ul>
+<li><a href="#data-preparation">Data Preparation</a></li>
+<li><a href="#usage">Usage</a><ul>
 <li><a href="#custom-rule-for-binning">Custom rule for binning</a></li>
-<li><a href="#binning-based-on-quantiles">Binning based on quantiles</a></li>
-<li><a href="#practical-example">Practical Example</a></li>
+<li><a href="#binning-based-on-quantiles">Binning based on Quantiles</a></li>
+<li><a href="#concrete-example">Concrete Example</a></li>
 <li><a href="#create-a-mapping-table-by-feature-binning">Create a mapping table by Feature Binning</a></li>
 </ul>
 </li>
@@ -2449,6 +2450,7 @@
 </tr>
 </tbody>
 </table>
+<h1 id="usage">Usage</h1>
 <h2 id="custom-rule-for-binning">Custom rule for binning</h2>
 <p>You can provide a custom rule for binning as follows:</p>
 <pre><code class="lang-sql"><span class="hljs-keyword">select</span> 
@@ -2499,7 +2501,7 @@
 </tr>
 </tbody>
 </table>
-<h2 id="binning-based-on-quantiles">Binning based on quantiles</h2>
+<h2 id="binning-based-on-quantiles">Binning based on Quantiles</h2>
 <p>You can apply feature binning based on <a href="https://en.wikipedia.org/wiki/Quantile" target="_blank">quantiles</a>. </p>
 <p>Suppose converting <code>age</code> values into 3 bins:</p>
 <pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span>
@@ -2577,7 +2579,7 @@ ge:2&quot;,&quot;age:2&quot;,&quot;age:2&quot;,&quot;age:2&quot;]  [&quot;age:3&
 </tr>
 </tbody>
 </table>
-<h2 id="practical-example">Practical Example</h2>
+<h2 id="concrete-example">Concrete Example</h2>
 <p>Here, we show a more practical usage of the <code>feature_binning</code> UDF that applies feature binning to given feature vectors.</p>
 <pre><code class="lang-sql">WITH extracted as (
   <span class="hljs-keyword">select</span> 
@@ -2829,7 +2831,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda
     <script>
         var gitbook = gitbook || [];
         gitbook.push(function() {
-            gitbook.page.hasChanged({"page":{"title":"Feature Binning","level":"3.4","depth":1,"next":{"title":"Feature Paring","level":"3.5","depth":1,"path":"ft_engineering/pairing.md","ref":"ft_engineering/pairing.md","articles":[{"title":"Polynomial features","level":"3.5.1","depth":2,"path":"ft_engineering/polynomial.md","ref":"ft_engineering/polynomial.md","articles":[]}]},"previous":{"title":"Feature Selection","level":"3.3","depth":1,"path":"ft_engineering/selection.md","ref":"ft [...]
+            gitbook.page.hasChanged({"page":{"title":"Feature Binning","level":"3.4","depth":1,"next":{"title":"Feature Paring","level":"3.5","depth":1,"path":"ft_engineering/pairing.md","ref":"ft_engineering/pairing.md","articles":[{"title":"Polynomial features","level":"3.5.1","depth":2,"path":"ft_engineering/polynomial.md","ref":"ft_engineering/polynomial.md","articles":[]}]},"previous":{"title":"Feature Selection","level":"3.3","depth":1,"path":"ft_engineering/selection.md","ref":"ft [...]
         });
     </script>
 </div>


[incubator-hivemall-site] 02/04: Added a usage of feature_binning UDF

Posted by my...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

myui pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/incubator-hivemall-site.git

commit eb4c16ed01465b18176f43a018b4fdf07b7015a8
Author: Makoto Yui <my...@apache.org>
AuthorDate: Sat Jun 29 01:56:26 2019 +0900

    Added a usage of feature_binning UDF
---
 userguide/ft_engineering/binning.html | 70 ++++++++++++++++++++++++++++++-----
 1 file changed, 61 insertions(+), 9 deletions(-)

diff --git a/userguide/ft_engineering/binning.html b/userguide/ft_engineering/binning.html
index 1d4f235..c0102b3 100644
--- a/userguide/ft_engineering/binning.html
+++ b/userguide/ft_engineering/binning.html
@@ -2382,10 +2382,11 @@
 <!-- toc --><div id="toc" class="toc">
 
 <ul>
-<li><a href="#usage">Usage</a><ul>
-<li><a href="#feature-vector-trasformation-by-applying-feature-binning">Feature Vector trasformation by applying Feature Binning</a></li>
+<li><a href="#data-preparation">Data Preparation</a><ul>
+<li><a href="#custom-rule-for-binning">Custom rule for binning</a></li>
+<li><a href="#binning-based-on-quantiles">Binning based on quantiles</a></li>
 <li><a href="#practical-example">Practical Example</a></li>
-<li><a href="#get-a-mapping-table-by-feature-binning">Get a mapping table by Feature Binning</a></li>
+<li><a href="#create-a-mapping-table-by-feature-binning">Create a mapping table by Feature Binning</a></li>
 </ul>
 </li>
 <li><a href="#function-signatures">Function Signatures</a><ul>
@@ -2397,7 +2398,7 @@
 </ul>
 
 </div><!-- tocstop -->
-<h1 id="usage">Usage</h1>
+<h1 id="data-preparation">Data Preparation</h1>
 <p>Prepare sample data (<em>users</em> table) first as follows:</p>
 <pre><code class="lang-sql"><span class="hljs-keyword">CREATE</span> <span class="hljs-keyword">TABLE</span> <span class="hljs-keyword">users</span> (
   <span class="hljs-keyword">rowid</span> <span class="hljs-built_in">int</span>, <span class="hljs-keyword">name</span> <span class="hljs-keyword">string</span>, age <span class="hljs-built_in">int</span>, gender <span class="hljs-keyword">string</span>
@@ -2448,8 +2449,59 @@
 </tr>
 </tbody>
 </table>
-<h2 id="feature-vector-trasformation-by-applying-feature-binning">Feature Vector trasformation by applying Feature Binning</h2>
-<p>Now, converting <code>age</code> values into 3 bins.</p>
+<h2 id="custom-rule-for-binning">Custom rule for binning</h2>
+<p>You can provide a custom rule for binning as follows:</p>
+<pre><code class="lang-sql"><span class="hljs-keyword">select</span> 
+  features <span class="hljs-keyword">as</span> original,
+  feature_binning(
+    features,
+    <span class="hljs-comment">-- [-INF-10.0], (10.0-20.0], (20.0-30.0], (30.0-40.0], (40.0-INF]</span>
+    <span class="hljs-keyword">map</span>(<span class="hljs-string">&apos;age&apos;</span>, <span class="hljs-built_in">array</span>(-infinity(), <span class="hljs-number">10.0</span>, <span class="hljs-number">20.0</span>, <span class="hljs-number">30.0</span>, <span class="hljs-number">40.0</span>, infinity()))
+  ) <span class="hljs-keyword">as</span> binned
+<span class="hljs-keyword">from</span>
+  <span class="hljs-keyword">input</span>;
+</code></pre>
+<table>
+<thead>
+<tr>
+<th style="text-align:left">original</th>
+<th style="text-align:left">binned</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td style="text-align:left">[&quot;name#Jacob&quot;,&quot;gender#Male&quot;,&quot;age:20.0&quot;]</td>
+<td style="text-align:left">[&quot;name#Jacob&quot;,&quot;gender#Male&quot;,&quot;age:1&quot;]</td>
+</tr>
+<tr>
+<td style="text-align:left">[&quot;name#Mason&quot;,&quot;gender#Male&quot;,&quot;age:22.0&quot;]</td>
+<td style="text-align:left">[&quot;name#Mason&quot;,&quot;gender#Male&quot;,&quot;age:2&quot;]</td>
+</tr>
+<tr>
+<td style="text-align:left">[&quot;name#Sophia&quot;,&quot;gender#Female&quot;,&quot;age:35.0&quot;]</td>
+<td style="text-align:left">[&quot;name#Sophia&quot;,&quot;gender#Female&quot;,&quot;age:3&quot;]</td>
+</tr>
+<tr>
+<td style="text-align:left">[&quot;name#Ethan&quot;,&quot;gender#Male&quot;,&quot;age:55.0&quot;]</td>
+<td style="text-align:left">[&quot;name#Ethan&quot;,&quot;gender#Male&quot;,&quot;age:4&quot;]</td>
+</tr>
+<tr>
+<td style="text-align:left">[&quot;name#Emma&quot;,&quot;gender#Female&quot;,&quot;age:15.0&quot;]</td>
+<td style="text-align:left">[&quot;name#Emma&quot;,&quot;gender#Female&quot;,&quot;age:1&quot;]</td>
+</tr>
+<tr>
+<td style="text-align:left">[&quot;name#Noah&quot;,&quot;gender#Male&quot;,&quot;age:46.0&quot;]</td>
+<td style="text-align:left">[&quot;name#Noah&quot;,&quot;gender#Male&quot;,&quot;age:4&quot;]</td>
+</tr>
+<tr>
+<td style="text-align:left">[&quot;name#Isabella&quot;,&quot;gender#Female&quot;,&quot;age:20.0&quot;]</td>
+<td style="text-align:left">[&quot;name#Isabella&quot;,&quot;gender#Female&quot;,&quot;age:1&quot;]</td>
+</tr>
+</tbody>
+</table>
+<h2 id="binning-based-on-quantiles">Binning based on quantiles</h2>
+<p>You can apply feature binning based on <a href="https://en.wikipedia.org/wiki/Quantile" target="_blank">quantiles</a>. </p>
+<p>Suppose we want to convert <code>age</code> values into 3 bins:</p>
 <pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span>
   <span class="hljs-keyword">map</span>(<span class="hljs-string">&apos;age&apos;</span>, build_bins(age, <span class="hljs-number">3</span>)) <span class="hljs-keyword">AS</span> quantiles_map
 <span class="hljs-keyword">FROM</span>
@@ -2458,7 +2510,7 @@
 <blockquote>
 <p>{&quot;age&quot;:[-Infinity,18.333333333333332,30.666666666666657,Infinity]}</p>
 </blockquote>
-<p>In the above query result, you can find 4 values for age in <code>quantiles_map</code>. It&apos;s a threshold of 3 bins. </p>
+<p>In the above query result, you can find 4 values for age in <code>quantiles_map</code>. These are the thresholds that delimit the 3 bins.</p>
 <pre><code class="lang-sql">WITH bins as (
   <span class="hljs-keyword">SELECT</span>
     <span class="hljs-keyword">map</span>(<span class="hljs-string">&apos;age&apos;</span>, build_bins(age, <span class="hljs-number">3</span>)) <span class="hljs-keyword">AS</span> quantiles_map
@@ -2582,7 +2634,7 @@ bins <span class="hljs-keyword">as</span> (
 </tr>
 </tbody>
 </table>
-<h2 id="get-a-mapping-table-by-feature-binning">Get a mapping table by Feature Binning</h2>
+<h2 id="create-a-mapping-table-by-feature-binning">Create a mapping table by Feature Binning</h2>
 <pre><code class="lang-sql">WITH bins AS (
   <span class="hljs-keyword">SELECT</span> build_bins(age, <span class="hljs-number">3</span>) <span class="hljs-keyword">AS</span> quantiles
   <span class="hljs-keyword">FROM</span> <span class="hljs-keyword">users</span>
@@ -2777,7 +2829,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda
     <script>
         var gitbook = gitbook || [];
         gitbook.push(function() {
-            gitbook.page.hasChanged({"page":{"title":"Feature Binning","level":"3.4","depth":1,"next":{"title":"Feature Paring","level":"3.5","depth":1,"path":"ft_engineering/pairing.md","ref":"ft_engineering/pairing.md","articles":[{"title":"Polynomial features","level":"3.5.1","depth":2,"path":"ft_engineering/polynomial.md","ref":"ft_engineering/polynomial.md","articles":[]}]},"previous":{"title":"Feature Selection","level":"3.3","depth":1,"path":"ft_engineering/selection.md","ref":"ft [...]
+            gitbook.page.hasChanged({"page":{"title":"Feature Binning","level":"3.4","depth":1,"next":{"title":"Feature Paring","level":"3.5","depth":1,"path":"ft_engineering/pairing.md","ref":"ft_engineering/pairing.md","articles":[{"title":"Polynomial features","level":"3.5.1","depth":2,"path":"ft_engineering/polynomial.md","ref":"ft_engineering/polynomial.md","articles":[]}]},"previous":{"title":"Feature Selection","level":"3.3","depth":1,"path":"ft_engineering/selection.md","ref":"ft [...]
         });
     </script>
 </div>


[incubator-hivemall-site] 04/04: Added decision_path usage

Posted by my...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

myui pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/incubator-hivemall-site.git

commit d2a01e8dab22ec75a9c230ca8a262a80f7b17a8d
Author: Makoto Yui <my...@apache.org>
AuthorDate: Sat Sep 28 04:04:09 2019 +0900

    Added decision_path usage
---
 userguide/misc/funcs.html | 34 +++++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/userguide/misc/funcs.html b/userguide/misc/funcs.html
index 74adf17..be58736 100644
--- a/userguide/misc/funcs.html
+++ b/userguide/misc/funcs.html
@@ -2955,6 +2955,38 @@ bloom <span class="hljs-keyword">as</span> (
 </li>
 <li><p><code>train_randomforest_regressor(array&lt;double|string&gt; features, double target [, string options])</code> - Returns a relation consisting of &lt;int model_id, int model_type, string model, array&lt;double&gt; var_importance, double oob_errors, int oob_tests&gt;</p>
 </li>
+<li><p><code>decision_path(string modelId, string model, array&lt;double|string&gt; features [, const string options] [, optional array&lt;string&gt; featureNames=null, optional array&lt;string&gt; classNames=null])</code> - Returns a decision path for each prediction in array&lt;string&gt;</p>
+<pre><code class="lang-sql">SELECT
+  t.passengerid,
+  decision_path(m.model_id, m.model, t.features, &apos;-classification&apos;)
+FROM
+  model_rf m
+  LEFT OUTER JOIN
+  test_rf t;
+&gt; | 892 | [&quot;2 [0.0] = 0.0&quot;,&quot;0 [3.0] = 3.0&quot;,&quot;1 [696.0] != 107.0&quot;,&quot;7 [7.8292] &lt;= 7.9104&quot;,&quot;1 [696.0] != 828.0&quot;,&quot;1 [696.0] != 391.0&quot;,&quot;0 [0.961038961038961, 0.03896103896103896]&quot;] |
+
+-- Show 100 frequent branches
+WITH tmp as (
+  SELECT
+    decision_path(m.model_id, m.model, t.features, &apos;-classification -no_verbose -no_leaf&apos;, array(&apos;pclass&apos;,&apos;name&apos;,&apos;sex&apos;,&apos;age&apos;,&apos;sibsp&apos;,&apos;parch&apos;,&apos;ticket&apos;,&apos;fare&apos;,&apos;cabin&apos;,&apos;embarked&apos;), array(&apos;no&apos;,&apos;yes&apos;)) as path
+  FROM
+    model_rf m
+    LEFT OUTER JOIN -- CROSS JOIN
+    test_rf t
+)
+select
+  r.branch,
+  count(1) as cnt
+from
+  tmp l
+  LATERAL VIEW explode(l.path) r as branch
+group by
+  r.branch
+order by
+  cnt desc
+limit 100;
+</code></pre>
+</li>
 <li><p><code>guess_attribute_types(ANY, ...)</code> - Returns attribute types</p>
 <pre><code class="lang-sql">select guess_attribute_types(*) from train limit 1;
 &gt; Q,Q,C,C,C,C,Q,C,C,C,Q,C,Q,Q,Q,Q,C,Q
@@ -3057,7 +3089,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda
     <script>
         var gitbook = gitbook || [];
         gitbook.push(function() {
-            gitbook.page.hasChanged({"page":{"title":"List of Functions","level":"1.3","depth":1,"next":{"title":"Tips for Effective Hivemall","level":"1.4","depth":1,"path":"tips/README.md","ref":"tips/README.md","articles":[{"title":"Explicit add_bias() for better prediction","level":"1.4.1","depth":2,"path":"tips/addbias.md","ref":"tips/addbias.md","articles":[]},{"title":"Use rand_amplify() to better prediction results","level":"1.4.2","depth":2,"path":"tips/rand_amplify.md","ref":"t [...]
+            gitbook.page.hasChanged({"page":{"title":"List of Functions","level":"1.3","depth":1,"next":{"title":"Tips for Effective Hivemall","level":"1.4","depth":1,"path":"tips/README.md","ref":"tips/README.md","articles":[{"title":"Explicit add_bias() for better prediction","level":"1.4.1","depth":2,"path":"tips/addbias.md","ref":"tips/addbias.md","articles":[]},{"title":"Use rand_amplify() to better prediction results","level":"1.4.2","depth":2,"path":"tips/rand_amplify.md","ref":"t [...]
         });
     </script>
 </div>


[incubator-hivemall-site] 01/04: Update entry about feature binning

Posted by my...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

myui pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/incubator-hivemall-site.git

commit 26f41edc32f58b335f2798bbbca1237b41de893a
Author: Makoto Yui <my...@apache.org>
AuthorDate: Sat Jun 29 01:28:27 2019 +0900

    Update entry about feature binning
---
 userguide/ft_engineering/binning.html | 233 ++++++++++++++++++++++++----------
 userguide/misc/funcs.html             |  37 +++++-
 userguide/misc/generic_funcs.html     |   2 +-
 3 files changed, 204 insertions(+), 68 deletions(-)

diff --git a/userguide/ft_engineering/binning.html b/userguide/ft_engineering/binning.html
index 5d75620..1d4f235 100644
--- a/userguide/ft_engineering/binning.html
+++ b/userguide/ft_engineering/binning.html
@@ -2377,28 +2377,21 @@
   specific language governing permissions and limitations
   under the License.
 -->
-<p>Feature binning is a method of dividing quantitative variables into categorical values.
-It groups quantitative values into a pre-defined number of bins.</p>
-<p><em>Note: This feature is supported from Hivemall v0.5-rc.1 or later.</em></p>
+<p>Feature binning is a method of dividing quantitative variables into categorical values. It groups quantitative values into a pre-defined number of bins.</p>
+<p>If the number of bins is set to 3, the bin ranges become something like <code>[-Inf, 1], (1, 10], (10, Inf]</code>.</p>
 <!-- toc --><div id="toc" class="toc">
 
 <ul>
 <li><a href="#usage">Usage</a><ul>
-<li><a href="#a-feature-vector-trasformation-by-applying-feature-binning">A. Feature Vector trasformation by applying Feature Binning</a></li>
-<li><a href="#b-get-a-mapping-table-by-feature-binning">B. Get a mapping table by Feature Binning</a></li>
-</ul>
-</li>
-<li><a href="#function-signature">Function Signature</a><ul>
-<li><a href="#udaf-buildbinsweight-numofbins-autoshrink">[UDAF] <code>build_bins(weight, num_of_bins[, auto_shrink])</code></a><ul>
-<li><a href="#input">Input</a></li>
-<li><a href="#output">Output</a></li>
-</ul>
-</li>
-<li><a href="#udf-featurebinningfeatures-quantilesmapweight-quantiles">[UDF] <code>feature_binning(features, quantiles_map)/(weight, quantiles)</code></a><ul>
-<li><a href="#variation-a">Variation: A</a></li>
-<li><a href="#variation-b">Variation: B</a></li>
+<li><a href="#feature-vector-trasformation-by-applying-feature-binning">Feature Vector transformation by applying Feature Binning</a></li>
+<li><a href="#practical-example">Practical Example</a></li>
+<li><a href="#get-a-mapping-table-by-feature-binning">Get a mapping table by Feature Binning</a></li>
 </ul>
 </li>
+<li><a href="#function-signatures">Function Signatures</a><ul>
+<li><a href="#udaf-buildbinsweight-numofbins--autoshrinkfalse">UDAF <code>build_bins(weight, num_of_bins [, auto_shrink=false])</code></a></li>
+<li><a href="#udf-featurebinningfeatures-quantilesmap">UDF <code>feature_binning(features, quantiles_map)</code></a></li>
+<li><a href="#udf-featurebinningweight-quantiles">UDF <code>feature_binning(weight, quantiles)</code></a></li>
 </ul>
 </li>
 </ul>
@@ -2407,35 +2400,96 @@ It groups quantitative values into a pre-defined number of bins.</p>
 <h1 id="usage">Usage</h1>
 <p>Prepare sample data (<em>users</em> table) first as follows:</p>
 <pre><code class="lang-sql"><span class="hljs-keyword">CREATE</span> <span class="hljs-keyword">TABLE</span> <span class="hljs-keyword">users</span> (
-  <span class="hljs-keyword">name</span> <span class="hljs-keyword">string</span>, age <span class="hljs-built_in">int</span>, gender <span class="hljs-keyword">string</span>
+  <span class="hljs-keyword">rowid</span> <span class="hljs-built_in">int</span>, <span class="hljs-keyword">name</span> <span class="hljs-keyword">string</span>, age <span class="hljs-built_in">int</span>, gender <span class="hljs-keyword">string</span>
 );
-
 <span class="hljs-keyword">INSERT</span> <span class="hljs-keyword">INTO</span> <span class="hljs-keyword">users</span> <span class="hljs-keyword">VALUES</span>
-  (<span class="hljs-string">&apos;Jacob&apos;</span>, <span class="hljs-number">20</span>, <span class="hljs-string">&apos;Male&apos;</span>),
-  (<span class="hljs-string">&apos;Mason&apos;</span>, <span class="hljs-number">22</span>, <span class="hljs-string">&apos;Male&apos;</span>),
-  (<span class="hljs-string">&apos;Sophia&apos;</span>, <span class="hljs-number">35</span>, <span class="hljs-string">&apos;Female&apos;</span>),
-  (<span class="hljs-string">&apos;Ethan&apos;</span>, <span class="hljs-number">55</span>, <span class="hljs-string">&apos;Male&apos;</span>),
-  (<span class="hljs-string">&apos;Emma&apos;</span>, <span class="hljs-number">15</span>, <span class="hljs-string">&apos;Female&apos;</span>),
-  (<span class="hljs-string">&apos;Noah&apos;</span>, <span class="hljs-number">46</span>, <span class="hljs-string">&apos;Male&apos;</span>),
-  (<span class="hljs-string">&apos;Isabella&apos;</span>, <span class="hljs-number">20</span>, <span class="hljs-string">&apos;Female&apos;</span>);
+  (<span class="hljs-number">1</span>, <span class="hljs-string">&apos;Jacob&apos;</span>, <span class="hljs-number">20</span>, <span class="hljs-string">&apos;Male&apos;</span>),
+  (<span class="hljs-number">2</span>, <span class="hljs-string">&apos;Mason&apos;</span>, <span class="hljs-number">22</span>, <span class="hljs-string">&apos;Male&apos;</span>),
+  (<span class="hljs-number">3</span>, <span class="hljs-string">&apos;Sophia&apos;</span>, <span class="hljs-number">35</span>, <span class="hljs-string">&apos;Female&apos;</span>),
+  (<span class="hljs-number">4</span>, <span class="hljs-string">&apos;Ethan&apos;</span>, <span class="hljs-number">55</span>, <span class="hljs-string">&apos;Male&apos;</span>),
+  (<span class="hljs-number">5</span>, <span class="hljs-string">&apos;Emma&apos;</span>, <span class="hljs-number">15</span>, <span class="hljs-string">&apos;Female&apos;</span>),
+  (<span class="hljs-number">6</span>, <span class="hljs-string">&apos;Noah&apos;</span>, <span class="hljs-number">46</span>, <span class="hljs-string">&apos;Male&apos;</span>),
+  (<span class="hljs-number">7</span>, <span class="hljs-string">&apos;Isabella&apos;</span>, <span class="hljs-number">20</span>, <span class="hljs-string">&apos;Female&apos;</span>)
+;
+
+<span class="hljs-keyword">CREATE</span> <span class="hljs-keyword">TABLE</span> <span class="hljs-keyword">input</span> <span class="hljs-keyword">as</span>
+<span class="hljs-keyword">SELECT</span>
+  <span class="hljs-keyword">rowid</span>,
+  array_concat(
+    categorical_features(
+      <span class="hljs-built_in">array</span>(<span class="hljs-string">&apos;name&apos;</span>, <span class="hljs-string">&apos;gender&apos;</span>),
+      <span class="hljs-keyword">name</span>, gender
+    ),
+    quantitative_features(
+      <span class="hljs-built_in">array</span>(<span class="hljs-string">&apos;age&apos;</span>),
+      age
+    )
+  ) <span class="hljs-keyword">AS</span> features
+<span class="hljs-keyword">FROM</span>
+  <span class="hljs-keyword">users</span>;
+
+<span class="hljs-keyword">select</span> * <span class="hljs-keyword">from</span> <span class="hljs-keyword">input</span> <span class="hljs-keyword">limit</span> <span class="hljs-number">2</span>;
 </code></pre>
-<h2 id="a-feature-vector-trasformation-by-applying-feature-binning">A. Feature Vector trasformation by applying Feature Binning</h2>
-<pre><code class="lang-sql">WITH t AS (
+<table>
+<thead>
+<tr>
+<th style="text-align:left">input.rowid</th>
+<th style="text-align:left">input.features</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td style="text-align:left">1</td>
+<td style="text-align:left">[&quot;name#Jacob&quot;,&quot;gender#Male&quot;,&quot;age:20.0&quot;]</td>
+</tr>
+<tr>
+<td style="text-align:left">2</td>
+<td style="text-align:left">[&quot;name#Mason&quot;,&quot;gender#Male&quot;,&quot;age:22.0&quot;]</td>
+</tr>
+</tbody>
+</table>
+<h2 id="feature-vector-trasformation-by-applying-feature-binning">Feature Vector transformation by applying Feature Binning</h2>
+<p>Now, convert <code>age</code> values into 3 bins.</p>
+<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span>
+  <span class="hljs-keyword">map</span>(<span class="hljs-string">&apos;age&apos;</span>, build_bins(age, <span class="hljs-number">3</span>)) <span class="hljs-keyword">AS</span> quantiles_map
+<span class="hljs-keyword">FROM</span>
+  <span class="hljs-keyword">users</span>
+</code></pre>
+<blockquote>
+<p>{&quot;age&quot;:[-Infinity,18.333333333333332,30.666666666666657,Infinity]}</p>
+</blockquote>
+<p>In the above query result, you can find 4 values for age in <code>quantiles_map</code>. These are the thresholds that delimit the 3 bins.</p>
+<pre><code class="lang-sql">WITH bins as (
   <span class="hljs-keyword">SELECT</span>
-    array_concat(
-      categorical_features(
-        <span class="hljs-built_in">array</span>(<span class="hljs-string">&apos;name&apos;</span>, <span class="hljs-string">&apos;gender&apos;</span>),
-    <span class="hljs-keyword">name</span>, gender
-      ),
-      quantitative_features(
-    <span class="hljs-built_in">array</span>(<span class="hljs-string">&apos;age&apos;</span>),
-    age
-      )
-    ) <span class="hljs-keyword">AS</span> features
+    <span class="hljs-keyword">map</span>(<span class="hljs-string">&apos;age&apos;</span>, build_bins(age, <span class="hljs-number">3</span>)) <span class="hljs-keyword">AS</span> quantiles_map
   <span class="hljs-keyword">FROM</span>
     <span class="hljs-keyword">users</span>
-),
-bins <span class="hljs-keyword">AS</span> (
+)
+<span class="hljs-keyword">select</span>
+  feature_binning(
+    <span class="hljs-built_in">array</span>(<span class="hljs-string">&apos;age:-Infinity&apos;</span>, <span class="hljs-string">&apos;age:-1&apos;</span>, <span class="hljs-string">&apos;age:0&apos;</span>, <span class="hljs-string">&apos;age:1&apos;</span>, <span class="hljs-string">&apos;age:18.333333333333331&apos;</span>, <span class="hljs-string">&apos;age:18.333333333333332&apos;</span>), quantiles_map
+  ),
+  feature_binning(
+    <span class="hljs-built_in">array</span>(<span class="hljs-string">&apos;age:18.3333333333333333&apos;</span>, <span class="hljs-string">&apos;age:18.33333333333334&apos;</span>, <span class="hljs-string">&apos;age:19&apos;</span>, <span class="hljs-string">&apos;age:30&apos;</span>, <span class="hljs-string">&apos;age:30.666666666666656&apos;</span>, <span class="hljs-string">&apos;age:30.666666666666657&apos;</span>), quantiles_map
+  ),
+  feature_binning(
+    <span class="hljs-built_in">array</span>(<span class="hljs-string">&apos;age:666666666666658&apos;</span>, <span class="hljs-string">&apos;age:30.66666666666666&apos;</span>, <span class="hljs-string">&apos;age:31&apos;</span>, <span class="hljs-string">&apos;age:99&apos;</span>, <span class="hljs-string">&apos;age:Infinity&apos;</span>), quantiles_map
+  ),
+  feature_binning(
+    <span class="hljs-built_in">array</span>(<span class="hljs-string">&apos;age:NaN&apos;</span>), quantiles_map
+  ),
+  feature_binning( <span class="hljs-comment">-- not in map</span>
+    <span class="hljs-built_in">array</span>(<span class="hljs-string">&apos;weight:60.3&apos;</span>), quantiles_map
+  )
+<span class="hljs-keyword">from</span>
+  bins
+</code></pre>
+<blockquote>
+<p>[&quot;age:0&quot;,&quot;age:0&quot;,&quot;age:0&quot;,&quot;age:0&quot;,&quot;age:0&quot;,&quot;age:0&quot;]       [&quot;age:0&quot;,&quot;age:1&quot;,&quot;age:1&quot;,&quot;age:1&quot;,&quot;age:1&quot;,&quot;age:1&quot;]
+[&quot;age:2&quot;,&quot;age:2&quot;,&quot;age:2&quot;,&quot;age:2&quot;,&quot;age:2&quot;]  [&quot;age:3&quot;]       [&quot;weight:60.3&quot;]</p>
+</blockquote>
+<p>The following query shows more practical usage:</p>
+<pre><code class="lang-sql">WITH bins AS (
   <span class="hljs-keyword">SELECT</span>
     <span class="hljs-keyword">map</span>(<span class="hljs-string">&apos;age&apos;</span>, build_bins(age, <span class="hljs-number">3</span>)) <span class="hljs-keyword">AS</span> quantiles_map
   <span class="hljs-keyword">FROM</span>
@@ -2444,40 +2498,91 @@ bins <span class="hljs-keyword">AS</span> (
 <span class="hljs-keyword">SELECT</span>
   feature_binning(features, quantiles_map) <span class="hljs-keyword">AS</span> features
 <span class="hljs-keyword">FROM</span>
-  t <span class="hljs-keyword">CROSS</span> <span class="hljs-keyword">JOIN</span> bins;
+  <span class="hljs-keyword">input</span>
+  <span class="hljs-keyword">CROSS</span> <span class="hljs-keyword">JOIN</span> bins;
 </code></pre>
-<p><em>Result</em></p>
 <table>
 <thead>
 <tr>
-<th style="text-align:center">features: <code>array&lt;features::string&gt;</code></th>
+<th style="text-align:left">features: <code>array&lt;features::string&gt;</code></th>
 </tr>
 </thead>
 <tbody>
 <tr>
-<td style="text-align:center">[&quot;name#Jacob&quot;,&quot;gender#Male&quot;,&quot;age:1&quot;]</td>
+<td style="text-align:left">[&quot;name#Jacob&quot;,&quot;gender#Male&quot;,&quot;age:1&quot;]</td>
 </tr>
 <tr>
-<td style="text-align:center">[&quot;name#Mason&quot;,&quot;gender#Male&quot;,&quot;age:1&quot;]</td>
+<td style="text-align:left">[&quot;name#Mason&quot;,&quot;gender#Male&quot;,&quot;age:1&quot;]</td>
 </tr>
 <tr>
-<td style="text-align:center">[&quot;name#Sophia&quot;,&quot;gender#Female&quot;,&quot;age:2&quot;]</td>
+<td style="text-align:left">[&quot;name#Sophia&quot;,&quot;gender#Female&quot;,&quot;age:2&quot;]</td>
 </tr>
 <tr>
-<td style="text-align:center">[&quot;name#Ethan&quot;,&quot;gender#Male&quot;,&quot;age:2&quot;]</td>
+<td style="text-align:left">[&quot;name#Ethan&quot;,&quot;gender#Male&quot;,&quot;age:2&quot;]</td>
+</tr>
+<tr>
+<td style="text-align:left">...</td>
+</tr>
+</tbody>
+</table>
+<h2 id="practical-example">Practical Example</h2>
+<p>Here, we show a more practical usage of the <code>feature_binning</code> UDF that applies feature binning to given feature vectors.</p>
+<pre><code class="lang-sql">WITH extracted as (
+  <span class="hljs-keyword">select</span> 
+    extract_feature(feature) <span class="hljs-keyword">as</span> <span class="hljs-keyword">index</span>,
+    extract_weight(feature) <span class="hljs-keyword">as</span> <span class="hljs-keyword">value</span>
+  <span class="hljs-keyword">from</span>
+    <span class="hljs-keyword">input</span> l
+    LATERAL <span class="hljs-keyword">VIEW</span> explode(features) r <span class="hljs-keyword">as</span> feature
+  <span class="hljs-keyword">where</span>
+    <span class="hljs-keyword">instr</span>(feature, <span class="hljs-string">&apos;:&apos;</span>) &gt; <span class="hljs-number">0</span> <span class="hljs-comment">-- filter out categorical features</span>
+),
+<span class="hljs-keyword">mapping</span> <span class="hljs-keyword">as</span> (
+  <span class="hljs-keyword">select</span>
+    <span class="hljs-keyword">index</span>, 
+    build_bins(<span class="hljs-keyword">value</span>, <span class="hljs-number">5</span>, <span class="hljs-literal">true</span>) <span class="hljs-keyword">as</span> quantiles <span class="hljs-comment">-- 5 bins with auto bin shrinking</span>
+  <span class="hljs-keyword">from</span>
+    extracted
+  <span class="hljs-keyword">group</span> <span class="hljs-keyword">by</span>
+    <span class="hljs-keyword">index</span>
+),
+bins <span class="hljs-keyword">as</span> (
+   <span class="hljs-keyword">select</span> 
+    to_map(<span class="hljs-keyword">index</span>, quantiles) <span class="hljs-keyword">as</span> quantiles 
+   <span class="hljs-keyword">from</span>
+    <span class="hljs-keyword">mapping</span>
+)
+<span class="hljs-keyword">select</span>
+  l.features <span class="hljs-keyword">as</span> original,
+  feature_binning(l.features, r.quantiles) <span class="hljs-keyword">as</span> features
+<span class="hljs-keyword">from</span>
+  <span class="hljs-keyword">input</span> l
+  <span class="hljs-keyword">cross</span> <span class="hljs-keyword">join</span> bins r
+<span class="hljs-comment">-- limit 10;</span>
+</code></pre>
+<table>
+<thead>
+<tr>
+<th style="text-align:left">original</th>
+<th style="text-align:left">features</th>
 </tr>
+</thead>
+<tbody>
 <tr>
-<td style="text-align:center">[&quot;name#Emma&quot;,&quot;gender#Female&quot;,&quot;age:0&quot;]</td>
+<td style="text-align:left">[&quot;name#Jacob&quot;,&quot;gender#Male&quot;,&quot;age:20.0&quot;]</td>
+<td style="text-align:left">[&quot;name#Jacob&quot;,&quot;gender#Male&quot;,&quot;age:2&quot;]</td>
 </tr>
 <tr>
-<td style="text-align:center">[&quot;name#Noah&quot;,&quot;gender#Male&quot;,&quot;age:2&quot;]</td>
+<td style="text-align:left">[&quot;name#Isabella&quot;,&quot;gender#Female&quot;,&quot;age:20.0&quot;]</td>
+<td style="text-align:left">[&quot;name#Isabella&quot;,&quot;gender#Female&quot;,&quot;age:2&quot;]</td>
 </tr>
 <tr>
-<td style="text-align:center">[&quot;name#Isabella&quot;,&quot;gender#Female&quot;,&quot;age:1&quot;]</td>
+<td style="text-align:left">...</td>
+<td style="text-align:left">...</td>
 </tr>
 </tbody>
 </table>
-<h2 id="b-get-a-mapping-table-by-feature-binning">B. Get a mapping table by Feature Binning</h2>
+<h2 id="get-a-mapping-table-by-feature-binning">Get a mapping table by Feature Binning</h2>
 <pre><code class="lang-sql">WITH bins AS (
   <span class="hljs-keyword">SELECT</span> build_bins(age, <span class="hljs-number">3</span>) <span class="hljs-keyword">AS</span> quantiles
   <span class="hljs-keyword">FROM</span> <span class="hljs-keyword">users</span>
@@ -2487,7 +2592,6 @@ bins <span class="hljs-keyword">AS</span> (
 <span class="hljs-keyword">FROM</span>
   <span class="hljs-keyword">users</span> <span class="hljs-keyword">CROSS</span> <span class="hljs-keyword">JOIN</span> bins;
 </code></pre>
-<p><em>Result</em></p>
 <table>
 <thead>
 <tr>
@@ -2526,9 +2630,9 @@ bins <span class="hljs-keyword">AS</span> (
 </tr>
 </tbody>
 </table>
-<h1 id="function-signature">Function Signature</h1>
-<h2 id="udaf-buildbinsweight-numofbins-autoshrink">[UDAF] <code>build_bins(weight, num_of_bins[, auto_shrink])</code></h2>
-<h3 id="input">Input</h3>
+<h1 id="function-signatures">Function Signatures</h1>
+<h3 id="udaf-buildbinsweight-numofbins--autoshrinkfalse">UDAF <code>build_bins(weight, num_of_bins [, auto_shrink=false])</code></h3>
+<h4 id="input">Input</h4>
 <table>
 <thead>
 <tr>
@@ -2540,12 +2644,12 @@ bins <span class="hljs-keyword">AS</span> (
 <tbody>
 <tr>
 <td style="text-align:center">weight</td>
-<td style="text-align:center">2 &lt;=</td>
+<td style="text-align:center">greater than or equal to 2</td>
 <td style="text-align:center">behavior when separations are repeated: T=&gt;skip, F=&gt;exception</td>
 </tr>
 </tbody>
 </table>
-<h3 id="output">Output</h3>
+<h4 id="output">Output</h4>
 <table>
 <thead>
 <tr>
@@ -2554,14 +2658,13 @@ bins <span class="hljs-keyword">AS</span> (
 </thead>
 <tbody>
 <tr>
-<td style="text-align:center">array of separation value</td>
+<td style="text-align:center">thresholds of bins based on quantiles</td>
 </tr>
 </tbody>
 </table>
 <div class="panel panel-primary"><div class="panel-heading"><h3 class="panel-title" id="note"><i class="fa fa-edit"></i> Note</h3></div><div class="panel-body"><p>There is the possibility quantiles are repeated because of too many <code>num_of_bins</code> or too few data.
-If <code>auto_shrink</code> is true, skip duplicated quantiles. If not, throw an exception.</p></div></div>
-<h2 id="udf-featurebinningfeatures-quantilesmapweight-quantiles">[UDF] <code>feature_binning(features, quantiles_map)/(weight, quantiles)</code></h2>
-<h3 id="variation-a">Variation: A</h3>
+If <code>auto_shrink</code> is set to true, skip duplicated quantiles. If not, throw an exception.</p></div></div>
+<h3 id="udf-featurebinningfeatures-quantilesmap">UDF <code>feature_binning(features, quantiles_map)</code></h3>
 <h4 id="input">Input</h4>
 <table>
 <thead>
@@ -2572,8 +2675,8 @@ If <code>auto_shrink</code> is true, skip duplicated quantiles. If not, throw an
 </thead>
 <tbody>
 <tr>
-<td style="text-align:center">serialized feature</td>
-<td style="text-align:center">entry:: key: col name, val: quantiles</td>
+<td style="text-align:center">feature vector</td>
+<td style="text-align:center">a map where key=column name and value=quantiles</td>
 </tr>
 </tbody>
 </table>
@@ -2586,11 +2689,11 @@ If <code>auto_shrink</code> is true, skip duplicated quantiles. If not, throw an
 </thead>
 <tbody>
 <tr>
-<td style="text-align:center">serialized and binned features</td>
+<td style="text-align:center">binned features</td>
 </tr>
 </tbody>
 </table>
-<h3 id="variation-b">Variation: B</h3>
+<h3 id="udf-featurebinningweight-quantiles">UDF <code>feature_binning(weight, quantiles)</code></h3>
 <h4 id="input">Input</h4>
 <table>
 <thead>
@@ -2674,7 +2777,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda
     <script>
         var gitbook = gitbook || [];
         gitbook.push(function() {
-            gitbook.page.hasChanged({"page":{"title":"Feature Binning","level":"3.4","depth":1,"next":{"title":"Feature Paring","level":"3.5","depth":1,"path":"ft_engineering/pairing.md","ref":"ft_engineering/pairing.md","articles":[{"title":"Polynomial features","level":"3.5.1","depth":2,"path":"ft_engineering/polynomial.md","ref":"ft_engineering/polynomial.md","articles":[]}]},"previous":{"title":"Feature Selection","level":"3.3","depth":1,"path":"ft_engineering/selection.md","ref":"ft [...]
+            gitbook.page.hasChanged({"page":{"title":"Feature Binning","level":"3.4","depth":1,"next":{"title":"Feature Paring","level":"3.5","depth":1,"path":"ft_engineering/pairing.md","ref":"ft_engineering/pairing.md","articles":[{"title":"Polynomial features","level":"3.5.1","depth":2,"path":"ft_engineering/polynomial.md","ref":"ft_engineering/polynomial.md","articles":[]}]},"previous":{"title":"Feature Selection","level":"3.3","depth":1,"path":"ft_engineering/selection.md","ref":"ft [...]
         });
     </script>
 </div>
diff --git a/userguide/misc/funcs.html b/userguide/misc/funcs.html
index a77222d..74adf17 100644
--- a/userguide/misc/funcs.html
+++ b/userguide/misc/funcs.html
@@ -2628,7 +2628,40 @@ Reference: <a href="https://papers.nips.cc/paper/3848-adaptive-regularization-of
 <ul>
 <li><p><code>build_bins(number weight, const int num_of_bins[, const boolean auto_shrink = false])</code> - Return quantiles representing bins: array&lt;double&gt;</p>
 </li>
-<li><p><code>feature_binning(array&lt;features::string&gt; features, const map&lt;string, array&lt;number&gt;&gt; quantiles_map)</code> / <em>FUNC</em>(number weight, const array&lt;number&gt; quantiles) - Returns binned features as an array&lt;features::string&gt; / bin ID as int</p>
+<li><p><code>feature_binning(array&lt;features::string&gt; features, map&lt;string, array&lt;number&gt;&gt; quantiles_map)</code> - returns a binned feature vector as an array&lt;features::string&gt; / <code>feature_binning(number weight, array&lt;number&gt; quantiles)</code> - returns bin ID as int</p>
+<pre><code class="lang-sql">WITH extracted as (
+  <span class="hljs-keyword">select</span> 
+    extract_feature(feature) <span class="hljs-keyword">as</span> <span class="hljs-keyword">index</span>,
+    extract_weight(feature) <span class="hljs-keyword">as</span> <span class="hljs-keyword">value</span>
+  <span class="hljs-keyword">from</span>
+    <span class="hljs-keyword">input</span> l
+    LATERAL <span class="hljs-keyword">VIEW</span> explode(features) r <span class="hljs-keyword">as</span> feature
+),
+<span class="hljs-keyword">mapping</span> <span class="hljs-keyword">as</span> (
+  <span class="hljs-keyword">select</span>
+    <span class="hljs-keyword">index</span>, 
+    build_bins(<span class="hljs-keyword">value</span>, <span class="hljs-number">5</span>, <span class="hljs-literal">true</span>) <span class="hljs-keyword">as</span> quantiles <span class="hljs-comment">-- 5 bins with auto bin shrinking</span>
+  <span class="hljs-keyword">from</span>
+    extracted
+  <span class="hljs-keyword">group</span> <span class="hljs-keyword">by</span>
+    <span class="hljs-keyword">index</span>
+),
+bins <span class="hljs-keyword">as</span> (
+   <span class="hljs-keyword">select</span> 
+    to_map(<span class="hljs-keyword">index</span>, quantiles) <span class="hljs-keyword">as</span> quantiles 
+   <span class="hljs-keyword">from</span>
+    <span class="hljs-keyword">mapping</span>
+)
+<span class="hljs-keyword">select</span>
+  l.features <span class="hljs-keyword">as</span> original,
+  feature_binning(l.features, r.quantiles) <span class="hljs-keyword">as</span> features
+<span class="hljs-keyword">from</span>
+  <span class="hljs-keyword">input</span> l
+  <span class="hljs-keyword">cross</span> <span class="hljs-keyword">join</span> bins r
+
+&gt; [<span class="hljs-string">&quot;name#Jacob&quot;</span>,<span class="hljs-string">&quot;gender#Male&quot;</span>,<span class="hljs-string">&quot;age:20.0&quot;</span>] [<span class="hljs-string">&quot;name#Jacob&quot;</span>,<span class="hljs-string">&quot;gender#Male&quot;</span>,<span class="hljs-string">&quot;age:2&quot;</span>]
+&gt; [<span class="hljs-string">&quot;name#Isabella&quot;</span>,<span class="hljs-string">&quot;gender#Female&quot;</span>,<span class="hljs-string">&quot;age:20.0&quot;</span>]    [<span class="hljs-string">&quot;name#Isabella&quot;</span>,<span class="hljs-string">&quot;gender#Female&quot;</span>,<span class="hljs-string">&quot;age:2&quot;</span>]
+</code></pre>
 </li>
 </ul>
 <h2 id="feature-format-conversion">Feature format conversion</h2>
@@ -3024,7 +3057,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda
     <script>
         var gitbook = gitbook || [];
         gitbook.push(function() {
-            gitbook.page.hasChanged({"page":{"title":"List of Functions","level":"1.3","depth":1,"next":{"title":"Tips for Effective Hivemall","level":"1.4","depth":1,"path":"tips/README.md","ref":"tips/README.md","articles":[{"title":"Explicit add_bias() for better prediction","level":"1.4.1","depth":2,"path":"tips/addbias.md","ref":"tips/addbias.md","articles":[]},{"title":"Use rand_amplify() to better prediction results","level":"1.4.2","depth":2,"path":"tips/rand_amplify.md","ref":"t [...]
+            gitbook.page.hasChanged({"page":{"title":"List of Functions","level":"1.3","depth":1,"next":{"title":"Tips for Effective Hivemall","level":"1.4","depth":1,"path":"tips/README.md","ref":"tips/README.md","articles":[{"title":"Explicit add_bias() for better prediction","level":"1.4.1","depth":2,"path":"tips/addbias.md","ref":"tips/addbias.md","articles":[]},{"title":"Use rand_amplify() to better prediction results","level":"1.4.2","depth":2,"path":"tips/rand_amplify.md","ref":"t [...]
         });
     </script>
 </div>
diff --git a/userguide/misc/generic_funcs.html b/userguide/misc/generic_funcs.html
index a5fbe95..8246823 100644
--- a/userguide/misc/generic_funcs.html
+++ b/userguide/misc/generic_funcs.html
@@ -3183,7 +3183,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda
     <script>
         var gitbook = gitbook || [];
         gitbook.push(function() {
-            gitbook.page.hasChanged({"page":{"title":"List of Generic Hivemall Functions","level":"2.1","depth":1,"next":{"title":"Efficient Top-K Query Processing","level":"2.2","depth":1,"path":"misc/topk.md","ref":"misc/topk.md","articles":[]},"previous":{"title":"Map-side join causes ClassCastException on Tez","level":"1.6.5","depth":2,"path":"troubleshooting/mapjoin_classcastex.md","ref":"troubleshooting/mapjoin_classcastex.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme [...]
+            gitbook.page.hasChanged({"page":{"title":"List of Generic Hivemall Functions","level":"2.1","depth":1,"next":{"title":"Efficient Top-K Query Processing","level":"2.2","depth":1,"path":"misc/topk.md","ref":"misc/topk.md","articles":[]},"previous":{"title":"Map-side join causes ClassCastException on Tez","level":"1.6.5","depth":2,"path":"troubleshooting/mapjoin_classcastex.md","ref":"troubleshooting/mapjoin_classcastex.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme [...]
         });
     </script>
 </div>