You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hivemall.apache.org by my...@apache.org on 2019/06/28 06:26:12 UTC

[incubator-hivemall-site] branch asf-site updated: Fixed about feature binning

This is an automated email from the ASF dual-hosted git repository.

myui pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/incubator-hivemall-site.git


The following commit(s) were added to refs/heads/asf-site by this push:
     new cfedbde  Fixed about feature binning
cfedbde is described below

commit cfedbdef73df3f2c8e5f13b4b0c59cabb0a5c308
Author: Makoto Yui <my...@apache.org>
AuthorDate: Fri Jun 28 15:25:54 2019 +0900

    Fixed about feature binning
---
 userguide/ft_engineering/binning.html |  2 +-
 userguide/ft_engineering/scaling.html | 77 ++++++++++-------------------------
 userguide/misc/funcs.html             | 32 ++++++++++++---
 userguide/misc/generic_funcs.html     | 41 ++++++++++++++++++-
 4 files changed, 89 insertions(+), 63 deletions(-)

diff --git a/userguide/ft_engineering/binning.html b/userguide/ft_engineering/binning.html
index 1839374..5d75620 100644
--- a/userguide/ft_engineering/binning.html
+++ b/userguide/ft_engineering/binning.html
@@ -2674,7 +2674,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda
     <script>
         var gitbook = gitbook || [];
         gitbook.push(function() {
-            gitbook.page.hasChanged({"page":{"title":"Feature Binning","level":"3.4","depth":1,"next":{"title":"Feature Paring","level":"3.5","depth":1,"path":"ft_engineering/pairing.md","ref":"ft_engineering/pairing.md","articles":[{"title":"Polynomial features","level":"3.5.1","depth":2,"path":"ft_engineering/polynomial.md","ref":"ft_engineering/polynomial.md","articles":[]}]},"previous":{"title":"Feature Selection","level":"3.3","depth":1,"path":"ft_engineering/selection.md","ref":"ft [...]
+            gitbook.page.hasChanged({"page":{"title":"Feature Binning","level":"3.4","depth":1,"next":{"title":"Feature Paring","level":"3.5","depth":1,"path":"ft_engineering/pairing.md","ref":"ft_engineering/pairing.md","articles":[{"title":"Polynomial features","level":"3.5.1","depth":2,"path":"ft_engineering/polynomial.md","ref":"ft_engineering/polynomial.md","articles":[]}]},"previous":{"title":"Feature Selection","level":"3.3","depth":1,"path":"ft_engineering/selection.md","ref":"ft [...]
         });
     </script>
 </div>
diff --git a/userguide/ft_engineering/scaling.html b/userguide/ft_engineering/scaling.html
index 02c273d..9b30f6e 100644
--- a/userguide/ft_engineering/scaling.html
+++ b/userguide/ft_engineering/scaling.html
@@ -2384,7 +2384,6 @@
 <li><a href="#min-max-normalization">Min-Max Normalization</a></li>
 <li><a href="#feature-scaling-by-zscore">Feature scaling by zscore</a></li>
 <li><a href="#apply-normalization-to-more-complex-feature-vector">Apply Normalization to more complex feature vector</a></li>
-<li><a href="#tips-for-using-both-min-max-and-zscore-normalization">Tips for using both min-max and zscore normalization</a></li>
 </ul>
 
 </div><!-- tocstop -->
@@ -2439,7 +2438,17 @@
 </code></pre>
 <h1 id="apply-normalization-to-more-complex-feature-vector">Apply Normalization to more complex feature vector</h1>
 <p>Apply normalization to the following data.</p>
-<pre><code class="lang-sql"><span class="hljs-keyword">select</span> <span class="hljs-keyword">rowid</span>, features <span class="hljs-keyword">from</span> train <span class="hljs-keyword">limit</span> <span class="hljs-number">3</span>;
+<pre><code class="lang-sql"><span class="hljs-keyword">create</span> <span class="hljs-keyword">table</span> train <span class="hljs-keyword">as</span> 
+<span class="hljs-keyword">select</span> 
+  <span class="hljs-number">1</span> <span class="hljs-keyword">as</span> <span class="hljs-keyword">rowid</span>, <span class="hljs-built_in">array</span>(<span class="hljs-string">&quot;weight:69.613&quot;</span>,<span class="hljs-string">&quot;specific_heat:129.07&quot;</span>,<span class="hljs-string">&quot;reflectance:52.111&quot;</span>) <span class="hljs-keyword">as</span> features
+<span class="hljs-keyword">UNION</span> ALL
+<span class="hljs-keyword">select</span> 
+  <span class="hljs-number">2</span> <span class="hljs-keyword">as</span> <span class="hljs-keyword">rowid</span>, <span class="hljs-built_in">array</span>(<span class="hljs-string">&quot;weight:70.67&quot;</span>,<span class="hljs-string">&quot;specific_heat:128.161&quot;</span>,<span class="hljs-string">&quot;reflectance:52.446&quot;</span>) <span class="hljs-keyword">as</span> features
+<span class="hljs-keyword">UNION</span> ALL
+<span class="hljs-keyword">select</span> 
+  <span class="hljs-number">3</span> <span class="hljs-keyword">as</span> <span class="hljs-keyword">rowid</span>, <span class="hljs-built_in">array</span>(<span class="hljs-string">&quot;weight:72.303&quot;</span>,<span class="hljs-string">&quot;specific_heat:128.45&quot;</span>,<span class="hljs-string">&quot;reflectance:52.853&quot;</span>) <span class="hljs-keyword">as</span> features;
+
+<span class="hljs-keyword">select</span> <span class="hljs-keyword">rowid</span>, features <span class="hljs-keyword">from</span> train;
 </code></pre>
 <pre><code>1       [&quot;weight:69.613&quot;,&quot;specific_heat:129.07&quot;,&quot;reflectance:52.111&quot;]
 2       [&quot;weight:70.67&quot;,&quot;specific_heat:128.161&quot;,&quot;reflectance:52.446&quot;]
@@ -2458,10 +2467,10 @@
 ), 
 scaled <span class="hljs-keyword">as</span> (
   <span class="hljs-keyword">select</span> 
-    <span class="hljs-keyword">rowid</span>, 
-    feature, 
-    rescale(<span class="hljs-keyword">value</span>, <span class="hljs-keyword">min</span>(<span class="hljs-keyword">value</span>) <span class="hljs-keyword">over</span> (), <span class="hljs-keyword">max</span>(<span class="hljs-keyword">value</span>) <span class="hljs-keyword">over</span> ()) <span class="hljs-keyword">as</span> minmax,
-    zscore(<span class="hljs-keyword">value</span>, <span class="hljs-keyword">avg</span>(<span class="hljs-keyword">value</span>) <span class="hljs-keyword">over</span> (), <span class="hljs-keyword">stddev_pop</span>(<span class="hljs-keyword">value</span>) <span class="hljs-keyword">over</span> ()) <span class="hljs-keyword">as</span> zscore
+    <span class="hljs-keyword">rowid</span>,
+    feature,
+    rescale(<span class="hljs-keyword">value</span>, <span class="hljs-keyword">min</span>(<span class="hljs-keyword">value</span>) <span class="hljs-keyword">over</span> (<span class="hljs-keyword">partition</span> <span class="hljs-keyword">by</span> feature), <span class="hljs-keyword">max</span>(<span class="hljs-keyword">value</span>) <span class="hljs-keyword">over</span> (<span class="hljs-keyword">partition</span> <span class="hljs-keyword">by</span> feature)) <span class="hljs-k [...]
+    zscore(<span class="hljs-keyword">value</span>, <span class="hljs-keyword">avg</span>(<span class="hljs-keyword">value</span>) <span class="hljs-keyword">over</span> (<span class="hljs-keyword">partition</span> <span class="hljs-keyword">by</span> feature), <span class="hljs-keyword">stddev_pop</span>(<span class="hljs-keyword">value</span>) <span class="hljs-keyword">over</span> (<span class="hljs-keyword">partition</span> <span class="hljs-keyword">by</span> feature)) <span class=" [...]
   <span class="hljs-keyword">from</span> 
     exploded
 )
@@ -2471,57 +2480,13 @@ scaled <span class="hljs-keyword">as</span> (
 <span class="hljs-keyword">from</span>
   scaled
 <span class="hljs-keyword">group</span> <span class="hljs-keyword">by</span>
-  <span class="hljs-keyword">rowid</span>
-;
+  <span class="hljs-keyword">rowid</span>;
 </code></pre>
-<pre><code>1       [&quot;reflectance:0.5252967&quot;,&quot;specific_heat:0.19863537&quot;,&quot;weight:0.0&quot;]
-2       [&quot;reflectance:0.5950446&quot;,&quot;specific_heat:0.09166764&quot;,&quot;weight:0.052084323&quot;]
-3       [&quot;reflectance:0.6797837&quot;,&quot;specific_heat:0.12567581&quot;,&quot;weight:0.13255163&quot;]
+<pre><code>1       [&quot;reflectance:0.0&quot;,&quot;specific_heat:1.0&quot;,&quot;weight:0.0&quot;]
+2       [&quot;reflectance:0.4514809&quot;,&quot;specific_heat:0.0&quot;,&quot;weight:0.39293614&quot;]
+3       [&quot;reflectance:1.0&quot;,&quot;specific_heat:0.31792927&quot;,&quot;weight:1.0&quot;]
 ...
-</code></pre><h1 id="tips-for-using-both-min-max-and-zscore-normalization">Tips for using both min-max and zscore normalization</h1>
-<pre><code class="lang-sql">WITH quantative as (
-  <span class="hljs-keyword">select</span> <span class="hljs-keyword">id</span>, <span class="hljs-literal">true</span> <span class="hljs-keyword">as</span> minmax, <span class="hljs-string">&quot;age&quot;</span> <span class="hljs-keyword">as</span> feature, age <span class="hljs-keyword">as</span> <span class="hljs-keyword">value</span> <span class="hljs-keyword">from</span> train
-  <span class="hljs-keyword">union</span> all
-  <span class="hljs-keyword">select</span> <span class="hljs-keyword">id</span>, <span class="hljs-literal">false</span> <span class="hljs-keyword">as</span> minmax, <span class="hljs-string">&quot;balance&quot;</span> <span class="hljs-keyword">as</span> feature, balance <span class="hljs-keyword">as</span> <span class="hljs-keyword">value</span> <span class="hljs-keyword">from</span> train
-  <span class="hljs-keyword">union</span> all
-  <span class="hljs-keyword">select</span> <span class="hljs-keyword">id</span>, <span class="hljs-literal">true</span> <span class="hljs-keyword">as</span> minmax, <span class="hljs-string">&quot;day&quot;</span> <span class="hljs-keyword">as</span> feature, <span class="hljs-keyword">day</span> <span class="hljs-keyword">as</span> <span class="hljs-keyword">value</span> <span class="hljs-keyword">from</span> train
-  <span class="hljs-keyword">union</span> all
-  <span class="hljs-keyword">select</span> <span class="hljs-keyword">id</span>, <span class="hljs-literal">false</span> <span class="hljs-keyword">as</span> minmax, <span class="hljs-string">&quot;duration&quot;</span> <span class="hljs-keyword">as</span> feature, <span class="hljs-keyword">duration</span> <span class="hljs-keyword">as</span> <span class="hljs-keyword">value</span> <span class="hljs-keyword">from</span> train
-  <span class="hljs-keyword">union</span> all
-  <span class="hljs-keyword">select</span> <span class="hljs-keyword">id</span>, <span class="hljs-literal">false</span> <span class="hljs-keyword">as</span> minmax, <span class="hljs-string">&quot;campaign&quot;</span> <span class="hljs-keyword">as</span> feature, campaign <span class="hljs-keyword">as</span> <span class="hljs-keyword">value</span> <span class="hljs-keyword">from</span> train
-  <span class="hljs-keyword">union</span> all
-  <span class="hljs-keyword">select</span> <span class="hljs-keyword">id</span>, <span class="hljs-literal">false</span> <span class="hljs-keyword">as</span> minmax, <span class="hljs-string">&quot;pdays&quot;</span> <span class="hljs-keyword">as</span> feature, <span class="hljs-keyword">if</span>(pdays = <span class="hljs-number">-1</span>, <span class="hljs-number">0</span>, pdays) <span class="hljs-keyword">as</span> <span class="hljs-keyword">value</span> <span class="hljs-keyword"> [...]
-  <span class="hljs-keyword">union</span> all
-  <span class="hljs-keyword">select</span> <span class="hljs-keyword">id</span>, <span class="hljs-literal">false</span> <span class="hljs-keyword">as</span> minmax,  <span class="hljs-string">&quot;previous&quot;</span> <span class="hljs-keyword">as</span> feature, previous <span class="hljs-keyword">as</span> <span class="hljs-keyword">value</span> <span class="hljs-keyword">from</span> train  
-),
-quantative_stats <span class="hljs-keyword">as</span> (
-<span class="hljs-keyword">select</span>
-  feature,
-  <span class="hljs-keyword">avg</span>(<span class="hljs-keyword">value</span>) <span class="hljs-keyword">as</span> mean, <span class="hljs-keyword">stddev_pop</span>(<span class="hljs-keyword">value</span>) <span class="hljs-keyword">as</span> <span class="hljs-keyword">stddev</span>,
-  <span class="hljs-keyword">min</span>(<span class="hljs-keyword">value</span>) <span class="hljs-keyword">as</span> <span class="hljs-keyword">min</span>, <span class="hljs-keyword">max</span>(<span class="hljs-keyword">value</span>) <span class="hljs-keyword">as</span> <span class="hljs-keyword">max</span>
-<span class="hljs-keyword">from</span>
-  quantative
-<span class="hljs-keyword">group</span> <span class="hljs-keyword">by</span>
-  feature
-), 
-quantative_norm <span class="hljs-keyword">as</span> (
-<span class="hljs-keyword">select</span> 
-  t1.<span class="hljs-keyword">id</span>,
-  collect_list(
-   feature(
-      t1.feature, 
-      <span class="hljs-keyword">if</span>(t1.minmax,rescale(t1.<span class="hljs-keyword">value</span>, t2.<span class="hljs-keyword">min</span>, t2.<span class="hljs-keyword">max</span>),zscore(t1.<span class="hljs-keyword">value</span>, t2.mean, t2.<span class="hljs-keyword">stddev</span>))
-    )
-  ) <span class="hljs-keyword">as</span> features
-<span class="hljs-keyword">from</span> 
-  quantative t1
-  <span class="hljs-keyword">JOIN</span> quantative_stats t2 <span class="hljs-keyword">ON</span> (t1.feature = t2.feature)   
-<span class="hljs-keyword">group</span> <span class="hljs-keyword">by</span>
-  t1.<span class="hljs-keyword">id</span>
-)
-...
-</code></pre>
-<p><div id="page-footer" class="localized-footer"><hr><!--
+</code></pre><p><div id="page-footer" class="localized-footer"><hr><!--
   Licensed to the Apache Software Foundation (ASF) under one
   or more contributor license agreements.  See the NOTICE file
   distributed with this work for additional information
@@ -2576,7 +2541,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda
     <script>
         var gitbook = gitbook || [];
         gitbook.push(function() {
-            gitbook.page.hasChanged({"page":{"title":"Feature Scaling","level":"3.1","depth":1,"next":{"title":"Feature Hashing","level":"3.2","depth":1,"path":"ft_engineering/hashing.md","ref":"ft_engineering/hashing.md","articles":[]},"previous":{"title":"Approximate Aggregate Functions","level":"2.4","depth":1,"path":"misc/approx.md","ref":"misc/approx.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-c [...]
+            gitbook.page.hasChanged({"page":{"title":"Feature Scaling","level":"3.1","depth":1,"next":{"title":"Feature Hashing","level":"3.2","depth":1,"path":"ft_engineering/hashing.md","ref":"ft_engineering/hashing.md","articles":[]},"previous":{"title":"Approximate Aggregate Functions","level":"2.4","depth":1,"path":"misc/approx.md","ref":"misc/approx.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-c [...]
         });
     </script>
 </div>
diff --git a/userguide/misc/funcs.html b/userguide/misc/funcs.html
index f732c7b..a77222d 100644
--- a/userguide/misc/funcs.html
+++ b/userguide/misc/funcs.html
@@ -2461,13 +2461,25 @@ Reference: <a href="https://papers.nips.cc/paper/3848-adaptive-regularization-of
 <span class="hljs-keyword">GROUP</span> <span class="hljs-keyword">BY</span> feature
 </code></pre>
 </li>
-<li><p><code>train_pa1_regr(array&lt;int|bigint|string&gt; features, float target [, constant string options])</code> - PA-1 regressor that returns a relation consists of <code>&amp;lt;int|bigint|string&amp;gt; feature, float weight</code>. Find PA-1 algorithm detail in <a href="http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf" target="_blank">http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf</a></p>
+<li><p><code>train_pa1_regr(array&lt;int|bigint|string&gt; features, float target [, constant string options])</code> - PA-1 regressor that returns a relation consisting of <code>(int|bigint|string) feature, float weight</code>.</p>
+<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> 
+ feature,
+ <span class="hljs-keyword">avg</span>(weight) <span class="hljs-keyword">as</span> weight
+<span class="hljs-keyword">FROM</span> 
+ (<span class="hljs-keyword">SELECT</span> 
+     train_pa1_regr(features,label) <span class="hljs-keyword">as</span> (feature,weight)
+  <span class="hljs-keyword">FROM</span> 
+     training_data
+ ) t 
+<span class="hljs-keyword">GROUP</span> <span class="hljs-keyword">BY</span> feature
+</code></pre>
+<p>Reference: <a href="http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf" target="_blank">Koby Crammer et al., Online Passive-Aggressive Algorithms. Journal of Machine Learning Research, 2006.</a><br></p>
 </li>
-<li><p><code>train_pa1a_regr(array&lt;int|bigint|string&gt; features, float target [, constant string options])</code> - Returns a relation consists of <code>&amp;lt;int|bigint|string&amp;gt; feature, float weight</code>.</p>
+<li><p><code>train_pa1a_regr(array&lt;int|bigint|string&gt; features, float target [, constant string options])</code> - Returns a relation consisting of <code>(int|bigint|string) feature, float weight</code>.</p>
 </li>
-<li><p><code>train_pa2_regr(array&lt;int|bigint|string&gt; features, float target [, constant string options])</code> - Returns a relation consists of <code>&amp;lt;int|bigint|string&amp;gt; feature, float weight</code>.</p>
+<li><p><code>train_pa2_regr(array&lt;int|bigint|string&gt; features, float target [, constant string options])</code> - Returns a relation consisting of <code>(int|bigint|string) feature, float weight</code>.</p>
 </li>
-<li><p><code>train_pa2a_regr(array&lt;int|bigint|string&gt; features, float target [, constant string options])</code> - Returns a relation consists of <code>&amp;lt;int|bigint|string&amp;gt; feature, float weight</code>.</p>
+<li><p><code>train_pa2a_regr(array&lt;int|bigint|string&gt; features, float target [, constant string options])</code> - Returns a relation consisting of <code>(int|bigint|string) feature, float weight</code>.</p>
 </li>
 <li><p><code>train_regressor(list&lt;string|int|bigint&gt; features, double label [, const string options])</code> - Returns a relation consists of &lt;string|int|bigint feature, float weight&gt;</p>
 <pre><code>Build a prediction model by a generic regressor
@@ -2627,6 +2639,16 @@ Reference: <a href="https://papers.nips.cc/paper/3848-adaptive-regularization-of
 </li>
 <li><p><code>to_dense_features(array&lt;string&gt; feature_vector, int dimensions)</code> - Returns a dense feature in array&lt;float&gt;</p>
 </li>
+<li><p><code>to_libsvm_format(array&lt;string&gt; features [, double/integer target, const string options])</code> - Returns a string representation of the features in LIBSVM format</p>
+<pre><code class="lang-sql">Usage:
+ <span class="hljs-keyword">select</span> to_libsvm_format(<span class="hljs-built_in">array</span>(<span class="hljs-string">&apos;apple:3.4&apos;</span>,<span class="hljs-string">&apos;orange:2.1&apos;</span>))
+ &gt; <span class="hljs-number">6284535</span>:<span class="hljs-number">3.4</span> <span class="hljs-number">8104713</span>:<span class="hljs-number">2.1</span>
+ <span class="hljs-keyword">select</span> to_libsvm_format(<span class="hljs-built_in">array</span>(<span class="hljs-string">&apos;apple:3.4&apos;</span>,<span class="hljs-string">&apos;orange:2.1&apos;</span>), <span class="hljs-string">&apos;-features 10&apos;</span>)
+ &gt; <span class="hljs-number">3</span>:<span class="hljs-number">2.1</span> <span class="hljs-number">7</span>:<span class="hljs-number">3.4</span>
+ <span class="hljs-keyword">select</span> to_libsvm_format(<span class="hljs-built_in">array</span>(<span class="hljs-string">&apos;7:3.4&apos;</span>,<span class="hljs-string">&apos;3:2.1&apos;</span>), <span class="hljs-number">5.0</span>)
+ &gt; <span class="hljs-number">5.0</span> <span class="hljs-number">3</span>:<span class="hljs-number">2.1</span> <span class="hljs-number">7</span>:<span class="hljs-number">3.4</span>
+</code></pre>
+</li>
 <li><p><code>to_sparse_features(array&lt;float&gt; feature_vector)</code> - Returns a sparse feature in array&lt;string&gt;</p>
 </li>
 </ul>
@@ -3002,7 +3024,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda
     <script>
         var gitbook = gitbook || [];
         gitbook.push(function() {
-            gitbook.page.hasChanged({"page":{"title":"List of Functions","level":"1.3","depth":1,"next":{"title":"Tips for Effective Hivemall","level":"1.4","depth":1,"path":"tips/README.md","ref":"tips/README.md","articles":[{"title":"Explicit add_bias() for better prediction","level":"1.4.1","depth":2,"path":"tips/addbias.md","ref":"tips/addbias.md","articles":[]},{"title":"Use rand_amplify() to better prediction results","level":"1.4.2","depth":2,"path":"tips/rand_amplify.md","ref":"t [...]
+            gitbook.page.hasChanged({"page":{"title":"List of Functions","level":"1.3","depth":1,"next":{"title":"Tips for Effective Hivemall","level":"1.4","depth":1,"path":"tips/README.md","ref":"tips/README.md","articles":[{"title":"Explicit add_bias() for better prediction","level":"1.4.1","depth":2,"path":"tips/addbias.md","ref":"tips/addbias.md","articles":[]},{"title":"Use rand_amplify() to better prediction results","level":"1.4.2","depth":2,"path":"tips/rand_amplify.md","ref":"t [...]
         });
     </script>
 </div>
diff --git a/userguide/misc/generic_funcs.html b/userguide/misc/generic_funcs.html
index 556f1e4..a5fbe95 100644
--- a/userguide/misc/generic_funcs.html
+++ b/userguide/misc/generic_funcs.html
@@ -2833,6 +2833,45 @@ FROM tmp;
 &gt; [{&quot;key&quot;:&quot;one&quot;,&quot;value&quot;:1},{&quot;key&quot;:&quot;two&quot;,&quot;value&quot;:2}]
 </code></pre>
 </li>
+<li><p><code>map_roulette(Map&lt;K, number&gt; map [, (const) int/bigint seed])</code> - Returns a map key based on weighted random sampling of map values. Average of values is used for null values</p>
+<pre><code class="lang-sql">-- `map_roulette(map&lt;key, number&gt; [, integer seed])` returns key by weighted random selection
+SELECT 
+  map_roulette(to_map(a, b)) -- 25% Tom, 21% Zhang, 54% Wang
+FROM ( -- see https://issues.apache.org/jira/browse/HIVE-17406
+  select &apos;Wang&apos; as a, 54 as b
+  union all
+  select &apos;Zhang&apos; as a, 21 as b
+  union all
+  select &apos;Tom&apos; as a, 25 as b
+) tmp;
+&gt; Wang
+
+-- Weighted random selection, filling null weights with the average value
+SELECT
+  map_roulette(map(1, 0.5, &apos;Wang&apos;, null)), -- 50% Wang, 50% 1
+  map_roulette(map(1, 0.5, &apos;Wang&apos;, null, &apos;Zhang&apos;, null)) -- 1/3 Wang, 1/3 1, 1/3 Zhang
+;
+
+-- NULL will be returned if every key is null
+SELECT 
+  map_roulette(map()),
+  map_roulette(map(null, null, null, null));
+&gt; NULL    NULL
+
+-- Return NULL if all weights are zero
+SELECT
+  map_roulette(map(1, 0)),
+  map_roulette(map(1, 0, &apos;5&apos;, 0))
+;
+&gt; NULL    NULL
+
+-- map_roulette does not support non-numeric weights or negative weights.
+SELECT map_roulette(map(&apos;Wong&apos;, &apos;A string&apos;, &apos;Zhao&apos;, 2));
+&gt; HiveException: Error evaluating map_roulette(map(&apos;Wong&apos;:&apos;A string&apos;,&apos;Zhao&apos;:2))
+SELECT map_roulette(map(&apos;Wong&apos;, -2, &apos;Zhao&apos;, 2));
+&gt; UDFArgumentException: Map value must be greather than or equals to zero: -2
+</code></pre>
+</li>
 <li><p><code>map_tail_n(map SRC, int N)</code> - Returns the last N elements from a sorted array of SRC</p>
 </li>
 <li><p><code>merge_maps(Map x)</code> - Returns a map which contains the union of an aggregation of maps. Note that an existing value of a key can be replaced with the other duplicate key entry.</p>
@@ -3144,7 +3183,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda
     <script>
         var gitbook = gitbook || [];
         gitbook.push(function() {
-            gitbook.page.hasChanged({"page":{"title":"List of Generic Hivemall Functions","level":"2.1","depth":1,"next":{"title":"Efficient Top-K Query Processing","level":"2.2","depth":1,"path":"misc/topk.md","ref":"misc/topk.md","articles":[]},"previous":{"title":"Map-side join causes ClassCastException on Tez","level":"1.6.5","depth":2,"path":"troubleshooting/mapjoin_classcastex.md","ref":"troubleshooting/mapjoin_classcastex.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme [...]
+            gitbook.page.hasChanged({"page":{"title":"List of Generic Hivemall Functions","level":"2.1","depth":1,"next":{"title":"Efficient Top-K Query Processing","level":"2.2","depth":1,"path":"misc/topk.md","ref":"misc/topk.md","articles":[]},"previous":{"title":"Map-side join causes ClassCastException on Tez","level":"1.6.5","depth":2,"path":"troubleshooting/mapjoin_classcastex.md","ref":"troubleshooting/mapjoin_classcastex.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme [...]
         });
     </script>
 </div>