You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hivemall.apache.org by my...@apache.org on 2019/06/27 18:02:45 UTC
[incubator-hivemall] branch master updated: [HIVEMALL-259][DOC] Refactor feature_binning UDF

This is an automated email from the ASF dual-hosted git repository.

myui pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hivemall.git


The following commit(s) were added to refs/heads/master by this push:
     new 028306e  [HIVEMALL-259][DOC] Refactor feature_binning UDF
028306e is described below

commit 028306e77cba45e587429412405b71339cab411b
Author: Makoto Yui <my...@apache.org>
AuthorDate: Fri Jun 28 03:02:38 2019 +0900

    [HIVEMALL-259][DOC] Refactor feature_binning UDF
    
    ## What changes were proposed in this pull request?
    
    Refactor feature_binning UDF and update the function usage
    
    ## What type of PR is it?
    
    Documentation, Refactoring
    
    ## What is the Jira issue?
    
    https://issues.apache.org/jira/browse/HIVEMALL-259
    
    ## How was this patch tested?
    
    unit tests, manual tests on EMR
    
    ## How to use this feature?
    
    ```
    WITH extracted as (
      select
        extract_feature(feature) as index,
        extract_weight(feature) as value
      from
        input l
        LATERAL VIEW explode(features) r as feature
    ),
    mapping as (
      select
        index,
        build_bins(value, 5, true) as quantiles -- 5 bins with auto bin shrinking
      from
        extracted
      group by
        index
    ),
    bins as (
       select
        to_map(index, quantiles) as quantiles
       from
        mapping
    )
    select
      l.features as original,
      feature_binning(l.features, r.quantiles) as features
    from
      input l
      cross join bins r
    ```
    
    see https://gist.github.com/myui/f943fa3ce1a7e1ac3f2dd9a7f9fa703b
    
    ## Checklist
    
    (Please remove this section if not needed; check `x` for YES, blank for NO)
    
    - [x] Did you apply source code formatter, i.e., `./bin/format_code.sh`, for your commit?
    - [x] Did you run system tests on Hive (or Spark)?
    
    Author: Makoto Yui <my...@apache.org>
    
    Closes #195 from myui/HIVEMALL-259.
---
 .../hivemall/ftvec/binning/FeatureBinningUDF.java  | 187 ++++++++++++---------
 .../ftvec/binning/FeatureBinningUDFTest.java       |  81 +++++++++
 docs/gitbook/ft_engineering/binning.md             | 178 +++++++++++++++-----
 docs/gitbook/misc/funcs.md                         |  36 +++-
 4 files changed, 360 insertions(+), 122 deletions(-)

diff --git a/core/src/main/java/hivemall/ftvec/binning/FeatureBinningUDF.java b/core/src/main/java/hivemall/ftvec/binning/FeatureBinningUDF.java
index f713937..2498154 100644
--- a/core/src/main/java/hivemall/ftvec/binning/FeatureBinningUDF.java
+++ b/core/src/main/java/hivemall/ftvec/binning/FeatureBinningUDF.java
@@ -18,7 +18,18 @@
  */
 package hivemall.ftvec.binning;
 
+import hivemall.annotations.VisibleForTesting;
 import hivemall.utils.hadoop.HiveUtils;
+import hivemall.utils.lang.StringUtils;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import javax.annotation.Nonnull;
+
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
@@ -37,12 +48,43 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspe
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.Text;
 
-import java.util.*;
-
+// @formatter:off
 @Description(name = "feature_binning",
-        value = "_FUNC_(array<features::string> features, const map<string, array<number>> quantiles_map)"
-                + " / _FUNC_(number weight, const array<number> quantiles)"
-                + " - Returns binned features as an array<features::string> / bin ID as int")
+        value = "_FUNC_(array<features::string> features, map<string, array<number>> quantiles_map)"
+                + " - returns a binned feature vector as an array<features::string>\n"
+                + "_FUNC_(number weight, array<number> quantiles) - returns bin ID as int",
+                extended = "WITH extracted as (\n" + 
+                        "  select \n" + 
+                        "    extract_feature(feature) as index,\n" + 
+                        "    extract_weight(feature) as value\n" + 
+                        "  from\n" + 
+                        "    input l\n" + 
+                        "    LATERAL VIEW explode(features) r as feature\n" + 
+                        "),\n" + 
+                        "mapping as (\n" + 
+                        "  select\n" + 
+                        "    index, \n" + 
+                        "    build_bins(value, 5, true) as quantiles -- 5 bins with auto bin shrinking\n" + 
+                        "  from\n" + 
+                        "    extracted\n" + 
+                        "  group by\n" + 
+                        "    index\n" + 
+                        "),\n" + 
+                        "bins as (\n" + 
+                        "   select \n" + 
+                        "    to_map(index, quantiles) as quantiles \n" + 
+                        "   from\n" + 
+                        "    mapping\n" + 
+                        ")\n" + 
+                        "select\n" + 
+                        "  l.features as original,\n" + 
+                        "  feature_binning(l.features, r.quantiles) as features\n" + 
+                        "from\n" + 
+                        "  input l\n" + 
+                        "  cross join bins r\n\n" +
+                        "> [\"name#Jacob\",\"gender#Male\",\"age:20.0\"] [\"name#Jacob\",\"gender#Male\",\"age:2\"]\n" +
+                        "> [\"name#Isabella\",\"gender#Female\",\"age:20.0\"]    [\"name#Isabella\",\"gender#Female\",\"age:2\"]")
+// @formatter:on
 @UDFType(deterministic = true, stateful = false)
 public final class FeatureBinningUDF extends GenericUDF {
     private boolean multiple = true;
@@ -53,38 +95,34 @@ public final class FeatureBinningUDF extends GenericUDF {
     private StringObjectInspector keyOI;
     private ListObjectInspector quantilesOI;
     private PrimitiveObjectInspector quantileOI;
-
     private PrimitiveObjectInspector weightOI;
 
-    private Map<Text, double[]> quantilesMap = null;
-    private double[] quantiles = null;
-
     @Override
-    public ObjectInspector initialize(ObjectInspector[] OIs) throws UDFArgumentException {
-        if (OIs.length != 2) {
-            throw new UDFArgumentLengthException("Specify two arguments");
+    public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
+        if (argOIs.length != 2) {
+            throw new UDFArgumentLengthException("Specify two arguments :" + argOIs.length);
         }
 
-        if (HiveUtils.isListOI(OIs[0]) && HiveUtils.isMapOI(OIs[1])) {
-            // for (array<features::string> features, const map<string, array<number>> quantiles_map)
+        if (HiveUtils.isListOI(argOIs[0]) && HiveUtils.isMapOI(argOIs[1])) {
+            // feature_binning(array<features::string> features, map<string, array<number>> quantiles_map)
 
             if (!HiveUtils.isStringOI(
-                ((ListObjectInspector) OIs[0]).getListElementObjectInspector())) {
+                ((ListObjectInspector) argOIs[0]).getListElementObjectInspector())) {
                 throw new UDFArgumentTypeException(0,
-                    "Only array<string> type argument is acceptable but " + OIs[0].getTypeName()
-                            + " was passed as `features`");
+                    "Only array<string> type argument can be accepted but "
+                            + argOIs[0].getTypeName() + " was passed as `features`");
             }
-            featuresOI = HiveUtils.asListOI(OIs[0]);
+            featuresOI = HiveUtils.asListOI(argOIs[0]);
             featureOI = HiveUtils.asStringOI(featuresOI.getListElementObjectInspector());
 
-            quantilesMapOI = HiveUtils.asMapOI(OIs[1]);
+            quantilesMapOI = HiveUtils.asMapOI(argOIs[1]);
             if (!HiveUtils.isStringOI(quantilesMapOI.getMapKeyObjectInspector())
                     || !HiveUtils.isListOI(quantilesMapOI.getMapValueObjectInspector())
                     || !HiveUtils.isNumberOI(
                         ((ListObjectInspector) quantilesMapOI.getMapValueObjectInspector()).getListElementObjectInspector())) {
                 throw new UDFArgumentTypeException(1,
-                    "Only map<string, array<number>> type argument is acceptable but "
-                            + OIs[1].getTypeName() + " was passed as `quantiles_map`");
+                    "Only map<string, array<number>> type argument can be accepted but "
+                            + argOIs[1].getTypeName() + " was passed as `quantiles_map`");
             }
             keyOI = HiveUtils.asStringOI(quantilesMapOI.getMapKeyObjectInspector());
             quantilesOI = HiveUtils.asListOI(quantilesMapOI.getMapValueObjectInspector());
@@ -95,16 +133,16 @@ public final class FeatureBinningUDF extends GenericUDF {
 
             return ObjectInspectorFactory.getStandardListObjectInspector(
                 PrimitiveObjectInspectorFactory.writableStringObjectInspector);
-        } else if (HiveUtils.isPrimitiveOI(OIs[0]) && HiveUtils.isListOI(OIs[1])) {
-            // for (number weight, const array<number> quantiles)
+        } else if (HiveUtils.isPrimitiveOI(argOIs[0]) && HiveUtils.isListOI(argOIs[1])) {
+            // feature_binning(number weight, array<number> quantiles)
 
-            weightOI = HiveUtils.asDoubleCompatibleOI(OIs[0]);
+            weightOI = HiveUtils.asDoubleCompatibleOI(argOIs[0]);
 
-            quantilesOI = HiveUtils.asListOI(OIs[1]);
+            quantilesOI = HiveUtils.asListOI(argOIs[1]);
             if (!HiveUtils.isNumberOI(quantilesOI.getListElementObjectInspector())) {
                 throw new UDFArgumentTypeException(1,
-                    "Only array<number> type argument is acceptable but " + OIs[1].getTypeName()
-                            + " was passed as `quantiles`");
+                    "Only array<number> type argument can be accepted but "
+                            + argOIs[1].getTypeName() + " was passed as `quantiles`");
             }
             quantileOI =
                     HiveUtils.asDoubleCompatibleOI(quantilesOI.getListElementObjectInspector());
@@ -115,86 +153,81 @@ public final class FeatureBinningUDF extends GenericUDF {
         } else {
             throw new UDFArgumentTypeException(0,
                 "Only <array<features::string>, map<string, array<number>>> "
-                        + "or <number, array<number>> type arguments are accepted but <"
-                        + OIs[0].getTypeName() + ", " + OIs[1].getTypeName() + "> was passed.");
+                        + "or <number, array<number>> type arguments can be accepted but <"
+                        + argOIs[0].getTypeName() + ", " + argOIs[1].getTypeName()
+                        + "> was passed.");
         }
     }
 
+    private transient Map<String, double[]> quantilesMap;
+    private transient double[] quantilesArray;
+
     @Override
-    public Object evaluate(DeferredObject[] dObj) throws HiveException {
+    public Object evaluate(DeferredObject[] args) throws HiveException {
+        final Object arg0 = args[0].get();
+        if (arg0 == null) {
+            return null;
+        }
+        final Object arg1 = args[1].get();
+        if (arg1 == null) {
+            throw new UDFArgumentException(
+                "The second argument (i.e., quantiles) MUST be non-null value");
+        }
+
         if (multiple) {
-            // init quantilesMap
             if (quantilesMap == null) {
-                quantilesMap = new HashMap<Text, double[]>();
-                final Map<?, ?> _quantilesMap = quantilesMapOI.getMap(dObj[1].get());
-
-                for (Object _key : _quantilesMap.keySet()) {
-                    final Text key = new Text(keyOI.getPrimitiveJavaObject(_key));
-                    final double[] val = HiveUtils.asDoubleArray(_quantilesMap.get(key),
-                        quantilesOI, quantileOI);
-                    quantilesMap.put(key, val);
+                final Map<?, ?> map = quantilesMapOI.getMap(arg1);
+                quantilesMap = new HashMap<String, double[]>(map.size() * 2);
+                for (Map.Entry<?, ?> e : map.entrySet()) {
+                    String k = keyOI.getPrimitiveJavaObject(e.getKey());
+                    double[] v = HiveUtils.asDoubleArray(e.getValue(), quantilesOI, quantileOI);
+                    quantilesMap.put(k, v);
                 }
             }
 
-            final List<?> fs = featuresOI.getList(dObj[0].get());
+            final List<?> features = featuresOI.getList(arg0);
             final List<Text> result = new ArrayList<Text>();
-            for (Object f : fs) {
+            for (Object f : features) {
                 final String entry = featureOI.getPrimitiveJavaObject(f);
-                final int pos = entry.indexOf(":");
 
-                if (pos < 0) {
-                    // categorical
+                final int pos = entry.indexOf(':');
+                if (pos < 0) { // categorical
                     result.add(new Text(entry));
-                } else {
-                    // quantitative
-                    final Text key = new Text(entry.substring(0, pos));
-                    String val = entry.substring(pos + 1);
-
-                    // binning
-                    if (quantilesMap.containsKey(key)) {
-                        val = String.valueOf(
-                            findBin(quantilesMap.get(key), Double.parseDouble(val)));
+                } else { // quantitative
+                    final String k = entry.substring(0, pos);
+                    String v = entry.substring(pos + 1);
+                    final double[] bins = quantilesMap.get(k);
+                    if (bins != null) { // binning
+                        v = String.valueOf(findBin(bins, Double.parseDouble(v)));
                     }
-                    result.add(new Text(key + ":" + val));
+                    result.add(new Text(k + ':' + v));
                 }
             }
-
             return result;
         } else {
-            // init quantiles
-            if (quantiles == null) {
-                quantiles = HiveUtils.asDoubleArray(dObj[1].get(), quantilesOI, quantileOI);
+            if (quantilesArray == null) {
+                quantilesArray = HiveUtils.asDoubleArray(arg1, quantilesOI, quantileOI);
             }
 
-            return new IntWritable(findBin(quantiles,
-                PrimitiveObjectInspectorUtils.getDouble(dObj[0].get(), weightOI)));
+            return new IntWritable(
+                findBin(quantilesArray, PrimitiveObjectInspectorUtils.getDouble(arg0, weightOI)));
         }
     }
 
-    private int findBin(double[] _quantiles, double d) throws HiveException {
-        if (_quantiles.length < 3) {
+    @VisibleForTesting
+    static int findBin(@Nonnull final double[] quantiles, final double value) throws HiveException {
+        if (quantiles.length < 3) {
             throw new HiveException(
                 "Length of `quantiles` should be greater than or equal to three but "
-                        + _quantiles.length + ".");
+                        + quantiles.length + ".");
         }
 
-        int res = Arrays.binarySearch(_quantiles, d);
-        return (res < 0) ? ~res - 1 : (res == 0) ? 0 : res - 1;
+        final int pos = Arrays.binarySearch(quantiles, value);
+        return (pos < 0) ? ~pos - 1 : (pos == 0) ? 0 : pos - 1;
     }
 
     @Override
     public String getDisplayString(String[] children) {
-        final StringBuilder sb = new StringBuilder();
-        sb.append("feature_binning");
-        sb.append("(");
-        if (children.length > 0) {
-            sb.append(children[0]);
-            for (int i = 1; i < children.length; i++) {
-                sb.append(", ");
-                sb.append(children[i]);
-            }
-        }
-        sb.append(")");
-        return sb.toString();
+        return "feature_binning(" + StringUtils.join(children, ',') + ')';
     }
 }
diff --git a/core/src/test/java/hivemall/ftvec/binning/FeatureBinningUDFTest.java b/core/src/test/java/hivemall/ftvec/binning/FeatureBinningUDFTest.java
new file mode 100644
index 0000000..651f90c
--- /dev/null
+++ b/core/src/test/java/hivemall/ftvec/binning/FeatureBinningUDFTest.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.ftvec.binning;
+
+import static java.lang.Double.NEGATIVE_INFINITY;
+import static java.lang.Double.NaN;
+import static java.lang.Double.POSITIVE_INFINITY;
+import static org.junit.Assert.assertEquals;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.junit.Test;
+
+public class FeatureBinningUDFTest {
+
+    @Test
+    public void testNaN() throws HiveException {
+        // If num_bins = 3, the bins become something like  [-Inf, 1], (1, 10], (10, Inf]. 
+        final double[] bin = new double[] {NEGATIVE_INFINITY, 1.d, 10.d, POSITIVE_INFINITY};
+        assertEquals(2, FeatureBinningUDF.findBin(bin, POSITIVE_INFINITY));
+        assertEquals(3, FeatureBinningUDF.findBin(bin, NaN));
+    }
+
+    @Test
+    public void test3Bins() throws HiveException {
+        // If num_bins = 3, the bins become something like  [-Inf, 1], (1, 10], (10, Inf]. 
+        final double[] bin = new double[] {NEGATIVE_INFINITY, 1.d, 10.d, POSITIVE_INFINITY};
+        assertEquals(0, FeatureBinningUDF.findBin(bin, NEGATIVE_INFINITY));
+        assertEquals(0, FeatureBinningUDF.findBin(bin, 1.d));
+
+        assertEquals(1, FeatureBinningUDF.findBin(bin, 1.1d));
+        assertEquals(1, FeatureBinningUDF.findBin(bin, 10.d));
+
+        assertEquals(2, FeatureBinningUDF.findBin(bin, 10.1d));
+        assertEquals(2, FeatureBinningUDF.findBin(bin, POSITIVE_INFINITY));
+    }
+
+    @Test
+    public void test4Bins() throws HiveException {
+        // If num_bins = 4, the bins become something like [-Inf, 0.111], (0.111, 0.222], (0.222, 0.333], (0.333, Inf]. 
+        final double[] bin =
+                new double[] {NEGATIVE_INFINITY, 0.111d, 0.222d, 0.333d, POSITIVE_INFINITY};
+        assertEquals(0, FeatureBinningUDF.findBin(bin, NEGATIVE_INFINITY));
+        assertEquals(0, FeatureBinningUDF.findBin(bin, -1.d));
+        assertEquals(0, FeatureBinningUDF.findBin(bin, 0.110d));
+        assertEquals(0, FeatureBinningUDF.findBin(bin, 0.111d));
+
+        assertEquals(1, FeatureBinningUDF.findBin(bin, 0.112d));
+        assertEquals(1, FeatureBinningUDF.findBin(bin, 0.2d));
+        assertEquals(1, FeatureBinningUDF.findBin(bin, 0.222d));
+        assertEquals(1, FeatureBinningUDF.findBin(bin, 0.2220d));
+
+        assertEquals(2, FeatureBinningUDF.findBin(bin, 0.223d));
+        assertEquals(2, FeatureBinningUDF.findBin(bin, 0.3d));
+        assertEquals(2, FeatureBinningUDF.findBin(bin, 0.332d));
+        assertEquals(2, FeatureBinningUDF.findBin(bin, 0.333d));
+
+        assertEquals(3, FeatureBinningUDF.findBin(bin, 0.334d));
+        assertEquals(3, FeatureBinningUDF.findBin(bin, 0.4d));
+        assertEquals(3, FeatureBinningUDF.findBin(bin, 10000d));
+        assertEquals(3, FeatureBinningUDF.findBin(bin, POSITIVE_INFINITY));
+
+        assertEquals(4, FeatureBinningUDF.findBin(bin, NaN));
+    }
+
+}
diff --git a/docs/gitbook/ft_engineering/binning.md b/docs/gitbook/ft_engineering/binning.md
index cd1ecbb..4634f92 100644
--- a/docs/gitbook/ft_engineering/binning.md
+++ b/docs/gitbook/ft_engineering/binning.md
@@ -17,8 +17,9 @@
   under the License.
 -->
 
-Feature binning is a method of dividing quantitative variables into categorical values.
-It groups quantitative values into a pre-defined number of bins.
+Feature binning is a method of dividing quantitative variables into categorical values. It groups quantitative values into a pre-defined number of bins.
+
+If the number of bins is set to 3, the bin ranges become something like `[-Inf, 1], (1, 10], (10, Inf]`.
 
 *Note: This feature is supported from Hivemall v0.5-rc.1 or later.*
 
@@ -30,38 +31,91 @@ Prepare sample data (*users* table) first as follows:
 
 ``` sql
 CREATE TABLE users (
-  name string, age int, gender string
+  rowid int, name string, age int, gender string
 );
-
 INSERT INTO users VALUES
-  ('Jacob', 20, 'Male'),
-  ('Mason', 22, 'Male'),
-  ('Sophia', 35, 'Female'),
-  ('Ethan', 55, 'Male'),
-  ('Emma', 15, 'Female'),
-  ('Noah', 46, 'Male'),
-  ('Isabella', 20, 'Female');
+  (1, 'Jacob', 20, 'Male'),
+  (2, 'Mason', 22, 'Male'),
+  (3, 'Sophia', 35, 'Female'),
+  (4, 'Ethan', 55, 'Male'),
+  (5, 'Emma', 15, 'Female'),
+  (6, 'Noah', 46, 'Male'),
+  (7, 'Isabella', 20, 'Female')
+;
+
+CREATE TABLE input as
+SELECT
+  rowid,
+  array_concat(
+    categorical_features(
+      array('name', 'gender'),
+      name, gender
+    ),
+    quantitative_features(
+      array('age'),
+      age
+    )
+  ) AS features
+FROM
+  users;
+  
+select * from input limit 2;
 ```
 
-## A. Feature Vector trasformation by applying Feature Binning
+| input.rowid | input.features |
+|:--|:--|
+|1 | ["name#Jacob","gender#Male","age:20.0"] |
+|2 | ["name#Mason","gender#Male","age:22.0"] |
 
-``` sql
-WITH t AS (
+## Feature Vector transformation by applying Feature Binning
+
+Now, convert `age` values into 3 bins.
+
+```sql
+SELECT
+  map('age', build_bins(age, 3)) AS quantiles_map
+FROM
+  users
+```
+
+> {"age":[-Infinity,18.333333333333332,30.666666666666657,Infinity]}
+
+In the above query result, you can find 4 values for `age` in `quantiles_map`. These are the boundaries (thresholds) of the 3 bins.
+
+```sql
+WITH bins as (
   SELECT
-    array_concat(
-      categorical_features(
-        array('name', 'gender'),
-	name, gender
-      ),
-      quantitative_features(
-	array('age'),
-	age
-      )
-    ) AS features
+    map('age', build_bins(age, 3)) AS quantiles_map
   FROM
     users
-),
-bins AS (
+)
+select
+  feature_binning(
+    array('age:-Infinity', 'age:-1', 'age:0', 'age:1', 'age:18.333333333333331', 'age:18.333333333333332'), quantiles_map
+  ),
+  feature_binning(
+    array('age:18.3333333333333333', 'age:18.33333333333334', 'age:19', 'age:30', 'age:30.666666666666656', 'age:30.666666666666657'), quantiles_map
+  ),
+  feature_binning(
+    array('age:666666666666658', 'age:30.66666666666666', 'age:31', 'age:99', 'age:Infinity'), quantiles_map
+  ),
+  feature_binning(
+    array('age:NaN'), quantiles_map
+  ),
+  feature_binning( -- not in map
+    array('weight:60.3'), quantiles_map
+  )
+from
+  bins
+```
+
+> ["age:0","age:0","age:0","age:0","age:0","age:0"]       ["age:0","age:1","age:1","age:1","age:1","age:1"]       ["age:2","age:2","age:2","age:2","age:2"]  ["age:3"]       ["weight:60.3"]
+
+The following query shows more practical usage:
+
+``` sql
+WITH bins AS (
   SELECT
     map('age', build_bins(age, 3)) AS quantiles_map
   FROM
@@ -70,23 +124,63 @@ bins AS (
 SELECT
   feature_binning(features, quantiles_map) AS features
 FROM
-  t CROSS JOIN bins;
+  input
+  CROSS JOIN bins;
 ```
 
-*Result*
-
 | features: `array<features::string>` |
-| :-: |
+| :-- |
 | ["name#Jacob","gender#Male","age:1"] |
 | ["name#Mason","gender#Male","age:1"] |
 | ["name#Sophia","gender#Female","age:2"] |
 | ["name#Ethan","gender#Male","age:2"] |
-| ["name#Emma","gender#Female","age:0"] |
-| ["name#Noah","gender#Male","age:2"] |
-| ["name#Isabella","gender#Female","age:1"] |
+| ... |
 
+## More practical Example
 
-## B. Get a mapping table by Feature Binning
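+Hivemall's `build_bins` and `feature_binning` can be combined as follows to build bins for each feature index and apply them to whole feature vectors: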
+
+```sql
+WITH extracted as (
+  select 
+    extract_feature(feature) as index,
+    extract_weight(feature) as value
+  from
+    input l
+    LATERAL VIEW explode(features) r as feature
+),
+mapping as (
+  select
+    index, 
+    build_bins(value, 5, true) as quantiles -- 5 bins with auto bin shrinking
+  from
+    extracted
+  group by
+    index
+),
+bins as (
+   select 
+    to_map(index, quantiles) as quantiles 
+   from
+    mapping
+)
+select
+  l.features as original,
+  feature_binning(l.features, r.quantiles) as features
+from
+  input l
+  cross join bins r
+-- limit 10;
+```
+
+| original | features |
+|:--|:--|
+| ["name#Jacob","gender#Male","age:20.0"] | ["name#Jacob","gender#Male","age:2"] |
+| ["name#Isabella","gender#Female","age:20.0"] | ["name#Isabella","gender#Female","age:2"] |
+| ... | ... |
+
+
+## Get a mapping table by Feature Binning
 
 ```sql
 WITH bins AS (
@@ -99,8 +193,6 @@ FROM
   users CROSS JOIN bins;
 ```
 
-*Result*
-
 | age:` int` | bin: `int` |
 |:-:|:-:|
 | 20 | 1 |
@@ -111,17 +203,17 @@ FROM
 | 46 | 2 |
 | 20 | 1 |
 
-# Function Signature
+# Function Signatures
 
-## [UDAF] `build_bins(weight, num_of_bins[, auto_shrink])`
+### UDAF `build_bins(weight, num_of_bins[, auto_shrink])`
 
-### Input
+#### Input
 
 | weight: int&#124;bigint&#124;float&#124;double | num\_of\_bins: `int` | [auto\_shrink: `boolean` = false] |
 | :-: | :-: | :-: |
 | weight | 2 <= | behavior when separations are repeated: T=\>skip, F=\>exception |
 
-### Output
+#### Output
 
 | quantiles: `array<double>` |
 | :-: |
@@ -131,9 +223,7 @@ FROM
 > There is the possibility quantiles are repeated because of too many `num_of_bins` or too few data.
 > If `auto_shrink` is true, skip duplicated quantiles. If not, throw an exception.
 
-## [UDF] `feature_binning(features, quantiles_map)/(weight, quantiles)`
-
-### Variation: A
+### UDF `feature_binning(features, quantiles_map)`
 
 #### Input 
 
@@ -147,7 +237,7 @@ FROM
 | :-: |
 | serialized and binned features |
 
-### Variation: B
+### UDF `feature_binning(weight, quantiles)`
 
 #### Input
 
diff --git a/docs/gitbook/misc/funcs.md b/docs/gitbook/misc/funcs.md
index 1b1b280..d860dba 100644
--- a/docs/gitbook/misc/funcs.md
+++ b/docs/gitbook/misc/funcs.md
@@ -263,7 +263,41 @@ Reference: <a href="https://papers.nips.cc/paper/3848-adaptive-regularization-of
 
 - `build_bins(number weight, const int num_of_bins[, const boolean auto_shrink = false])` - Return quantiles representing bins: array&lt;double&gt;
 
-- `feature_binning(array<features::string> features, const map<string, array<number>> quantiles_map)` / _FUNC_(number weight, const array&lt;number&gt; quantiles) - Returns binned features as an array&lt;features::string&gt; / bin ID as int
+- `feature_binning(array<features::string> features, map<string, array<number>> quantiles_map)` - returns a binned feature vector as an array&lt;features::string&gt; / `feature_binning(number weight, array<number> quantiles)` - returns bin ID as int
+  ```sql
+  WITH extracted as (
+    select 
+      extract_feature(feature) as index,
+      extract_weight(feature) as value
+    from
+      input l
+      LATERAL VIEW explode(features) r as feature
+  ),
+  mapping as (
+    select
+      index, 
+      build_bins(value, 5, true) as quantiles -- 5 bins with auto bin shrinking
+    from
+      extracted
+    group by
+      index
+  ),
+  bins as (
+     select 
+      to_map(index, quantiles) as quantiles 
+     from
+      mapping
+  )
+  select
+    l.features as original,
+    feature_binning(l.features, r.quantiles) as features
+  from
+    input l
+    cross join bins r
+
+  > ["name#Jacob","gender#Male","age:20.0"] ["name#Jacob","gender#Male","age:2"]
+  > ["name#Isabella","gender#Female","age:20.0"]    ["name#Isabella","gender#Female","age:2"]
+  ```
 
 ## Feature format conversion