You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hivemall.apache.org by my...@apache.org on 2019/02/08 06:11:00 UTC

[incubator-hivemall] branch master updated (ab1ce5d -> 29147ef)

This is an automated email from the ASF dual-hosted git repository.

myui pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hivemall.git.


    from ab1ce5d  Renamed map_index UDF to map_get
     new 763aa56  Applied refactoring and documentation improvement
     new 29147ef  Refined tutorial documents

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../main/java/hivemall/tools/map/MapGetUDF.java    |  6 +-
 .../java/hivemall/tools/map/MapKeyValuesUDF.java   |  4 +-
 .../java/hivemall/tools/map/MergeMapsUDAF.java     |  2 +-
 .../main/java/hivemall/tools/map/UDAFToMap.java    | 13 ++++-
 .../main/java/hivemall/tools/math/IsFiniteUDF.java |  3 +-
 .../main/java/hivemall/tools/math/L2NormUDAF.java  | 10 +++-
 core/src/main/java/hivemall/tools/math/NanUDF.java |  3 +-
 .../hivemall/tools/math/SigmoidGenericUDF.java     | 16 ++++-
 .../hivemall/tools/matrix/TransposeAndDotUDAF.java | 68 +++++++++++++---------
 .../java/hivemall/docs/FuncsListGeneratorMojo.java |  3 +-
 10 files changed, 89 insertions(+), 39 deletions(-)


[incubator-hivemall] 02/02: Refined tutorial documents

Posted by my...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

myui pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hivemall.git

commit 29147ef4566aed483e7e4ca575e95377c131fe31
Author: Makoto Yui <my...@apache.org>
AuthorDate: Fri Feb 8 15:10:54 2019 +0900

    Refined tutorial documents
---
 core/src/main/java/hivemall/tools/map/MapGetUDF.java     |  6 ++++--
 .../main/java/hivemall/tools/map/MapKeyValuesUDF.java    |  4 ++--
 core/src/main/java/hivemall/tools/map/MergeMapsUDAF.java |  2 +-
 core/src/main/java/hivemall/tools/map/UDAFToMap.java     | 13 ++++++++++++-
 core/src/main/java/hivemall/tools/math/IsFiniteUDF.java  |  3 ++-
 core/src/main/java/hivemall/tools/math/L2NormUDAF.java   | 10 +++++++++-
 core/src/main/java/hivemall/tools/math/NanUDF.java       |  3 ++-
 .../main/java/hivemall/tools/math/SigmoidGenericUDF.java | 16 +++++++++++++++-
 .../main/java/hivemall/docs/FuncsListGeneratorMojo.java  |  3 +--
 9 files changed, 48 insertions(+), 12 deletions(-)

diff --git a/core/src/main/java/hivemall/tools/map/MapGetUDF.java b/core/src/main/java/hivemall/tools/map/MapGetUDF.java
index 3ea1138..27e1c22 100644
--- a/core/src/main/java/hivemall/tools/map/MapGetUDF.java
+++ b/core/src/main/java/hivemall/tools/map/MapGetUDF.java
@@ -35,8 +35,10 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 
 //@formatter:off
 @Description(name = "map_get",
-        value = "_FUNC_(MAP<K> a, K n) - Returns the value corresponding to the key in the map",
-        extended = "WITH tmp as (\n" +
+        value = "_FUNC_(MAP<K> a, K n) - Returns the value corresponding to the key in the map.",
+        extended = "Note this is a workaround for a Hive issue in which non-constant expressions for map indexes are not supported.\n" +
+                "See https://issues.apache.org/jira/browse/HIVE-1955\n\n" +
+                "WITH tmp as (\n" +
                 "  SELECT \"one\" as key\n" +
                 "  UNION ALL\n" +
                 "  SELECT \"two\" as key\n" +
diff --git a/core/src/main/java/hivemall/tools/map/MapKeyValuesUDF.java b/core/src/main/java/hivemall/tools/map/MapKeyValuesUDF.java
index 3992f9e..b2c0c75 100644
--- a/core/src/main/java/hivemall/tools/map/MapKeyValuesUDF.java
+++ b/core/src/main/java/hivemall/tools/map/MapKeyValuesUDF.java
@@ -38,9 +38,9 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 
 @Description(name = "map_key_values",
-        value = "_FUNC_(map) - " + "Returns a array of key-value pairs.",
+        value = "_FUNC_(MAP<K, V> map) - " + "Returns an array of key-value pairs in array<named_struct<key,value>>",
         extended = "SELECT map_key_values(map(\"one\",1,\"two\",2));\n\n"
-                + "[{\"key\":\"one\",\"value\":1},{\"key\":\"two\",\"value\":2}]")
+                + "> [{\"key\":\"one\",\"value\":1},{\"key\":\"two\",\"value\":2}]")
 @UDFType(deterministic = true, stateful = false)
 public final class MapKeyValuesUDF extends GenericUDF {
 
diff --git a/core/src/main/java/hivemall/tools/map/MergeMapsUDAF.java b/core/src/main/java/hivemall/tools/map/MergeMapsUDAF.java
index e4a2516..fc25326 100644
--- a/core/src/main/java/hivemall/tools/map/MergeMapsUDAF.java
+++ b/core/src/main/java/hivemall/tools/map/MergeMapsUDAF.java
@@ -41,7 +41,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
 //@formatter:off
 @Description(name = "merge_maps",
-        value = "_FUNC_(x) - Returns a map which contains the union of an aggregation of maps."
+        value = "_FUNC_(Map x) - Returns a map which contains the union of an aggregation of maps."
                 + " Note that an existing value of a key can be replaced with the other duplicate key entry.",
         extended = "SELECT \n" + 
                 "  merge_maps(m) \n" + 
diff --git a/core/src/main/java/hivemall/tools/map/UDAFToMap.java b/core/src/main/java/hivemall/tools/map/UDAFToMap.java
index b203909..c19852d 100644
--- a/core/src/main/java/hivemall/tools/map/UDAFToMap.java
+++ b/core/src/main/java/hivemall/tools/map/UDAFToMap.java
@@ -44,8 +44,19 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
  * 
  * @see https://cwiki.apache.org/Hive/genericudafcasestudy.html
  */
+// @formatter:off
 @Description(name = "to_map",
-        value = "_FUNC_(key, value) - Convert two aggregated columns into a key-value map")
+        value = "_FUNC_(key, value) - Convert two aggregated columns into a key-value map",
+        extended = "WITH input as (\n" + 
+                "  select 'aaa' as key, 111 as value\n" + 
+                "  UNION all\n" + 
+                "  select 'bbb' as key, 222 as value\n" + 
+                ")\n" + 
+                "select to_map(key, value)\n" + 
+                "from input;\n" + 
+                "\n" + 
+                "> {\"bbb\":222,\"aaa\":111}")
+// @formatter:on
 public class UDAFToMap extends AbstractGenericUDAFResolver {
 
     @Override
diff --git a/core/src/main/java/hivemall/tools/math/IsFiniteUDF.java b/core/src/main/java/hivemall/tools/math/IsFiniteUDF.java
index 8c1f83c..4d6550f 100644
--- a/core/src/main/java/hivemall/tools/math/IsFiniteUDF.java
+++ b/core/src/main/java/hivemall/tools/math/IsFiniteUDF.java
@@ -21,7 +21,8 @@ package hivemall.tools.math;
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
 
-@Description(name = "is_finite", value = "_FUNC_(x) - Determine if x is infinite.")
+@Description(name = "is_finite", value = "_FUNC_(x) - Determine if x is finite.",
+        extended = "SELECT is_finite(333), is_finite(infinity());\n" + "> true false")
 public final class IsFiniteUDF extends UDF {
     public Boolean evaluate(Double num) {
         if (num == null) {
diff --git a/core/src/main/java/hivemall/tools/math/L2NormUDAF.java b/core/src/main/java/hivemall/tools/math/L2NormUDAF.java
index 921272a..dc65801 100644
--- a/core/src/main/java/hivemall/tools/math/L2NormUDAF.java
+++ b/core/src/main/java/hivemall/tools/math/L2NormUDAF.java
@@ -24,9 +24,17 @@ import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 
+// @formatter:off
 @SuppressWarnings("deprecation")
 @Description(name = "l2_norm",
-        value = "_FUNC_(double xi) - Return L2 norm of a vector which has the given values in each dimension")
+        value = "_FUNC_(double x) - Return an L2 norm of the given input x.",
+        extended = "WITH input as (\n" + 
+                "  select generate_series(1,3) as v\n" + 
+                ")\n" + 
+                "select l2_norm(v) as l2norm\n" + 
+                "from input;\n" + 
+                "> 3.7416573867739413 = sqrt(1^2+2^2+3^2)")
+// @formatter:on
 public final class L2NormUDAF extends UDAF {
 
     public static class Evaluator implements UDAFEvaluator {
diff --git a/core/src/main/java/hivemall/tools/math/NanUDF.java b/core/src/main/java/hivemall/tools/math/NanUDF.java
index 51a6c1a..f00a5ba 100644
--- a/core/src/main/java/hivemall/tools/math/NanUDF.java
+++ b/core/src/main/java/hivemall/tools/math/NanUDF.java
@@ -21,7 +21,8 @@ package hivemall.tools.math;
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
 
-@Description(name = "nan", value = "_FUNC_() - Returns the constant representing not-a-number.")
+@Description(name = "nan", value = "_FUNC_() - Returns the constant representing not-a-number.",
+        extended = "SELECT nan(), is_nan(nan());\n" + "> NaN true")
 public final class NanUDF extends UDF {
     public double evaluate() {
         return Double.NaN;
diff --git a/core/src/main/java/hivemall/tools/math/SigmoidGenericUDF.java b/core/src/main/java/hivemall/tools/math/SigmoidGenericUDF.java
index 881d94d..d097a18 100644
--- a/core/src/main/java/hivemall/tools/math/SigmoidGenericUDF.java
+++ b/core/src/main/java/hivemall/tools/math/SigmoidGenericUDF.java
@@ -37,7 +37,21 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
 import org.apache.hadoop.io.FloatWritable;
 
-@Description(name = "sigmoid", value = "_FUNC_(x) - Returns 1.0 / (1.0 + exp(-x))")
+// @formatter:off
+@Description(name = "sigmoid", value = "_FUNC_(x) - Returns 1.0 / (1.0 + exp(-x))", 
+            extended = "WITH input as (\n" + 
+                    "  SELECT 3.0 as x\n" + 
+                    "  UNION ALL\n" + 
+                    "  SELECT -3.0 as x\n" + 
+                    ")\n" + 
+                    "select \n" + 
+                    "  1.0 / (1.0 + exp(-x)),\n" + 
+                    "  sigmoid(x)\n" + 
+                    "from\n" + 
+                    "  input;\n" + 
+                    "> 0.04742587317756678   0.04742587357759476\n" + 
+                    "> 0.9525741268224334    0.9525741338729858")
+// @formatter:on
 @UDFType(deterministic = true, stateful = false)
 public final class SigmoidGenericUDF extends GenericUDF {
 
diff --git a/tools/hivemall-docs/src/main/java/hivemall/docs/FuncsListGeneratorMojo.java b/tools/hivemall-docs/src/main/java/hivemall/docs/FuncsListGeneratorMojo.java
index 0d58b3f..72645c0 100644
--- a/tools/hivemall-docs/src/main/java/hivemall/docs/FuncsListGeneratorMojo.java
+++ b/tools/hivemall-docs/src/main/java/hivemall/docs/FuncsListGeneratorMojo.java
@@ -88,14 +88,13 @@ public class FuncsListGeneratorMojo extends AbstractMojo {
         genericFuncsHeaders.put("# Map", Collections.singletonList("hivemall.tools.map"));
         genericFuncsHeaders.put("# MapReduce", Collections.singletonList("hivemall.tools.mapred"));
         genericFuncsHeaders.put("# Math", Collections.singletonList("hivemall.tools.math"));
-        genericFuncsHeaders.put("# Matrix", Collections.singletonList("hivemall.tools.matrix"));
+        genericFuncsHeaders.put("# Vector/Matrix", Arrays.asList("hivemall.tools.matrix", "hivemall.tools.vector"));
         genericFuncsHeaders.put("# Sanity Checks",
             Collections.singletonList("hivemall.tools.sanity"));
         genericFuncsHeaders.put("# Text processing",
             Collections.singletonList("hivemall.tools.text"));
         genericFuncsHeaders.put("# Timeseries",
             Collections.singletonList("hivemall.tools.timeseries"));
-        genericFuncsHeaders.put("# Vector", Collections.singletonList("hivemall.tools.vector"));
         genericFuncsHeaders.put("# Others", Collections.singletonList("hivemall.tools"));
     }
 


[incubator-hivemall] 01/02: Applied refactoring and documentation improvement

Posted by my...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

myui pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hivemall.git

commit 763aa5678cf922cb0e2340ba0f7b5358d4bfcffe
Author: Makoto Yui <my...@apache.org>
AuthorDate: Fri Feb 8 15:10:29 2019 +0900

    Applied refactoring and documentation improvement
---
 .../hivemall/tools/matrix/TransposeAndDotUDAF.java | 68 +++++++++++++---------
 1 file changed, 41 insertions(+), 27 deletions(-)

diff --git a/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java b/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java
index d04ddc1..1b448cb 100644
--- a/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java
+++ b/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java
@@ -44,9 +44,23 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 
+// @formatter:off
 @Description(name = "transpose_and_dot",
-        value = "_FUNC_(array<number> matrix0_row, array<number> matrix1_row)"
-                + " - Returns dot(matrix0.T, matrix1) as array<array<double>>, shape = (matrix0.#cols, matrix1.#cols)")
+        value = "_FUNC_(array<number> X, array<number> Y)"
+                + " - Returns dot(X.T, Y) as array<array<double>>, shape = (X.#cols, Y.#cols)",
+        extended = "WITH input as (\n" + 
+                "  select array(1.0, 2.0, 3.0, 4.0) as x, array(1, 2) as y\n" + 
+                "  UNION ALL\n" + 
+                "  select array(2.0, 3.0, 4.0, 5.0) as x, array(1, 2) as y\n" + 
+                ")\n" + 
+                "select\n" + 
+                "  transpose_and_dot(x, y) as xy,\n" + 
+                "  transpose_and_dot(y, x) as yx\n" + 
+                "from \n" + 
+                "  input;\n\n" + 
+                "> [[\"3.0\",\"6.0\"],[\"5.0\",\"10.0\"],[\"7.0\",\"14.0\"],[\"9.0\",\"18.0\"]]" + 
+                "   [[\"3.0\",\"5.0\",\"7.0\",\"9.0\"],[\"6.0\",\"10.0\",\"14.0\",\"18.0\"]]\n")
+// @formatter:on
 public final class TransposeAndDotUDAF extends AbstractGenericUDAFResolver {
 
     @Override
@@ -75,18 +89,18 @@ public final class TransposeAndDotUDAF extends AbstractGenericUDAFResolver {
 
     static final class TransposeAndDotUDAFEvaluator extends GenericUDAFEvaluator {
         // PARTIAL1 and COMPLETE
-        private ListObjectInspector matrix0RowOI;
-        private PrimitiveObjectInspector matrix0ElOI;
-        private ListObjectInspector matrix1RowOI;
-        private PrimitiveObjectInspector matrix1ElOI;
+        private ListObjectInspector xRowOI;
+        private PrimitiveObjectInspector xElemOI;
+        private ListObjectInspector yRowOI;
+        private PrimitiveObjectInspector yElemOI;
 
         // PARTIAL2 and FINAL
         private ListObjectInspector aggMatrixOI;
         private ListObjectInspector aggMatrixRowOI;
-        private DoubleObjectInspector aggMatrixElOI;
+        private DoubleObjectInspector aggMatrixElemOI;
 
-        private double[] matrix0Row;
-        private double[] matrix1Row;
+        private double[] xRow;
+        private double[] yRow;
 
         @AggregationType(estimable = true)
         static class TransposeAndDotAggregationBuffer extends AbstractAggregationBuffer {
@@ -116,17 +130,17 @@ public final class TransposeAndDotUDAF extends AbstractGenericUDAFResolver {
             super.init(mode, OIs);
 
             if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
-                this.matrix0RowOI = HiveUtils.asListOI(OIs[0]);
-                this.matrix0ElOI = HiveUtils.asDoubleCompatibleOI(
-                    matrix0RowOI.getListElementObjectInspector());
-                this.matrix1RowOI = HiveUtils.asListOI(OIs[1]);
-                this.matrix1ElOI = HiveUtils.asDoubleCompatibleOI(
-                    matrix1RowOI.getListElementObjectInspector());
+                this.xRowOI = HiveUtils.asListOI(OIs[0]);
+                this.xElemOI = HiveUtils.asDoubleCompatibleOI(
+                    xRowOI.getListElementObjectInspector());
+                this.yRowOI = HiveUtils.asListOI(OIs[1]);
+                this.yElemOI = HiveUtils.asDoubleCompatibleOI(
+                    yRowOI.getListElementObjectInspector());
             } else {
                 this.aggMatrixOI = HiveUtils.asListOI(OIs[0]);
                 this.aggMatrixRowOI =
                         HiveUtils.asListOI(aggMatrixOI.getListElementObjectInspector());
-                this.aggMatrixElOI =
+                this.aggMatrixElemOI =
                         HiveUtils.asDoubleOI(aggMatrixRowOI.getListElementObjectInspector());
             }
 
@@ -160,23 +174,23 @@ public final class TransposeAndDotUDAF extends AbstractGenericUDAFResolver {
 
             final TransposeAndDotAggregationBuffer myAgg = (TransposeAndDotAggregationBuffer) agg;
 
-            if (matrix0Row == null) {
-                matrix0Row = new double[matrix0RowOI.getListLength(matrix0RowObj)];
+            if (xRow == null) {
+                xRow = new double[xRowOI.getListLength(matrix0RowObj)];
             }
-            if (matrix1Row == null) {
-                matrix1Row = new double[matrix1RowOI.getListLength(matrix1RowObj)];
+            if (yRow == null) {
+                yRow = new double[yRowOI.getListLength(matrix1RowObj)];
             }
 
-            HiveUtils.toDoubleArray(matrix0RowObj, matrix0RowOI, matrix0ElOI, matrix0Row, false);
-            HiveUtils.toDoubleArray(matrix1RowObj, matrix1RowOI, matrix1ElOI, matrix1Row, false);
+            HiveUtils.toDoubleArray(matrix0RowObj, xRowOI, xElemOI, xRow, false);
+            HiveUtils.toDoubleArray(matrix1RowObj, yRowOI, yElemOI, yRow, false);
 
             if (myAgg.aggMatrix == null) {
-                myAgg.init(matrix0Row.length, matrix1Row.length);
+                myAgg.init(xRow.length, yRow.length);
             }
 
-            for (int i = 0; i < matrix0Row.length; i++) {
-                for (int j = 0; j < matrix1Row.length; j++) {
-                    myAgg.aggMatrix[i][j] += matrix0Row[i] * matrix1Row[j];
+            for (int i = 0; i < xRow.length; i++) {
+                for (int j = 0; j < yRow.length; j++) {
+                    myAgg.aggMatrix[i][j] += xRow[i] * yRow[j];
                 }
             }
         }
@@ -194,7 +208,7 @@ public final class TransposeAndDotUDAF extends AbstractGenericUDAFResolver {
             final int n = matrix.size();
             final double[] row = new double[aggMatrixRowOI.getListLength(matrix.get(0))];
             for (int i = 0; i < n; i++) {
-                HiveUtils.toDoubleArray(matrix.get(i), aggMatrixRowOI, aggMatrixElOI, row, false);
+                HiveUtils.toDoubleArray(matrix.get(i), aggMatrixRowOI, aggMatrixElemOI, row, false);
 
                 if (myAgg.aggMatrix == null) {
                     myAgg.init(n, row.length);