You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hivemall.apache.org by my...@apache.org on 2019/11/28 07:46:11 UTC

[incubator-hivemall] branch master updated: Minor refactoring and fixed function docs

This is an automated email from the ASF dual-hosted git repository.

myui pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hivemall.git


The following commit(s) were added to refs/heads/master by this push:
     new a8d4865  Minor refactoring and fixed function docs
a8d4865 is described below

commit a8d4865eaf56dfda5bca74421f1fe41362373bab
Author: Makoto Yui <my...@apache.org>
AuthorDate: Thu Nov 28 16:46:02 2019 +0900

    Minor refactoring and fixed function docs
---
 core/src/main/java/hivemall/LearnerBaseUDTF.java   |  2 +-
 .../java/hivemall/tools/GenerateSeriesUDTF.java    | 18 +++----------
 core/src/main/java/hivemall/tools/TryCastUDF.java  |  2 +-
 .../java/hivemall/tools/aggr/MajorityVoteUDAF.java |  2 +-
 .../main/java/hivemall/tools/aggr/MaxByUDAF.java   |  2 +-
 .../main/java/hivemall/tools/aggr/MinByUDAF.java   |  2 +-
 .../main/java/hivemall/tools/array/ArangeUDF.java  |  2 +-
 .../main/java/hivemall/tools/array/ArgmaxUDF.java  |  2 +-
 .../main/java/hivemall/tools/array/ArgminUDF.java  |  2 +-
 .../main/java/hivemall/tools/array/ArgrankUDF.java |  2 +-
 .../main/java/hivemall/tools/array/ArgsortUDF.java |  4 +--
 .../main/java/hivemall/tools/math/IsFiniteUDF.java |  2 +-
 .../main/java/hivemall/tools/math/L2NormUDAF.java  |  2 +-
 core/src/main/java/hivemall/tools/math/NanUDF.java |  2 +-
 .../hivemall/tools/math/SigmoidGenericUDF.java     |  4 +--
 .../hivemall/tools/matrix/TransposeAndDotUDAF.java |  2 +-
 docs/gitbook/misc/generic_funcs.md                 | 30 +++++++++++-----------
 17 files changed, 35 insertions(+), 47 deletions(-)

diff --git a/core/src/main/java/hivemall/LearnerBaseUDTF.java b/core/src/main/java/hivemall/LearnerBaseUDTF.java
index 5407d0e..62a816f 100644
--- a/core/src/main/java/hivemall/LearnerBaseUDTF.java
+++ b/core/src/main/java/hivemall/LearnerBaseUDTF.java
@@ -117,7 +117,7 @@ public abstract class LearnerBaseUDTF extends UDTFWithOptions {
 
         CommandLine cl = null;
         if (argOIs.length >= 3) {
-            String rawArgs = HiveUtils.getConstString(argOIs[2]);
+            String rawArgs = HiveUtils.getConstString(argOIs, 2);
             cl = parseOptions(rawArgs);
 
             denseModel = cl.hasOption("dense");
diff --git a/core/src/main/java/hivemall/tools/GenerateSeriesUDTF.java b/core/src/main/java/hivemall/tools/GenerateSeriesUDTF.java
index bd4088f..8bcd40e 100644
--- a/core/src/main/java/hivemall/tools/GenerateSeriesUDTF.java
+++ b/core/src/main/java/hivemall/tools/GenerateSeriesUDTF.java
@@ -99,23 +99,11 @@ public final class GenerateSeriesUDTF extends GenericUDTF {
             throw new UDFArgumentException(
                 "Expected number of arguments is 2 or 3: " + argOIs.length);
         }
-        if (!HiveUtils.isIntegerOI(argOIs[0])) {
-            throw new UDFArgumentException(
-                "Expected Integer type for the first argument: " + argOIs[0].getTypeName());
-        }
-        if (!HiveUtils.isIntegerOI(argOIs[1])) {
-            throw new UDFArgumentException(
-                "Expected Integer type for the second argument: " + argOIs[1].getTypeName());
-        }
-        this.startOI = HiveUtils.asIntegerOI(argOIs[0]);
-        this.endOI = HiveUtils.asIntegerOI(argOIs[1]);
+        this.startOI = HiveUtils.asIntegerOI(argOIs, 0);
+        this.endOI = HiveUtils.asIntegerOI(argOIs, 1);
 
         if (argOIs.length == 3) {
-            if (!HiveUtils.isIntegerOI(argOIs[2])) {
-                throw new UDFArgumentException(
-                    "Expected Integer type for the third argument: " + argOIs[2].getTypeName());
-            }
-            this.stepOI = HiveUtils.asIntegerOI(argOIs[2]);
+            this.stepOI = HiveUtils.asIntegerOI(argOIs, 2);
         }
 
         this.returnLong = HiveUtils.isBigIntOI(startOI) || HiveUtils.isBigIntOI(endOI);
diff --git a/core/src/main/java/hivemall/tools/TryCastUDF.java b/core/src/main/java/hivemall/tools/TryCastUDF.java
index adb8328..a3df115 100644
--- a/core/src/main/java/hivemall/tools/TryCastUDF.java
+++ b/core/src/main/java/hivemall/tools/TryCastUDF.java
@@ -51,7 +51,7 @@ public final class TryCastUDF extends GenericUDF {
         }
 
         this.inputOI = argOIs[0];
-        String typeString = HiveUtils.getConstString(argOIs[1]);
+        String typeString = HiveUtils.getConstString(argOIs, 1);
 
         ObjectInspector outputOI = HiveUtils.getObjectInspector(typeString, true);
         this.converter = ObjectInspectorConverters.getConverter(inputOI, outputOI);
diff --git a/core/src/main/java/hivemall/tools/aggr/MajorityVoteUDAF.java b/core/src/main/java/hivemall/tools/aggr/MajorityVoteUDAF.java
index 89f0409..e2899fd 100644
--- a/core/src/main/java/hivemall/tools/aggr/MajorityVoteUDAF.java
+++ b/core/src/main/java/hivemall/tools/aggr/MajorityVoteUDAF.java
@@ -60,7 +60,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
                    "  majority_vote(k) as k\n" + 
                    "from \n" + 
                    "  data;\n" + 
-                   "> 2")
+                   "2")
 //@formatter:on
 public final class MajorityVoteUDAF extends AbstractGenericUDAFResolver {
 
diff --git a/core/src/main/java/hivemall/tools/aggr/MaxByUDAF.java b/core/src/main/java/hivemall/tools/aggr/MaxByUDAF.java
index ae89a70..0dd1250 100644
--- a/core/src/main/java/hivemall/tools/aggr/MaxByUDAF.java
+++ b/core/src/main/java/hivemall/tools/aggr/MaxByUDAF.java
@@ -57,7 +57,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
                 "  max_by(name, age) as name\n" + 
                 "from\n" + 
                 "  data;\n" + 
-                "> tom")
+                "tom")
 //@formatter:on
 public final class MaxByUDAF extends AbstractGenericUDAFResolver {
 
diff --git a/core/src/main/java/hivemall/tools/aggr/MinByUDAF.java b/core/src/main/java/hivemall/tools/aggr/MinByUDAF.java
index 1c001ea..23499cd 100644
--- a/core/src/main/java/hivemall/tools/aggr/MinByUDAF.java
+++ b/core/src/main/java/hivemall/tools/aggr/MinByUDAF.java
@@ -58,7 +58,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
                 "from\n" + 
                 "  data;\n" + 
                 "\n" + 
-                "> jake")
+                "jake")
 //@formatter:on
 public final class MinByUDAF extends AbstractGenericUDAFResolver {
 
diff --git a/core/src/main/java/hivemall/tools/array/ArangeUDF.java b/core/src/main/java/hivemall/tools/array/ArangeUDF.java
index 1097956..2618b26 100644
--- a/core/src/main/java/hivemall/tools/array/ArangeUDF.java
+++ b/core/src/main/java/hivemall/tools/array/ArangeUDF.java
@@ -51,7 +51,7 @@ extended = "SELECT arange(5), arange(1, 5), arange(1, 5, 1), arange(0, 5, 1);\n"
         "> 1, 3, 5\n" + 
         "\n" + 
         "SELECT arange(-1, -6, 2);\n" + 
-        "> -1, -3, -5")
+        "-1, -3, -5")
 // @formatter:on
 @UDFType(deterministic = true, stateful = false)
 public final class ArangeUDF extends GenericUDF {
diff --git a/core/src/main/java/hivemall/tools/array/ArgmaxUDF.java b/core/src/main/java/hivemall/tools/array/ArgmaxUDF.java
index d697907..0e8c132 100644
--- a/core/src/main/java/hivemall/tools/array/ArgmaxUDF.java
+++ b/core/src/main/java/hivemall/tools/array/ArgmaxUDF.java
@@ -35,7 +35,7 @@ import org.apache.hadoop.io.IntWritable;
 
 @Description(name = "argmax",
         value = "_FUNC_(array<T> a) - Returns the first index of the maximum value",
-        extended = "SELECT argmax(array(5,2,0,1));\n" + "> 0")
+        extended = "SELECT argmax(array(5,2,0,1));\n" + "0")
 @UDFType(deterministic = true, stateful = false)
 public final class ArgmaxUDF extends GenericUDF {
 
diff --git a/core/src/main/java/hivemall/tools/array/ArgminUDF.java b/core/src/main/java/hivemall/tools/array/ArgminUDF.java
index a2664b7..b24b755 100644
--- a/core/src/main/java/hivemall/tools/array/ArgminUDF.java
+++ b/core/src/main/java/hivemall/tools/array/ArgminUDF.java
@@ -35,7 +35,7 @@ import org.apache.hadoop.io.IntWritable;
 
 @Description(name = "argmin",
         value = "_FUNC_(array<T> a) - Returns the first index of the minimum value",
-        extended = "SELECT argmin(array(5,2,0,1));\n" + "> 2")
+        extended = "SELECT argmin(array(5,2,0,1));\n" + "2")
 @UDFType(deterministic = true, stateful = false)
 public final class ArgminUDF extends GenericUDF {
 
diff --git a/core/src/main/java/hivemall/tools/array/ArgrankUDF.java b/core/src/main/java/hivemall/tools/array/ArgrankUDF.java
index f283d39..54f7665 100644
--- a/core/src/main/java/hivemall/tools/array/ArgrankUDF.java
+++ b/core/src/main/java/hivemall/tools/array/ArgrankUDF.java
@@ -45,7 +45,7 @@ import org.apache.hadoop.io.IntWritable;
 @Description(name = "argrank",
         value = "_FUNC_(array<ANY> a) - Returns the indices that would sort an array.",
         extended = "SELECT argrank(array(5,2,0,1)), argsort(argsort(array(5,2,0,1)));\n" + 
-                "> [3, 2, 0, 1]     [3, 2, 0, 1]")
+                "[3, 2, 0, 1]     [3, 2, 0, 1]")
 // @formatter:on
 @UDFType(deterministic = true, stateful = false)
 public final class ArgrankUDF extends GenericUDF {
diff --git a/core/src/main/java/hivemall/tools/array/ArgsortUDF.java b/core/src/main/java/hivemall/tools/array/ArgsortUDF.java
index f8e6b8f..2404dd2 100644
--- a/core/src/main/java/hivemall/tools/array/ArgsortUDF.java
+++ b/core/src/main/java/hivemall/tools/array/ArgsortUDF.java
@@ -45,10 +45,10 @@ import org.apache.hadoop.io.IntWritable;
 @Description(name = "argsort",
         value = "_FUNC_(array<ANY> a) - Returns the indices that would sort an array.",
         extended = "SELECT argsort(array(5,2,0,1));\n" + 
-                "> 2, 3, 1, 0\n" + 
+                "2, 3, 1, 0\n" + 
                 "\n" + 
                 "SELECT array_slice(array(5,2,0,1), argsort(array(5,2,0,1)));\n" + 
-                "> 0, 1, 2, 5")
+                "0, 1, 2, 5")
 // @formatter:on
 @UDFType(deterministic = true, stateful = false)
 public final class ArgsortUDF extends GenericUDF {
diff --git a/core/src/main/java/hivemall/tools/math/IsFiniteUDF.java b/core/src/main/java/hivemall/tools/math/IsFiniteUDF.java
index 4d6550f..f0b6e94 100644
--- a/core/src/main/java/hivemall/tools/math/IsFiniteUDF.java
+++ b/core/src/main/java/hivemall/tools/math/IsFiniteUDF.java
@@ -22,7 +22,7 @@ import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
 
 @Description(name = "is_finite", value = "_FUNC_(x) - Determine if x is finite.",
-        extended = "SELECT is_finite(333), is_finite(infinity());\n" + "> true false")
+        extended = "SELECT is_finite(333), is_finite(infinity());\n" + "true false")
 public final class IsFiniteUDF extends UDF {
     public Boolean evaluate(Double num) {
         if (num == null) {
diff --git a/core/src/main/java/hivemall/tools/math/L2NormUDAF.java b/core/src/main/java/hivemall/tools/math/L2NormUDAF.java
index dc65801..50ec5cf 100644
--- a/core/src/main/java/hivemall/tools/math/L2NormUDAF.java
+++ b/core/src/main/java/hivemall/tools/math/L2NormUDAF.java
@@ -33,7 +33,7 @@ import org.apache.hadoop.hive.serde2.io.DoubleWritable;
                 ")\n" + 
                 "select l2_norm(v) as l2norm\n" + 
                 "from input;\n" + 
-                "> 3.7416573867739413 = sqrt(1^2+2^2+3^2))")
+                "3.7416573867739413 = sqrt(1^2+2^2+3^2))")
 // @formatter:on
 public final class L2NormUDAF extends UDAF {
 
diff --git a/core/src/main/java/hivemall/tools/math/NanUDF.java b/core/src/main/java/hivemall/tools/math/NanUDF.java
index f00a5ba..d316631 100644
--- a/core/src/main/java/hivemall/tools/math/NanUDF.java
+++ b/core/src/main/java/hivemall/tools/math/NanUDF.java
@@ -22,7 +22,7 @@ import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
 
 @Description(name = "nan", value = "_FUNC_() - Returns the constant representing not-a-number.",
-        extended = "SELECT nan(), is_nan(nan());\n" + "> NaN true")
+        extended = "SELECT nan(), is_nan(nan());\n" + "NaN true")
 public final class NanUDF extends UDF {
     public double evaluate() {
         return Double.NaN;
diff --git a/core/src/main/java/hivemall/tools/math/SigmoidGenericUDF.java b/core/src/main/java/hivemall/tools/math/SigmoidGenericUDF.java
index d097a18..8540aa6 100644
--- a/core/src/main/java/hivemall/tools/math/SigmoidGenericUDF.java
+++ b/core/src/main/java/hivemall/tools/math/SigmoidGenericUDF.java
@@ -49,8 +49,8 @@ import org.apache.hadoop.io.FloatWritable;
                     "  sigmoid(x)\n" + 
                     "from\n" + 
                     "  input;\n" + 
-                    "> 0.04742587317756678   0.04742587357759476\n" + 
-                    "> 0.9525741268224334    0.9525741338729858")
+                    "0.04742587317756678   0.04742587357759476\n" + 
+                    "0.9525741268224334    0.9525741338729858")
 // @formatter:on
 @UDFType(deterministic = true, stateful = false)
 public final class SigmoidGenericUDF extends GenericUDF {
diff --git a/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java b/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java
index 4c52ebe..8366c83 100644
--- a/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java
+++ b/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java
@@ -58,7 +58,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn
                 "  transpose_and_dot(y, x) as yx\n" + 
                 "from \n" + 
                 "  input;\n\n" + 
-                "> [[\"3.0\",\"6.0\"],[\"5.0\",\"10.0\"],[\"7.0\",\"14.0\"],[\"9.0\",\"18.0\"]]" + 
+                "[[\"3.0\",\"6.0\"],[\"5.0\",\"10.0\"],[\"7.0\",\"14.0\"],[\"9.0\",\"18.0\"]]" + 
                 "   [[\"3.0\",\"5.0\",\"7.0\",\"9.0\"],[\"6.0\",\"10.0\",\"14.0\",\"18.0\"]]\n")
 // @formatter:on
 public final class TransposeAndDotUDAF extends AbstractGenericUDAFResolver {
diff --git a/docs/gitbook/misc/generic_funcs.md b/docs/gitbook/misc/generic_funcs.md
index fb0b240..dc195a6 100644
--- a/docs/gitbook/misc/generic_funcs.md
+++ b/docs/gitbook/misc/generic_funcs.md
@@ -34,7 +34,7 @@ This page describes a list of useful Hivemall generic functions. See also a [lis
     majority_vote(k) as k
   from 
     data;
-  > 2
+  2
   ```
 
 - `max_by(x, y)` - Returns the value of x associated with the maximum value of y over all input values.
@@ -51,7 +51,7 @@ This page describes a list of useful Hivemall generic functions. See also a [lis
     max_by(name, age) as name
   from
     data;
-  > tom
+  tom
   ```
 
 - `min_by(x, y)` - Returns the value of x associated with the minimum value of y over all input values.
@@ -69,7 +69,7 @@ This page describes a list of useful Hivemall generic functions. See also a [lis
   from
     data;
 
-  > jake
+  jake
   ```
 
 # Array
@@ -83,34 +83,34 @@ This page describes a list of useful Hivemall generic functions. See also a [lis
   > 1, 3, 5
 
   SELECT arange(-1, -6, 2);
-  > -1, -3, -5
+  -1, -3, -5
   ```
 
 - `argmax(array<T> a)` - Returns the first index of the maximum value
   ```sql
   SELECT argmax(array(5,2,0,1));
-  > 0
+  0
   ```
 
 - `argmin(array<T> a)` - Returns the first index of the minimum value
   ```sql
   SELECT argmin(array(5,2,0,1));
-  > 2
+  2
   ```
 
 - `argrank(array<ANY> a)` - Returns the indices that would sort an array.
   ```sql
   SELECT argrank(array(5,2,0,1)), argsort(argsort(array(5,2,0,1)));
-  > [3, 2, 0, 1]     [3, 2, 0, 1]
+  [3, 2, 0, 1]     [3, 2, 0, 1]
   ```
 
 - `argsort(array<ANY> a)` - Returns the indices that would sort an array.
   ```sql
   SELECT argsort(array(5,2,0,1));
-  > 2, 3, 1, 0
+  2, 3, 1, 0
 
   SELECT array_slice(array(5,2,0,1), argsort(array(5,2,0,1)));
-  > 0, 1, 2, 5
+  0, 1, 2, 5
   ```
 
 - `array_append(array<T> arr, T elem)` - Append an element to the end of an array
@@ -712,7 +712,7 @@ This page describes a list of useful Hivemall generic functions. See also a [lis
 - `is_finite(x)` - Determine if x is finite.
   ```sql
   SELECT is_finite(333), is_finite(infinity());
-  > true false
+  true false
   ```
 
 - `is_infinite(x)` - Determine if x is infinite.
@@ -726,13 +726,13 @@ This page describes a list of useful Hivemall generic functions. See also a [lis
   )
   select l2_norm(v) as l2norm
   from input;
-  > 3.7416573867739413 = sqrt(1^2+2^2+3^2))
+  3.7416573867739413 = sqrt(1^2+2^2+3^2))
   ```
 
 - `nan()` - Returns the constant representing not-a-number.
   ```sql
   SELECT nan(), is_nan(nan());
-  > NaN true
+  NaN true
   ```
 
 - `sigmoid(x)` - Returns 1.0 / (1.0 + exp(-x))
@@ -747,8 +747,8 @@ This page describes a list of useful Hivemall generic functions. See also a [lis
     sigmoid(x)
   from
     input;
-  > 0.04742587317756678   0.04742587357759476
-  > 0.9525741268224334    0.9525741338729858
+  0.04742587317756678   0.04742587357759476
+  0.9525741268224334    0.9525741338729858
   ```
 
 # Vector/Matrix
@@ -766,7 +766,7 @@ This page describes a list of useful Hivemall generic functions. See also a [lis
   from 
     input;
 
-  > [["3.0","6.0"],["5.0","10.0"],["7.0","14.0"],["9.0","18.0"]]   [["3.0","5.0","7.0","9.0"],["6.0","10.0","14.0","18.0"]]
+  [["3.0","6.0"],["5.0","10.0"],["7.0","14.0"],["9.0","18.0"]]   [["3.0","5.0","7.0","9.0"],["6.0","10.0","14.0","18.0"]]
 
   ```