You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hivemall.apache.org by my...@apache.org on 2019/11/28 07:46:11 UTC
[incubator-hivemall] branch master updated: Minor refactoring and fixed function docs
This is an automated email from the ASF dual-hosted git repository.
myui pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hivemall.git
The following commit(s) were added to refs/heads/master by this push:
new a8d4865 Minor refactoring and fixed function docs
a8d4865 is described below
commit a8d4865eaf56dfda5bca74421f1fe41362373bab
Author: Makoto Yui <my...@apache.org>
AuthorDate: Thu Nov 28 16:46:02 2019 +0900
Minor refactoring and fixed function docs
---
core/src/main/java/hivemall/LearnerBaseUDTF.java | 2 +-
.../java/hivemall/tools/GenerateSeriesUDTF.java | 18 +++----------
core/src/main/java/hivemall/tools/TryCastUDF.java | 2 +-
.../java/hivemall/tools/aggr/MajorityVoteUDAF.java | 2 +-
.../main/java/hivemall/tools/aggr/MaxByUDAF.java | 2 +-
.../main/java/hivemall/tools/aggr/MinByUDAF.java | 2 +-
.../main/java/hivemall/tools/array/ArangeUDF.java | 2 +-
.../main/java/hivemall/tools/array/ArgmaxUDF.java | 2 +-
.../main/java/hivemall/tools/array/ArgminUDF.java | 2 +-
.../main/java/hivemall/tools/array/ArgrankUDF.java | 2 +-
.../main/java/hivemall/tools/array/ArgsortUDF.java | 4 +--
.../main/java/hivemall/tools/math/IsFiniteUDF.java | 2 +-
.../main/java/hivemall/tools/math/L2NormUDAF.java | 2 +-
core/src/main/java/hivemall/tools/math/NanUDF.java | 2 +-
.../hivemall/tools/math/SigmoidGenericUDF.java | 4 +--
.../hivemall/tools/matrix/TransposeAndDotUDAF.java | 2 +-
docs/gitbook/misc/generic_funcs.md | 30 +++++++++++-----------
17 files changed, 35 insertions(+), 47 deletions(-)
diff --git a/core/src/main/java/hivemall/LearnerBaseUDTF.java b/core/src/main/java/hivemall/LearnerBaseUDTF.java
index 5407d0e..62a816f 100644
--- a/core/src/main/java/hivemall/LearnerBaseUDTF.java
+++ b/core/src/main/java/hivemall/LearnerBaseUDTF.java
@@ -117,7 +117,7 @@ public abstract class LearnerBaseUDTF extends UDTFWithOptions {
CommandLine cl = null;
if (argOIs.length >= 3) {
- String rawArgs = HiveUtils.getConstString(argOIs[2]);
+ String rawArgs = HiveUtils.getConstString(argOIs, 2);
cl = parseOptions(rawArgs);
denseModel = cl.hasOption("dense");
diff --git a/core/src/main/java/hivemall/tools/GenerateSeriesUDTF.java b/core/src/main/java/hivemall/tools/GenerateSeriesUDTF.java
index bd4088f..8bcd40e 100644
--- a/core/src/main/java/hivemall/tools/GenerateSeriesUDTF.java
+++ b/core/src/main/java/hivemall/tools/GenerateSeriesUDTF.java
@@ -99,23 +99,11 @@ public final class GenerateSeriesUDTF extends GenericUDTF {
throw new UDFArgumentException(
"Expected number of arguments is 2 or 3: " + argOIs.length);
}
- if (!HiveUtils.isIntegerOI(argOIs[0])) {
- throw new UDFArgumentException(
- "Expected Integer type for the first argument: " + argOIs[0].getTypeName());
- }
- if (!HiveUtils.isIntegerOI(argOIs[1])) {
- throw new UDFArgumentException(
- "Expected Integer type for the second argument: " + argOIs[1].getTypeName());
- }
- this.startOI = HiveUtils.asIntegerOI(argOIs[0]);
- this.endOI = HiveUtils.asIntegerOI(argOIs[1]);
+ this.startOI = HiveUtils.asIntegerOI(argOIs, 0);
+ this.endOI = HiveUtils.asIntegerOI(argOIs, 1);
if (argOIs.length == 3) {
- if (!HiveUtils.isIntegerOI(argOIs[2])) {
- throw new UDFArgumentException(
- "Expected Integer type for the third argument: " + argOIs[2].getTypeName());
- }
- this.stepOI = HiveUtils.asIntegerOI(argOIs[2]);
+ this.stepOI = HiveUtils.asIntegerOI(argOIs, 2);
}
this.returnLong = HiveUtils.isBigIntOI(startOI) || HiveUtils.isBigIntOI(endOI);
diff --git a/core/src/main/java/hivemall/tools/TryCastUDF.java b/core/src/main/java/hivemall/tools/TryCastUDF.java
index adb8328..a3df115 100644
--- a/core/src/main/java/hivemall/tools/TryCastUDF.java
+++ b/core/src/main/java/hivemall/tools/TryCastUDF.java
@@ -51,7 +51,7 @@ public final class TryCastUDF extends GenericUDF {
}
this.inputOI = argOIs[0];
- String typeString = HiveUtils.getConstString(argOIs[1]);
+ String typeString = HiveUtils.getConstString(argOIs, 1);
ObjectInspector outputOI = HiveUtils.getObjectInspector(typeString, true);
this.converter = ObjectInspectorConverters.getConverter(inputOI, outputOI);
diff --git a/core/src/main/java/hivemall/tools/aggr/MajorityVoteUDAF.java b/core/src/main/java/hivemall/tools/aggr/MajorityVoteUDAF.java
index 89f0409..e2899fd 100644
--- a/core/src/main/java/hivemall/tools/aggr/MajorityVoteUDAF.java
+++ b/core/src/main/java/hivemall/tools/aggr/MajorityVoteUDAF.java
@@ -60,7 +60,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
" majority_vote(k) as k\n" +
"from \n" +
" data;\n" +
- "> 2")
+ "2")
//@formatter:on
public final class MajorityVoteUDAF extends AbstractGenericUDAFResolver {
diff --git a/core/src/main/java/hivemall/tools/aggr/MaxByUDAF.java b/core/src/main/java/hivemall/tools/aggr/MaxByUDAF.java
index ae89a70..0dd1250 100644
--- a/core/src/main/java/hivemall/tools/aggr/MaxByUDAF.java
+++ b/core/src/main/java/hivemall/tools/aggr/MaxByUDAF.java
@@ -57,7 +57,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
" max_by(name, age) as name\n" +
"from\n" +
" data;\n" +
- "> tom")
+ "tom")
//@formatter:on
public final class MaxByUDAF extends AbstractGenericUDAFResolver {
diff --git a/core/src/main/java/hivemall/tools/aggr/MinByUDAF.java b/core/src/main/java/hivemall/tools/aggr/MinByUDAF.java
index 1c001ea..23499cd 100644
--- a/core/src/main/java/hivemall/tools/aggr/MinByUDAF.java
+++ b/core/src/main/java/hivemall/tools/aggr/MinByUDAF.java
@@ -58,7 +58,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
"from\n" +
" data;\n" +
"\n" +
- "> jake")
+ "jake")
//@formatter:on
public final class MinByUDAF extends AbstractGenericUDAFResolver {
diff --git a/core/src/main/java/hivemall/tools/array/ArangeUDF.java b/core/src/main/java/hivemall/tools/array/ArangeUDF.java
index 1097956..2618b26 100644
--- a/core/src/main/java/hivemall/tools/array/ArangeUDF.java
+++ b/core/src/main/java/hivemall/tools/array/ArangeUDF.java
@@ -51,7 +51,7 @@ extended = "SELECT arange(5), arange(1, 5), arange(1, 5, 1), arange(0, 5, 1);\n"
"> 1, 3, 5\n" +
"\n" +
"SELECT arange(-1, -6, 2);\n" +
- "> -1, -3, -5")
+ "-1, -3, -5")
// @formatter:on
@UDFType(deterministic = true, stateful = false)
public final class ArangeUDF extends GenericUDF {
diff --git a/core/src/main/java/hivemall/tools/array/ArgmaxUDF.java b/core/src/main/java/hivemall/tools/array/ArgmaxUDF.java
index d697907..0e8c132 100644
--- a/core/src/main/java/hivemall/tools/array/ArgmaxUDF.java
+++ b/core/src/main/java/hivemall/tools/array/ArgmaxUDF.java
@@ -35,7 +35,7 @@ import org.apache.hadoop.io.IntWritable;
@Description(name = "argmax",
value = "_FUNC_(array<T> a) - Returns the first index of the maximum value",
- extended = "SELECT argmax(array(5,2,0,1));\n" + "> 0")
+ extended = "SELECT argmax(array(5,2,0,1));\n" + "0")
@UDFType(deterministic = true, stateful = false)
public final class ArgmaxUDF extends GenericUDF {
diff --git a/core/src/main/java/hivemall/tools/array/ArgminUDF.java b/core/src/main/java/hivemall/tools/array/ArgminUDF.java
index a2664b7..b24b755 100644
--- a/core/src/main/java/hivemall/tools/array/ArgminUDF.java
+++ b/core/src/main/java/hivemall/tools/array/ArgminUDF.java
@@ -35,7 +35,7 @@ import org.apache.hadoop.io.IntWritable;
@Description(name = "argmin",
value = "_FUNC_(array<T> a) - Returns the first index of the minimum value",
- extended = "SELECT argmin(array(5,2,0,1));\n" + "> 2")
+ extended = "SELECT argmin(array(5,2,0,1));\n" + "2")
@UDFType(deterministic = true, stateful = false)
public final class ArgminUDF extends GenericUDF {
diff --git a/core/src/main/java/hivemall/tools/array/ArgrankUDF.java b/core/src/main/java/hivemall/tools/array/ArgrankUDF.java
index f283d39..54f7665 100644
--- a/core/src/main/java/hivemall/tools/array/ArgrankUDF.java
+++ b/core/src/main/java/hivemall/tools/array/ArgrankUDF.java
@@ -45,7 +45,7 @@ import org.apache.hadoop.io.IntWritable;
@Description(name = "argrank",
value = "_FUNC_(array<ANY> a) - Returns the indices that would sort an array.",
extended = "SELECT argrank(array(5,2,0,1)), argsort(argsort(array(5,2,0,1)));\n" +
- "> [3, 2, 0, 1] [3, 2, 0, 1]")
+ "[3, 2, 0, 1] [3, 2, 0, 1]")
// @formatter:on
@UDFType(deterministic = true, stateful = false)
public final class ArgrankUDF extends GenericUDF {
diff --git a/core/src/main/java/hivemall/tools/array/ArgsortUDF.java b/core/src/main/java/hivemall/tools/array/ArgsortUDF.java
index f8e6b8f..2404dd2 100644
--- a/core/src/main/java/hivemall/tools/array/ArgsortUDF.java
+++ b/core/src/main/java/hivemall/tools/array/ArgsortUDF.java
@@ -45,10 +45,10 @@ import org.apache.hadoop.io.IntWritable;
@Description(name = "argsort",
value = "_FUNC_(array<ANY> a) - Returns the indices that would sort an array.",
extended = "SELECT argsort(array(5,2,0,1));\n" +
- "> 2, 3, 1, 0\n" +
+ "2, 3, 1, 0\n" +
"\n" +
"SELECT array_slice(array(5,2,0,1), argsort(array(5,2,0,1)));\n" +
- "> 0, 1, 2, 5")
+ "0, 1, 2, 5")
// @formatter:on
@UDFType(deterministic = true, stateful = false)
public final class ArgsortUDF extends GenericUDF {
diff --git a/core/src/main/java/hivemall/tools/math/IsFiniteUDF.java b/core/src/main/java/hivemall/tools/math/IsFiniteUDF.java
index 4d6550f..f0b6e94 100644
--- a/core/src/main/java/hivemall/tools/math/IsFiniteUDF.java
+++ b/core/src/main/java/hivemall/tools/math/IsFiniteUDF.java
@@ -22,7 +22,7 @@ import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
@Description(name = "is_finite", value = "_FUNC_(x) - Determine if x is finite.",
- extended = "SELECT is_finite(333), is_finite(infinity());\n" + "> true false")
+ extended = "SELECT is_finite(333), is_finite(infinity());\n" + "true false")
public final class IsFiniteUDF extends UDF {
public Boolean evaluate(Double num) {
if (num == null) {
diff --git a/core/src/main/java/hivemall/tools/math/L2NormUDAF.java b/core/src/main/java/hivemall/tools/math/L2NormUDAF.java
index dc65801..50ec5cf 100644
--- a/core/src/main/java/hivemall/tools/math/L2NormUDAF.java
+++ b/core/src/main/java/hivemall/tools/math/L2NormUDAF.java
@@ -33,7 +33,7 @@ import org.apache.hadoop.hive.serde2.io.DoubleWritable;
")\n" +
"select l2_norm(v) as l2norm\n" +
"from input;\n" +
- "> 3.7416573867739413 = sqrt(1^2+2^2+3^2))")
+ "3.7416573867739413 = sqrt(1^2+2^2+3^2))")
// @formatter:on
public final class L2NormUDAF extends UDAF {
diff --git a/core/src/main/java/hivemall/tools/math/NanUDF.java b/core/src/main/java/hivemall/tools/math/NanUDF.java
index f00a5ba..d316631 100644
--- a/core/src/main/java/hivemall/tools/math/NanUDF.java
+++ b/core/src/main/java/hivemall/tools/math/NanUDF.java
@@ -22,7 +22,7 @@ import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
@Description(name = "nan", value = "_FUNC_() - Returns the constant representing not-a-number.",
- extended = "SELECT nan(), is_nan(nan());\n" + "> NaN true")
+ extended = "SELECT nan(), is_nan(nan());\n" + "NaN true")
public final class NanUDF extends UDF {
public double evaluate() {
return Double.NaN;
diff --git a/core/src/main/java/hivemall/tools/math/SigmoidGenericUDF.java b/core/src/main/java/hivemall/tools/math/SigmoidGenericUDF.java
index d097a18..8540aa6 100644
--- a/core/src/main/java/hivemall/tools/math/SigmoidGenericUDF.java
+++ b/core/src/main/java/hivemall/tools/math/SigmoidGenericUDF.java
@@ -49,8 +49,8 @@ import org.apache.hadoop.io.FloatWritable;
" sigmoid(x)\n" +
"from\n" +
" input;\n" +
- "> 0.04742587317756678 0.04742587357759476\n" +
- "> 0.9525741268224334 0.9525741338729858")
+ "0.04742587317756678 0.04742587357759476\n" +
+ "0.9525741268224334 0.9525741338729858")
// @formatter:on
@UDFType(deterministic = true, stateful = false)
public final class SigmoidGenericUDF extends GenericUDF {
diff --git a/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java b/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java
index 4c52ebe..8366c83 100644
--- a/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java
+++ b/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java
@@ -58,7 +58,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn
" transpose_and_dot(y, x) as yx\n" +
"from \n" +
" input;\n\n" +
- "> [[\"3.0\",\"6.0\"],[\"5.0\",\"10.0\"],[\"7.0\",\"14.0\"],[\"9.0\",\"18.0\"]]" +
+ "[[\"3.0\",\"6.0\"],[\"5.0\",\"10.0\"],[\"7.0\",\"14.0\"],[\"9.0\",\"18.0\"]]" +
" [[\"3.0\",\"5.0\",\"7.0\",\"9.0\"],[\"6.0\",\"10.0\",\"14.0\",\"18.0\"]]\n")
// @formatter:on
public final class TransposeAndDotUDAF extends AbstractGenericUDAFResolver {
diff --git a/docs/gitbook/misc/generic_funcs.md b/docs/gitbook/misc/generic_funcs.md
index fb0b240..dc195a6 100644
--- a/docs/gitbook/misc/generic_funcs.md
+++ b/docs/gitbook/misc/generic_funcs.md
@@ -34,7 +34,7 @@ This page describes a list of useful Hivemall generic functions. See also a [lis
majority_vote(k) as k
from
data;
- > 2
+ 2
```
- `max_by(x, y)` - Returns the value of x associated with the maximum value of y over all input values.
@@ -51,7 +51,7 @@ This page describes a list of useful Hivemall generic functions. See also a [lis
max_by(name, age) as name
from
data;
- > tom
+ tom
```
- `min_by(x, y)` - Returns the value of x associated with the minimum value of y over all input values.
@@ -69,7 +69,7 @@ This page describes a list of useful Hivemall generic functions. See also a [lis
from
data;
- > jake
+ jake
```
# Array
@@ -83,34 +83,34 @@ This page describes a list of useful Hivemall generic functions. See also a [lis
> 1, 3, 5
SELECT arange(-1, -6, 2);
- > -1, -3, -5
+ -1, -3, -5
```
- `argmax(array<T> a)` - Returns the first index of the maximum value
```sql
SELECT argmax(array(5,2,0,1));
- > 0
+ 0
```
- `argmin(array<T> a)` - Returns the first index of the minimum value
```sql
SELECT argmin(array(5,2,0,1));
- > 2
+ 2
```
- `argrank(array<ANY> a)` - Returns the indices that would sort an array.
```sql
SELECT argrank(array(5,2,0,1)), argsort(argsort(array(5,2,0,1)));
- > [3, 2, 0, 1] [3, 2, 0, 1]
+ [3, 2, 0, 1] [3, 2, 0, 1]
```
- `argsort(array<ANY> a)` - Returns the indices that would sort an array.
```sql
SELECT argsort(array(5,2,0,1));
- > 2, 3, 1, 0
+ 2, 3, 1, 0
SELECT array_slice(array(5,2,0,1), argsort(array(5,2,0,1)));
- > 0, 1, 2, 5
+ 0, 1, 2, 5
```
- `array_append(array<T> arr, T elem)` - Append an element to the end of an array
@@ -712,7 +712,7 @@ This page describes a list of useful Hivemall generic functions. See also a [lis
- `is_finite(x)` - Determine if x is finite.
```sql
SELECT is_finite(333), is_finite(infinity());
- > true false
+ true false
```
- `is_infinite(x)` - Determine if x is infinite.
@@ -726,13 +726,13 @@ This page describes a list of useful Hivemall generic functions. See also a [lis
)
select l2_norm(v) as l2norm
from input;
- > 3.7416573867739413 = sqrt(1^2+2^2+3^2))
+ 3.7416573867739413 = sqrt(1^2+2^2+3^2))
```
- `nan()` - Returns the constant representing not-a-number.
```sql
SELECT nan(), is_nan(nan());
- > NaN true
+ NaN true
```
- `sigmoid(x)` - Returns 1.0 / (1.0 + exp(-x))
@@ -747,8 +747,8 @@ This page describes a list of useful Hivemall generic functions. See also a [lis
sigmoid(x)
from
input;
- > 0.04742587317756678 0.04742587357759476
- > 0.9525741268224334 0.9525741338729858
+ 0.04742587317756678 0.04742587357759476
+ 0.9525741268224334 0.9525741338729858
```
# Vector/Matrix
@@ -766,7 +766,7 @@ This page describes a list of useful Hivemall generic functions. See also a [lis
from
input;
- > [["3.0","6.0"],["5.0","10.0"],["7.0","14.0"],["9.0","18.0"]] [["3.0","5.0","7.0","9.0"],["6.0","10.0","14.0","18.0"]]
+ [["3.0","6.0"],["5.0","10.0"],["7.0","14.0"],["9.0","18.0"]] [["3.0","5.0","7.0","9.0"],["6.0","10.0","14.0","18.0"]]
```