You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hivemall.apache.org by my...@apache.org on 2019/06/20 10:30:19 UTC
[incubator-hivemall] 03/03: Updated function usage page
This is an automated email from the ASF dual-hosted git repository.
myui pushed a commit to branch libsvm
in repository https://gitbox.apache.org/repos/asf/incubator-hivemall.git
commit 8dd14c1e8c818b6a84ddeb8c5dcc3bed7d708de1
Author: Makoto Yui <my...@apache.org>
AuthorDate: Thu Jun 20 19:30:06 2019 +0900
Updated function usage page
---
.../hivemall/ftvec/conv/ToLibSVMFormatUDF.java | 15 ++++++++--
docs/gitbook/misc/funcs.md | 33 ++++++++++++++++++----
2 files changed, 41 insertions(+), 7 deletions(-)
diff --git a/core/src/main/java/hivemall/ftvec/conv/ToLibSVMFormatUDF.java b/core/src/main/java/hivemall/ftvec/conv/ToLibSVMFormatUDF.java
index a1c85d9..723cb0b 100644
--- a/core/src/main/java/hivemall/ftvec/conv/ToLibSVMFormatUDF.java
+++ b/core/src/main/java/hivemall/ftvec/conv/ToLibSVMFormatUDF.java
@@ -46,9 +46,18 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+// @formatter:off
@Description(name = "to_libsvm_format",
value = "_FUNC_(array<string> feautres [, double/integer target, const string options])"
- + " - Returns a string representation of libsvm")
+ + " - Returns a string representation of libsvm",
+ extended = "Usage:\n" +
+ " select to_libsvm_format(array('apple:3.4','orange:2.1'))\n" +
+ " > 6284535:3.4 8104713:2.1\n" +
+ " select to_libsvm_format(array('apple:3.4','orange:2.1'), '-features 10')\n" +
+ " > 3:2.1 7:3.4\n" +
+ " select to_libsvm_format(array('7:3.4','3:2.1'), 5.0)\n" +
+ " > 5.0 3:2.1 7:3.4")
+// @formatter:on
@UDFType(deterministic = true, stateful = false)
public final class ToLibSVMFormatUDF extends UDFWithOptions {
@@ -69,7 +78,9 @@ public final class ToLibSVMFormatUDF extends UDFWithOptions {
@Override
protected CommandLine processOptions(@Nonnull String optionValue) throws UDFArgumentException {
CommandLine cl = parseOptions(optionValue);
- this._numFeatures = Primitives.parseInt(cl.getOptionValue("num_features"), _numFeatures);
+ this._numFeatures = Primitives.parseInt(cl.getOptionValue("num_features"),
+ MurmurHash3.DEFAULT_NUM_FEATURES);
+ assumeTrue(_numFeatures > 0, "num_features must be greater than 0: " + _numFeatures);
return cl;
}
diff --git a/docs/gitbook/misc/funcs.md b/docs/gitbook/misc/funcs.md
index ade9ee3..1b1b280 100644
--- a/docs/gitbook/misc/funcs.md
+++ b/docs/gitbook/misc/funcs.md
@@ -65,13 +65,25 @@ Reference: <a href="https://papers.nips.cc/paper/3848-adaptive-regularization-of
GROUP BY feature
```
-- `train_pa1_regr(array<int|bigint|string> features, float target [, constant string options])` - PA-1 regressor that returns a relation consists of `<int|bigint|string> feature, float weight`. Find PA-1 algorithm detail in http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf
-
-- `train_pa1a_regr(array<int|bigint|string> features, float target [, constant string options])` - Returns a relation consists of `<int|bigint|string> feature, float weight`.
+- `train_pa1_regr(array<int|bigint|string> features, float target [, constant string options])` - PA-1 regressor that returns a relation consisting of `(int|bigint|string) feature, float weight`.
+ ```sql
+ SELECT
+ feature,
+ avg(weight) as weight
+ FROM
+ (SELECT
+ train_pa1_regr(features,label) as (feature,weight)
+ FROM
+ training_data
+ ) t
+ GROUP BY feature
+ ```
+Reference: <a href="http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf" target="_blank">Koby Crammer et al., Online Passive-Aggressive Algorithms. Journal of Machine Learning Research, 2006.</a><br/>
+- `train_pa1a_regr(array<int|bigint|string> features, float target [, constant string options])` - Returns a relation consisting of `(int|bigint|string) feature, float weight`.
-- `train_pa2_regr(array<int|bigint|string> features, float target [, constant string options])` - Returns a relation consists of `<int|bigint|string> feature, float weight`.
+- `train_pa2_regr(array<int|bigint|string> features, float target [, constant string options])` - Returns a relation consisting of `(int|bigint|string) feature, float weight`.
-- `train_pa2a_regr(array<int|bigint|string> features, float target [, constant string options])` - Returns a relation consists of `<int|bigint|string> feature, float weight`.
+- `train_pa2a_regr(array<int|bigint|string> features, float target [, constant string options])` - Returns a relation consisting of `(int|bigint|string) feature, float weight`.
- `train_regressor(list<string|int|bigint> features, double label [, const string options])` - Returns a relation consists of <string|int|bigint feature, float weight>
```
@@ -261,6 +273,17 @@ Reference: <a href="https://papers.nips.cc/paper/3848-adaptive-regularization-of
- `to_dense_features(array<string> feature_vector, int dimensions)` - Returns a dense feature in array<float>
+- `to_libsvm_format(array<string> features [, double/integer target, const string options])` - Returns a string representation of libsvm
+ ```sql
+ Usage:
+ select to_libsvm_format(array('apple:3.4','orange:2.1'))
+ > 6284535:3.4 8104713:2.1
+ select to_libsvm_format(array('apple:3.4','orange:2.1'), '-features 10')
+ > 3:2.1 7:3.4
+ select to_libsvm_format(array('7:3.4','3:2.1'), 5.0)
+ > 5.0 3:2.1 7:3.4
+ ```
+
- `to_sparse_features(array<float> feature_vector)` - Returns a sparse feature in array<string>
## Feature hashing