You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hivemall.apache.org by my...@apache.org on 2019/06/20 10:30:19 UTC

[incubator-hivemall] 03/03: Updated function usage page

This is an automated email from the ASF dual-hosted git repository.

myui pushed a commit to branch libsvm
in repository https://gitbox.apache.org/repos/asf/incubator-hivemall.git

commit 8dd14c1e8c818b6a84ddeb8c5dcc3bed7d708de1
Author: Makoto Yui <my...@apache.org>
AuthorDate: Thu Jun 20 19:30:06 2019 +0900

    Updated function usage page
---
 .../hivemall/ftvec/conv/ToLibSVMFormatUDF.java     | 15 ++++++++--
 docs/gitbook/misc/funcs.md                         | 33 ++++++++++++++++++----
 2 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/core/src/main/java/hivemall/ftvec/conv/ToLibSVMFormatUDF.java b/core/src/main/java/hivemall/ftvec/conv/ToLibSVMFormatUDF.java
index a1c85d9..723cb0b 100644
--- a/core/src/main/java/hivemall/ftvec/conv/ToLibSVMFormatUDF.java
+++ b/core/src/main/java/hivemall/ftvec/conv/ToLibSVMFormatUDF.java
@@ -46,9 +46,18 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 
+// @formatter:off
 @Description(name = "to_libsvm_format",
         value = "_FUNC_(array<string> feautres [, double/integer target, const string options])"
-                + " - Returns a string representation of libsvm")
+                + " - Returns a string representation of libsvm",
+                extended = "Usage:\n" + 
+                        " select to_libsvm_format(array('apple:3.4','orange:2.1'))\n" + 
+                        " > 6284535:3.4 8104713:2.1\n" + 
+                        " select to_libsvm_format(array('apple:3.4','orange:2.1'), '-features 10')\n" + 
+                        " > 3:2.1 7:3.4\n" + 
+                        " select to_libsvm_format(array('7:3.4','3:2.1'), 5.0)\n" + 
+                        " > 5.0 3:2.1 7:3.4")
+// @formatter:on
 @UDFType(deterministic = true, stateful = false)
 public final class ToLibSVMFormatUDF extends UDFWithOptions {
 
@@ -69,7 +78,9 @@ public final class ToLibSVMFormatUDF extends UDFWithOptions {
     @Override
     protected CommandLine processOptions(@Nonnull String optionValue) throws UDFArgumentException {
         CommandLine cl = parseOptions(optionValue);
-        this._numFeatures = Primitives.parseInt(cl.getOptionValue("num_features"), _numFeatures);
+        this._numFeatures = Primitives.parseInt(cl.getOptionValue("num_features"),
+            MurmurHash3.DEFAULT_NUM_FEATURES);
+        assumeTrue(_numFeatures > 0, "num_features must be greater than 0: " + _numFeatures);
         return cl;
     }
 
diff --git a/docs/gitbook/misc/funcs.md b/docs/gitbook/misc/funcs.md
index ade9ee3..1b1b280 100644
--- a/docs/gitbook/misc/funcs.md
+++ b/docs/gitbook/misc/funcs.md
@@ -65,13 +65,25 @@ Reference: <a href="https://papers.nips.cc/paper/3848-adaptive-regularization-of
   GROUP BY feature
   ```
 
-- `train_pa1_regr(array<int|bigint|string> features, float target [, constant string options])` - PA-1 regressor that returns a relation consists of `&lt;int|bigint|string&gt; feature, float weight`. Find PA-1 algorithm detail in http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf
-
-- `train_pa1a_regr(array<int|bigint|string> features, float target [, constant string options])` - Returns a relation consists of `&lt;int|bigint|string&gt; feature, float weight`.
+- `train_pa1_regr(array<int|bigint|string> features, float target [, constant string options])` - PA-1 regressor that returns a relation consisting of `(int|bigint|string) feature, float weight`.
+  ```sql
+  SELECT 
+   feature,
+   avg(weight) as weight
+  FROM 
+   (SELECT 
+       train_pa1_regr(features,label) as (feature,weight)
+    FROM 
+       training_data
+   ) t 
+  GROUP BY feature
+  ```
+Reference: <a href="http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf" target="_blank">Koby Crammer et al., Online Passive-Aggressive Algorithms. Journal of Machine Learning Research, 2006.</a><br/>
+- `train_pa1a_regr(array<int|bigint|string> features, float target [, constant string options])` - Returns a relation consisting of `(int|bigint|string) feature, float weight`.
 
-- `train_pa2_regr(array<int|bigint|string> features, float target [, constant string options])` - Returns a relation consists of `&lt;int|bigint|string&gt; feature, float weight`.
+- `train_pa2_regr(array<int|bigint|string> features, float target [, constant string options])` - Returns a relation consisting of `(int|bigint|string) feature, float weight`.
 
-- `train_pa2a_regr(array<int|bigint|string> features, float target [, constant string options])` - Returns a relation consists of `&lt;int|bigint|string&gt; feature, float weight`.
+- `train_pa2a_regr(array<int|bigint|string> features, float target [, constant string options])` - Returns a relation consisting of `(int|bigint|string) feature, float weight`.
 
 - `train_regressor(list<string|int|bigint> features, double label [, const string options])` - Returns a relation consists of &lt;string|int|bigint feature, float weight&gt;
   ```
@@ -261,6 +273,17 @@ Reference: <a href="https://papers.nips.cc/paper/3848-adaptive-regularization-of
 
 - `to_dense_features(array<string> feature_vector, int dimensions)` - Returns a dense feature in array&lt;float&gt;
 
+- `to_libsvm_format(array<string> features [, double/integer target, const string options])` - Returns a string representation of libsvm
+  ```sql
+  Usage:
+   select to_libsvm_format(array('apple:3.4','orange:2.1'))
+   > 6284535:3.4 8104713:2.1
+   select to_libsvm_format(array('apple:3.4','orange:2.1'), '-features 10')
+   > 3:2.1 7:3.4
+   select to_libsvm_format(array('7:3.4','3:2.1'), 5.0)
+   > 5.0 3:2.1 7:3.4
+  ```
+
 - `to_sparse_features(array<float> feature_vector)` - Returns a sparse feature in array&lt;string&gt;
 
 ## Feature hashing