You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hivemall.apache.org by my...@apache.org on 2019/04/13 20:37:20 UTC

[incubator-hivemall] branch master updated: [HIVEMALL-237-1] Add usage in ML function reference page

This is an automated email from the ASF dual-hosted git repository.

myui pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hivemall.git


The following commit(s) were added to refs/heads/master by this push:
     new bbedfac  [HIVEMALL-237-1] Add usage in ML function reference page
bbedfac is described below

commit bbedfac0b1c042f0214e9a1275fa5eb5d8f53b40
Author: Makoto Yui <my...@apache.org>
AuthorDate: Sun Apr 14 05:37:14 2019 +0900

    [HIVEMALL-237-1] Add usage in ML function reference page
    
    ## What changes were proposed in this pull request?
    
    Add usage in ML function reference page
    
    ## What type of PR is it?
    
    Documentation
    
    ## What is the Jira issue?
    
    https://issues.apache.org/jira/browse/HIVEMALL-237
    
    ## How was this patch tested?
    
    via CI
    
    ## Checklist
    
    - [x] Did you apply source code formatter, i.e., `./bin/format_code.sh`, for your commit?
    
    Author: Makoto Yui <my...@apache.org>
    Author: Makoto YUI <yu...@gmail.com>
    
    Closes #183 from myui/HIVEMALL-237.
---
 bin/update_func_md.sh                              |  3 +-
 core/src/main/java/hivemall/annotations/Cite.java  | 35 ++++++++++++
 .../hivemall/classifier/AROWClassifierUDTF.java    |  3 +
 .../java/hivemall/ftvec/text/OkapiBM25UDF.java     |  4 +-
 .../hivemall/regression/AROWRegressionUDTF.java    | 50 +++++++++++++++--
 .../PassiveAggressiveRegressionUDTF.java           | 23 ++++++--
 docs/gitbook/ft_engineering/binarize.md            | 19 +++++++
 docs/gitbook/misc/funcs.md                         | 64 ++++++++++++++++++----
 tools/hivemall-docs/pom.xml                        |  2 +-
 .../java/hivemall/docs/FuncsListGeneratorMojo.java | 18 ++++++
 10 files changed, 199 insertions(+), 22 deletions(-)

diff --git a/bin/update_func_md.sh b/bin/update_func_md.sh
index 6357e30..462704b 100755
--- a/bin/update_func_md.sh
+++ b/bin/update_func_md.sh
@@ -37,7 +37,8 @@ VERSION=`cat VERSION`
 # Deploy to local Maven repos
 
 export MAVEN_OPTS="-XX:MaxMetaspaceSize=256m -Dhttps.protocols=TLSv1,TLSv1.1,TLSv1.2"
-mvn clean install -DskipTests=true -Dmaven.test.skip=true -pl '.,core,nlp,xgboost,tools/hivemall-docs'
+mvn dependency:purge-local-repository -DmanualInclude=org.apache.hivemall
+mvn -U clean install -DskipTests=true -Dmaven.test.skip=true -pl '.,core,nlp,xgboost,tools/hivemall-docs'
 
 # Generate docs
 
diff --git a/core/src/main/java/hivemall/annotations/Cite.java b/core/src/main/java/hivemall/annotations/Cite.java
new file mode 100644
index 0000000..2b93cd6
--- /dev/null
+++ b/core/src/main/java/hivemall/annotations/Cite.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.annotations;
+
+import java.lang.annotation.Documented;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+
+@Documented
+@Retention(RetentionPolicy.RUNTIME)
+public @interface Cite {
+    @Nonnull
+    String description();
+    @Nullable
+    String url();
+}
diff --git a/core/src/main/java/hivemall/classifier/AROWClassifierUDTF.java b/core/src/main/java/hivemall/classifier/AROWClassifierUDTF.java
index 30cd909..ecdd598 100644
--- a/core/src/main/java/hivemall/classifier/AROWClassifierUDTF.java
+++ b/core/src/main/java/hivemall/classifier/AROWClassifierUDTF.java
@@ -18,6 +18,7 @@
  */
 package hivemall.classifier;
 
+import hivemall.annotations.Cite;
 import hivemall.model.FeatureValue;
 import hivemall.model.IWeightValue;
 import hivemall.model.PredictionResult;
@@ -45,6 +46,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
         value = "_FUNC_(list<string|int|bigint> features, int label [, const string options])"
                 + " - Returns a relation consists of <string|int|bigint feature, float weight, float covar>",
         extended = "Build a prediction model by Adaptive Regularization of Weight Vectors (AROW) binary classifier")
+@Cite(description = "K. Crammer, A. Kulesza, and M. Dredze, \"Adaptive Regularization of Weight Vectors\", In Proc. NIPS, 2009.",
+        url = "https://papers.nips.cc/paper/3848-adaptive-regularization-of-weight-vectors.pdf")
 public class AROWClassifierUDTF extends BinaryOnlineClassifierUDTF {
 
     /** Regularization parameter r */
diff --git a/core/src/main/java/hivemall/ftvec/text/OkapiBM25UDF.java b/core/src/main/java/hivemall/ftvec/text/OkapiBM25UDF.java
index acd80bf..edf3681 100644
--- a/core/src/main/java/hivemall/ftvec/text/OkapiBM25UDF.java
+++ b/core/src/main/java/hivemall/ftvec/text/OkapiBM25UDF.java
@@ -38,7 +38,9 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
 
 @Description(name = "bm25",
-        value = "_FUNC_(double termFrequency, int docLength, double avgDocLength, int numDocs, int numDocsWithTerm [, const string options]) - Return an Okapi BM25 score in double")
+        value = "_FUNC_(double termFrequency, int docLength, double avgDocLength, int numDocs, int numDocsWithTerm [, const string options]) "
+                + "- Return an Okapi BM25 score in double. "
+                + "Refer to http://hivemall.incubator.apache.org/userguide/ft_engineering/bm25.html for usage")
 @UDFType(deterministic = true, stateful = false)
 public final class OkapiBM25UDF extends UDFWithOptions {
 
diff --git a/core/src/main/java/hivemall/regression/AROWRegressionUDTF.java b/core/src/main/java/hivemall/regression/AROWRegressionUDTF.java
index f393a3b..419944b 100644
--- a/core/src/main/java/hivemall/regression/AROWRegressionUDTF.java
+++ b/core/src/main/java/hivemall/regression/AROWRegressionUDTF.java
@@ -18,6 +18,7 @@
  */
 package hivemall.regression;
 
+import hivemall.annotations.Cite;
 import hivemall.model.FeatureValue;
 import hivemall.model.IWeightValue;
 import hivemall.model.PredictionResult;
@@ -34,9 +35,24 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 
+// @formatter:off
 @Description(name = "train_arow_regr",
         value = "_FUNC_(array<int|bigint|string> features, float target [, constant string options])"
-                + " - Returns a relation consists of <{int|bigint|string} feature, float weight, float covar>")
+                + " - a standard AROW (Adaptive Regularization of Weight Vectors) regressor "
+                + "that uses `y - w^Tx` for the loss function.",
+        extended = "SELECT \n" + 
+                "  feature,\n" + 
+                "  argmin_kld(weight, covar) as weight\n" + 
+                "FROM (\n" + 
+                "  SELECT \n" + 
+                "     train_arow_regr(features,label) as (feature,weight,covar)\n" + 
+                "  FROM \n" + 
+                "     training_data\n" + 
+                " ) t \n" + 
+                "GROUP BY feature")
+// @formatter:on
+@Cite(description = "K. Crammer, A. Kulesza, and M. Dredze, \"Adaptive Regularization of Weight Vectors\", In Proc. NIPS, 2009.",
+        url = "https://papers.nips.cc/paper/3848-adaptive-regularization-of-weight-vectors.pdf")
 public class AROWRegressionUDTF extends RegressionBaseUDTF {
 
     /** Regularization parameter r */
@@ -103,7 +119,7 @@ public class AROWRegressionUDTF extends RegressionBaseUDTF {
      * @return target - predicted
      */
     protected float loss(float target, float predicted) {
-        return target - predicted; // y - m^Tx
+        return target - predicted; // y - w^Tx
     }
 
     @Override
@@ -141,9 +157,22 @@ public class AROWRegressionUDTF extends RegressionBaseUDTF {
         return new WeightValueWithCovar(new_w, new_cov);
     }
 
+    // @formatter:off
     @Description(name = "train_arowe_regr",
             value = "_FUNC_(array<int|bigint|string> features, float target [, constant string options])"
-                    + " - Returns a relation consists of <{int|bigint|string} feature, float weight, float covar>")
+                    + " - a refined version of AROW (Adaptive Regularization of Weight Vectors) regressor "
+                    + "that uses epsilon-insensitive hinge loss `|w^Tx - y| - epsilon` for the loss function",
+            extended = "SELECT \n" + 
+                    "  feature,\n" + 
+                    "  argmin_kld(weight, covar) as weight\n" + 
+                    "FROM (\n" + 
+                    "  SELECT \n" + 
+                    "     train_arowe_regr(features,label) as (feature,weight,covar)\n" + 
+                    "  FROM \n" + 
+                    "     training_data\n" + 
+                    " ) t \n" + 
+                    "GROUP BY feature")
+    // @formatter:on
     public static class AROWe extends AROWRegressionUDTF {
 
         /** Sensitivity to prediction mistakes */
@@ -199,9 +228,22 @@ public class AROWRegressionUDTF extends RegressionBaseUDTF {
         }
     }
 
+    // @formatter:off
     @Description(name = "train_arowe2_regr",
             value = "_FUNC_(array<int|bigint|string> features, float target [, constant string options])"
-                    + " - Returns a relation consists of <{int|bigint|string} feature, float weight, float covar>")
+                    + " - a refined version of AROW (Adaptive Regularization of Weight Vectors) regressor "
+                    + "that uses adaptive epsilon-insensitive hinge loss `|w^Tx - y| - epsilon * stddev` for the loss function",
+            extended = "SELECT \n" + 
+                    "  feature,\n" + 
+                    "  argmin_kld(weight, covar) as weight\n" + 
+                    "FROM (\n" + 
+                    "  SELECT \n" + 
+                    "     train_arowe2_regr(features,label) as (feature,weight,covar)\n" + 
+                    "  FROM \n" + 
+                    "     training_data\n" + 
+                    " ) t \n" + 
+                    "GROUP BY feature")
+    // @formatter:on
     public static class AROWe2 extends AROWe {
 
         private OnlineVariance targetStdDev;
diff --git a/core/src/main/java/hivemall/regression/PassiveAggressiveRegressionUDTF.java b/core/src/main/java/hivemall/regression/PassiveAggressiveRegressionUDTF.java
index c0e53f0..7ab13d6 100644
--- a/core/src/main/java/hivemall/regression/PassiveAggressiveRegressionUDTF.java
+++ b/core/src/main/java/hivemall/regression/PassiveAggressiveRegressionUDTF.java
@@ -18,6 +18,7 @@
  */
 package hivemall.regression;
 
+import hivemall.annotations.Cite;
 import hivemall.model.FeatureValue;
 import hivemall.model.PredictionResult;
 import hivemall.optimizer.LossFunctions;
@@ -32,9 +33,23 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 
+// @formatter:off
 @Description(name = "train_pa1_regr",
         value = "_FUNC_(array<int|bigint|string> features, float target [, constant string options])"
-                + " - Returns a relation consists of <{int|bigint|string} feature, float weight>")
+                + " - PA-1 regressor that returns a relation consisting of `(int|bigint|string) feature, float weight`.",
+        extended = "SELECT \n" + 
+                " feature,\n" + 
+                " avg(weight) as weight\n" + 
+                "FROM \n" + 
+                " (SELECT \n" + 
+                "     train_pa1_regr(features,label) as (feature,weight)\n" + 
+                "  FROM \n" + 
+                "     training_data\n" + 
+                " ) t \n" + 
+                "GROUP BY feature")
+// @formatter:on
+@Cite(description = "Koby Crammer et.al., Online Passive-Aggressive Algorithms. Journal of Machine Learning Research, 2006.",
+        url = "http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf")
 public class PassiveAggressiveRegressionUDTF extends RegressionBaseUDTF {
 
     /** Aggressiveness parameter */
@@ -133,7 +148,7 @@ public class PassiveAggressiveRegressionUDTF extends RegressionBaseUDTF {
 
     @Description(name = "train_pa1a_regr",
             value = "_FUNC_(array<int|bigint|string> features, float target [, constant string options])"
-                    + " - Returns a relation consists of <{int|bigint|string} feature, float weight>")
+                    + " - Returns a relation consisting of `(int|bigint|string) feature, float weight`.")
     public static final class PA1a extends PassiveAggressiveRegressionUDTF {
 
         private OnlineVariance targetStdDev;
@@ -161,7 +176,7 @@ public class PassiveAggressiveRegressionUDTF extends RegressionBaseUDTF {
 
     @Description(name = "train_pa2_regr",
             value = "_FUNC_(array<int|bigint|string> features, float target [, constant string options])"
-                    + " - Returns a relation consists of <{int|bigint|string} feature, float weight>")
+                    + " - Returns a relation consisting of `(int|bigint|string) feature, float weight`.")
     public static class PA2 extends PassiveAggressiveRegressionUDTF {
 
         @Override
@@ -180,7 +195,7 @@ public class PassiveAggressiveRegressionUDTF extends RegressionBaseUDTF {
 
     @Description(name = "train_pa2a_regr",
             value = "_FUNC_(array<int|bigint|string> features, float target [, constant string options])"
-                    + " - Returns a relation consists of <{int|bigint|string} feature, float weight>")
+                    + " - Returns a relation consisting of `(int|bigint|string) feature, float weight`.")
     public static final class PA2a extends PA2 {
 
         private OnlineVariance targetStdDev;
diff --git a/docs/gitbook/ft_engineering/binarize.md b/docs/gitbook/ft_engineering/binarize.md
index f237eb3..797cdb4 100644
--- a/docs/gitbook/ft_engineering/binarize.md
+++ b/docs/gitbook/ft_engineering/binarize.md
@@ -1,3 +1,22 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
 ## Introduction
 
 Expanding numeric labels to actual count of samples can contribute to accuracy improvement in some cases. `binarize_label` explode a record that keeps the count of positive/negative labeled samples into corresponding actual count of samples. For example,
diff --git a/docs/gitbook/misc/funcs.md b/docs/gitbook/misc/funcs.md
index e623572..ade9ee3 100644
--- a/docs/gitbook/misc/funcs.md
+++ b/docs/gitbook/misc/funcs.md
@@ -23,19 +23,55 @@ This page describes a list of Hivemall functions. See also a [list of generic Hi
 
 # Regression
 
-- `train_arow_regr(array<int|bigint|string> features, float target [, constant string options])` - Returns a relation consists of &lt;{int|bigint|string} feature, float weight, float covar&gt;
-
-- `train_arowe2_regr(array<int|bigint|string> features, float target [, constant string options])` - Returns a relation consists of &lt;{int|bigint|string} feature, float weight, float covar&gt;
-
-- `train_arowe_regr(array<int|bigint|string> features, float target [, constant string options])` - Returns a relation consists of &lt;{int|bigint|string} feature, float weight, float covar&gt;
+- `train_arow_regr(array<int|bigint|string> features, float target [, constant string options])` - a standard AROW (Adaptive Regularization of Weight Vectors) regressor that uses `y - w^Tx` for the loss function.
+  ```sql
+  SELECT 
+    feature,
+    argmin_kld(weight, covar) as weight
+  FROM (
+    SELECT 
+       train_arow_regr(features,label) as (feature,weight,covar)
+    FROM 
+       training_data
+   ) t 
+  GROUP BY feature
+  ```
+Reference: <a href="https://papers.nips.cc/paper/3848-adaptive-regularization-of-weight-vectors.pdf" target="_blank">K. Crammer, A. Kulesza, and M. Dredze, "Adaptive Regularization of Weight Vectors", In Proc. NIPS, 2009.</a><br/>
+- `train_arowe2_regr(array<int|bigint|string> features, float target [, constant string options])` - a refined version of AROW (Adaptive Regularization of Weight Vectors) regressor that uses adaptive epsilon-insensitive hinge loss `|w^Tx - y| - epsilon * stddev` for the loss function
+  ```sql
+  SELECT 
+    feature,
+    argmin_kld(weight, covar) as weight
+  FROM (
+    SELECT 
+       train_arowe2_regr(features,label) as (feature,weight,covar)
+    FROM 
+       training_data
+   ) t 
+  GROUP BY feature
+  ```
+
+- `train_arowe_regr(array<int|bigint|string> features, float target [, constant string options])` - a refined version of AROW (Adaptive Regularization of Weight Vectors) regressor that uses epsilon-insensitive hinge loss `|w^Tx - y| - epsilon` for the loss function
+  ```sql
+  SELECT 
+    feature,
+    argmin_kld(weight, covar) as weight
+  FROM (
+    SELECT 
+       train_arowe_regr(features,label) as (feature,weight,covar)
+    FROM 
+       training_data
+   ) t 
+  GROUP BY feature
+  ```
 
-- `train_pa1_regr(array<int|bigint|string> features, float target [, constant string options])` - Returns a relation consists of &lt;{int|bigint|string} feature, float weight&gt;
+- `train_pa1_regr(array<int|bigint|string> features, float target [, constant string options])` - PA-1 regressor that returns a relation consisting of `(int|bigint|string) feature, float weight`. Find details of the PA-1 algorithm in http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf
 
-- `train_pa1a_regr(array<int|bigint|string> features, float target [, constant string options])` - Returns a relation consists of &lt;{int|bigint|string} feature, float weight&gt;
+- `train_pa1a_regr(array<int|bigint|string> features, float target [, constant string options])` - Returns a relation consisting of `(int|bigint|string) feature, float weight`.
 
-- `train_pa2_regr(array<int|bigint|string> features, float target [, constant string options])` - Returns a relation consists of &lt;{int|bigint|string} feature, float weight&gt;
+- `train_pa2_regr(array<int|bigint|string> features, float target [, constant string options])` - Returns a relation consisting of `(int|bigint|string) feature, float weight`.
 
-- `train_pa2a_regr(array<int|bigint|string> features, float target [, constant string options])` - Returns a relation consists of &lt;{int|bigint|string} feature, float weight&gt;
+- `train_pa2a_regr(array<int|bigint|string> features, float target [, constant string options])` - Returns a relation consisting of `(int|bigint|string) feature, float weight`.
 
 - `train_regressor(list<string|int|bigint> features, double label [, const string options])` - Returns a relation consists of &lt;string|int|bigint feature, float weight&gt;
   ```
@@ -52,7 +88,7 @@ This page describes a list of Hivemall functions. See also a [list of generic Hi
   ```
   Build a prediction model by Adaptive Regularization of Weight Vectors (AROW) binary classifier
   ```
-
+Reference: <a href="https://papers.nips.cc/paper/3848-adaptive-regularization-of-weight-vectors.pdf" target="_blank">K. Crammer, A. Kulesza, and M. Dredze, "Adaptive Regularization of Weight Vectors", In Proc. NIPS, 2009.</a><br/>
 - `train_arowh(list<string|int|bigint> features, int label [, const string options])` - Returns a relation consists of &lt;string|int|bigint feature, float weight, float covar&gt;
   ```
   Build a prediction model by AROW binary classifier using hinge loss
@@ -520,6 +556,12 @@ This page describes a list of Hivemall functions. See also a [list of generic Hi
 
 - `xgboost_predict(string rowid, string[] features, string model_id, array<byte> pred_model [, string options])` - Returns a prediction result as (string rowid, float predicted)
 
+# Term Vector Model
+
+- `bm25(double termFrequency, int docLength, double avgDocLength, int numDocs, int numDocsWithTerm [, const string options])` - Return an Okapi BM25 score in double. Refer to http://hivemall.incubator.apache.org/userguide/ft_engineering/bm25.html for usage
+
+- `tf(string text)` - Return a term frequency in &lt;string, float&gt;
+
 # Others
 
 - `hivemall_version()` - Returns the version of Hivemall
@@ -532,7 +574,7 @@ This page describes a list of Hivemall functions. See also a [list of generic Hi
   WITH dual AS (SELECT 1) SELECT lr_datagen('-n_examples 1k -n_features 10') FROM dual;
   ```
 
-- `bm25(double termFrequency, int docLength, double avgDocLength, int numDocs, int numDocsWithTerm [, const string options])` - Return an Okapi BM25 score in double
+- `bm25(double termFrequency, int docLength, double avgDocLength, int numDocs, int numDocsWithTerm [, const string options])` - Return an Okapi BM25 score in double. Refer to http://hivemall.incubator.apache.org/userguide/ft_engineering/bm25.html for usage
 
 - `tf(string text)` - Return a term frequency in &lt;string, float&gt;
 
diff --git a/tools/hivemall-docs/pom.xml b/tools/hivemall-docs/pom.xml
index 917a55d..65ff8d9 100644
--- a/tools/hivemall-docs/pom.xml
+++ b/tools/hivemall-docs/pom.xml
@@ -74,7 +74,7 @@
 			<groupId>org.apache.hivemall</groupId>
 			<artifactId>hivemall-core</artifactId>
 			<version>${project.version}</version>
-			<scope>runtime</scope>
+			<scope>provided</scope> <!-- for Cite annotation -->
 		</dependency>
 		<dependency>
 			<groupId>org.apache.hivemall</groupId>
diff --git a/tools/hivemall-docs/src/main/java/hivemall/docs/FuncsListGeneratorMojo.java b/tools/hivemall-docs/src/main/java/hivemall/docs/FuncsListGeneratorMojo.java
index d4943a4..50298cc 100644
--- a/tools/hivemall-docs/src/main/java/hivemall/docs/FuncsListGeneratorMojo.java
+++ b/tools/hivemall-docs/src/main/java/hivemall/docs/FuncsListGeneratorMojo.java
@@ -24,6 +24,8 @@ import static hivemall.docs.utils.MarkdownUtils.asListElement;
 import static hivemall.docs.utils.MarkdownUtils.indent;
 import static org.apache.commons.lang.StringEscapeUtils.escapeHtml;
 
+import hivemall.annotations.Cite;
+
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileNotFoundException;
@@ -142,6 +144,7 @@ public class FuncsListGeneratorMojo extends AbstractMojo {
             "hivemall.smile.classification", "hivemall.smile.regression", "hivemall.smile.tools"));
         funcsHeaders.put("# XGBoost", Arrays.asList("hivemall.xgboost.classification",
             "hivemall.xgboost.regression", "hivemall.xgboost.tools"));
+        funcsHeaders.put("# Term Vector Model", Collections.singletonList("hivemall.ftvec.text"));
         funcsHeaders.put("# Others",
             Arrays.asList("hivemall", "hivemall.dataset", "hivemall.ftvec.text"));
     }
@@ -208,6 +211,21 @@ public class FuncsListGeneratorMojo extends AbstractMojo {
                 }
             }
 
+            Cite cite = annotatedClass.getAnnotation(Cite.class);
+            if (cite != null) {
+                sb.append("Reference: ");
+                String desc = cite.description();
+                String url = cite.url();
+                if (url == null) {
+                    sb.append(desc);
+                } else {
+                    sb.append("<a href=\"").append(url).append("\" target=\"_blank\">");
+                    sb.append(desc);
+                    sb.append("</a>");
+                }
+                sb.append("<br/>");
+            }
+
             String packageName = annotatedClass.getPackage().getName();
             if (!packages.containsKey(packageName)) {
                 Set<String> set = new TreeSet<>();