You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hivemall.apache.org by my...@apache.org on 2019/12/25 09:30:38 UTC
[incubator-hivemall] branch master updated: Fixed docs for UDF preparation

This is an automated email from the ASF dual-hosted git repository.

myui pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hivemall.git


The following commit(s) were added to refs/heads/master by this push:
     new 2d9050e  Fixed docs for UDF preparation
2d9050e is described below

commit 2d9050e9b17c54b39e9a99b222f0d9f1beb44ea3
Author: Makoto Yui <my...@apache.org>
AuthorDate: Wed Dec 25 18:30:06 2019 +0900

    Fixed docs for UDF preparation
---
 docs/gitbook/binaryclass/kdd2010a_dataset.md       |  3 ---
 docs/gitbook/binaryclass/kdd2010b_dataset.md       |  3 ---
 docs/gitbook/binaryclass/webspam_dataset.md        |  6 +----
 docs/gitbook/binaryclass/webspam_scw.md            | 12 ++-------
 docs/gitbook/multiclass/news20_ensemble.md         | 30 +++-------------------
 docs/gitbook/multiclass/news20_one-vs-the-rest.md  | 18 +------------
 .../multiclass/news20_one-vs-the-rest_dataset.md   |  8 ------
 docs/gitbook/recommend/news20_jaccard.md           | 10 +-------
 docs/gitbook/tips/ensemble_learning.md             | 22 ----------------
 9 files changed, 8 insertions(+), 104 deletions(-)

diff --git a/docs/gitbook/binaryclass/kdd2010a_dataset.md b/docs/gitbook/binaryclass/kdd2010a_dataset.md
index b5dcadf..46939d0 100644
--- a/docs/gitbook/binaryclass/kdd2010a_dataset.md
+++ b/docs/gitbook/binaryclass/kdd2010a_dataset.md
@@ -26,9 +26,6 @@
 ---
 # Define training/testing tables
 ```sql
-add jar ./tmp/hivemall.jar;
-source ./tmp/define-all.hive;
-
 create database kdd2010;
 use kdd2010;
 
diff --git a/docs/gitbook/binaryclass/kdd2010b_dataset.md b/docs/gitbook/binaryclass/kdd2010b_dataset.md
index 6ad71af..7313b13 100644
--- a/docs/gitbook/binaryclass/kdd2010b_dataset.md
+++ b/docs/gitbook/binaryclass/kdd2010b_dataset.md
@@ -26,9 +26,6 @@
 ---
 # Define training/testing tables
 ```sql
-add jar ./tmp/hivemall.jar;
-source ./tmp/define-all.hive;
-
 create database kdd2010;
 use kdd2010;
 
diff --git a/docs/gitbook/binaryclass/webspam_dataset.md b/docs/gitbook/binaryclass/webspam_dataset.md
index a0777c7..34ccfba 100644
--- a/docs/gitbook/binaryclass/webspam_dataset.md
+++ b/docs/gitbook/binaryclass/webspam_dataset.md
@@ -16,7 +16,7 @@
   specific language governing permissions and limitations
   under the License.
 -->
-        
+
 Get the dataset from 
 https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary.html#webspam
 
@@ -33,10 +33,6 @@ hadoop fs -put - /dataset/webspam/raw/
 create database webspam;
 use webspam;
 
-delete jar ./tmp/hivemall.jar;
-add jar ./tmp/hivemall.jar;
-source ./tmp/define-all.hive;
-
 create external table webspam_raw (
   rowid int,
   label int,
diff --git a/docs/gitbook/binaryclass/webspam_scw.md b/docs/gitbook/binaryclass/webspam_scw.md
index 067e8f2..bd7f4c5 100644
--- a/docs/gitbook/binaryclass/webspam_scw.md
+++ b/docs/gitbook/binaryclass/webspam_scw.md
@@ -16,20 +16,12 @@
   specific language governing permissions and limitations
   under the License.
 -->
-        
-# Preparation
-
-```
-use webspam;
-
-delete jar ./tmp/hivemall.jar;
-add jar ./tmp/hivemall.jar;
-source ./tmp/define-all.hive;
-```
 
 # PA1
 
 ```sql
+use webspam;
+
 drop table webspam_pa1_model1;
 create table webspam_pa1_model1 as
 select 
diff --git a/docs/gitbook/multiclass/news20_ensemble.md b/docs/gitbook/multiclass/news20_ensemble.md
index 7389a47..200fd68 100644
--- a/docs/gitbook/multiclass/news20_ensemble.md
+++ b/docs/gitbook/multiclass/news20_ensemble.md
@@ -16,22 +16,11 @@
   specific language governing permissions and limitations
   under the License.
 -->
-        
+
 This example explains how to run ensemble learning in Hivemall.   
 Two heads are better than one? Let's verify it by ensemble learning.
 
----
-
-## UDF preparation
-```sql
-delete jar /home/myui/tmp/hivemall.jar;
-add jar /home/myui/tmp/hivemall.jar;
-
-source /home/myui/tmp/define-all.hive;
-```
-
-[Case1] Model ensemble/mixing
-=======================
+# [Case1] Model ensemble/mixing
 
 ## training
 ```sql
@@ -117,15 +106,6 @@ where actual == predicted;
 
 > 0.8494866015527173
 
-## Cleaning
-
-```sql
-drop table news20mc_ensemble_model1;
-drop view news20mc_ensemble_predict1;
-drop view news20mc_ensemble_submit1;
-```
----
-
 Unfortunately, too many cooks spoil the broth in this case :-(
 
 | Algorithm | Accuracy |
@@ -135,11 +115,7 @@ Unfortunately, too many cooks spoil the broth in this case :-(
 | Ensemble(model) | 0.8494866015527173 |
 | CW |  0.850488354620586 |
 
-
----
-
-[Case2] Prediction ensemble
-=================
+# [Case2] Prediction ensemble
 
 ## prediction
 ```sql
diff --git a/docs/gitbook/multiclass/news20_one-vs-the-rest.md b/docs/gitbook/multiclass/news20_one-vs-the-rest.md
index d98329f..75640e7 100644
--- a/docs/gitbook/multiclass/news20_one-vs-the-rest.md
+++ b/docs/gitbook/multiclass/news20_one-vs-the-rest.md
@@ -16,16 +16,8 @@
   specific language governing permissions and limitations
   under the License.
 -->
-        
-A one-vs-the-rest classifier use the binary classifier for each class.
-
-## UDF preparation
-```sql
-delete jar /home/myui/tmp/hivemall.jar;
-add jar /home/myui/tmp/hivemall.jar;
 
-source /home/myui/tmp/define-all.hive;
-```
+A one-vs-the-rest classifier uses a binary classifier for each class.
 
 ## training
 ```sql
@@ -334,14 +326,6 @@ where actual == predicted;
 
 > 0.8567493112947658
 
-## Cleaning
-
-```sql
-drop table news20_onevsrest_arow_model1;
-drop view news20_onevsrest_arow_predict1;
-drop view news20_onevsrest_arow_submit1;
-```
-
 | Algorithm | Accuracy |
 |:-----------|------------:|
 | AROW(multi-class) | 0.8474830954169797 |
diff --git a/docs/gitbook/multiclass/news20_one-vs-the-rest_dataset.md b/docs/gitbook/multiclass/news20_one-vs-the-rest_dataset.md
index 62bc397..639dcf3 100644
--- a/docs/gitbook/multiclass/news20_one-vs-the-rest_dataset.md
+++ b/docs/gitbook/multiclass/news20_one-vs-the-rest_dataset.md
@@ -20,14 +20,6 @@
 *One-vs-the-rest* is a multiclass classification method that uses binary classifiers independently for each class.
 http://en.wikipedia.org/wiki/Multiclass_classification#one_vs_all
 
-## UDF preparation
-```sql
-delete jar /home/myui/tmp/hivemall.jar;
-add jar /home/myui/tmp/hivemall.jar;
-
-source /home/myui/tmp/define-all.hive;
-```
-
 ## Dataset preparation for one-vs-the-rest classifiers
 
 ```sql
diff --git a/docs/gitbook/recommend/news20_jaccard.md b/docs/gitbook/recommend/news20_jaccard.md
index 0166ed5..ad80b6e 100644
--- a/docs/gitbook/recommend/news20_jaccard.md
+++ b/docs/gitbook/recommend/news20_jaccard.md
@@ -19,18 +19,10 @@
         
 List related (similar) articles for each article.
 
-# Preparation
+# Extract clusters
 ```sql
 use news20;
 
-delete jar /home/myui/tmp/hivemall.jar;
-add jar /home/myui/tmp/hivemall.jar;
-
-source /home/myui/tmp/define-all.hive;
-```
-
-# Extract clusters
-```sql
 set hivevar:hashes=100; -- Generate N sets of minhash values for each row (DEFAULT: 5)
 set hivevar:keygroups=2; -- Use K minhash value for generating a resulting value (DEFAULT: 2)
 
diff --git a/docs/gitbook/tips/ensemble_learning.md b/docs/gitbook/tips/ensemble_learning.md
index 2157a5b..54503b9 100644
--- a/docs/gitbook/tips/ensemble_learning.md
+++ b/docs/gitbook/tips/ensemble_learning.md
@@ -22,16 +22,6 @@ Two heads are better than one? Let's verify it by ensemble learning.
 
 <!-- toc -->
 
----
-
-## UDF preparation
-```sql
-delete jar /home/myui/tmp/hivemall.jar;
-add jar /home/myui/tmp/hivemall.jar;
-
-source /home/myui/tmp/define-all.hive;
-```
-
 # [Case1] Model ensemble/mixing
 
 ## training
@@ -118,15 +108,6 @@ where actual == predicted;
 
 > 0.8494866015527173
 
-## Cleaning
-
-```sql
-drop table news20mc_ensemble_model1;
-drop view news20mc_ensemble_predict1;
-drop view news20mc_ensemble_submit1;
-```
----
-
 Unfortunately, too many cooks spoil the broth in this case :-(
 
 | Algorithm | Accuracy |
@@ -136,9 +117,6 @@ Unfortunately, too many cooks spoil the broth in this case :-(
 | Ensemble(model) | 0.8494866015527173 |
 | CW |  0.850488354620586 |
 
-
----
-
 # [Case2] Prediction ensemble
 
 ## prediction