You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hivemall.apache.org by my...@apache.org on 2017/06/23 10:04:09 UTC
[34/41] incubator-hivemall-site git commit: Added descriptions about
Feature Pairing in the user guide
http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/ba518dab/userguide/binaryclass/webspam_dataset.html
----------------------------------------------------------------------
diff --git a/userguide/binaryclass/webspam_dataset.html b/userguide/binaryclass/webspam_dataset.html
index 8ca32f7..6394815 100644
--- a/userguide/binaryclass/webspam_dataset.html
+++ b/userguide/binaryclass/webspam_dataset.html
@@ -598,14 +598,30 @@
</li>
- <li class="chapter " data-level="3.5" data-path="../ft_engineering/tfidf.html">
+ <li class="chapter " data-level="3.5" data-path="../ft_engineering/pairing.html">
- <a href="../ft_engineering/tfidf.html">
+ <a href="../ft_engineering/pairing.html">
<b>3.5.</b>
- TF-IDF Calculation
+ FEATURE PAIRING
+
+ </a>
+
+
+
+ <ul class="articles">
+
+
+ <li class="chapter " data-level="3.5.1" data-path="../ft_engineering/polynomial.html">
+
+ <a href="../ft_engineering/polynomial.html">
+
+
+ <b>3.5.1.</b>
+
+ Polynomial Features
</a>
@@ -613,6 +629,11 @@
</li>
+
+ </ul>
+
+ </li>
+
<li class="chapter " data-level="3.6" data-path="../ft_engineering/ft_trans.html">
<a href="../ft_engineering/ft_trans.html">
@@ -664,6 +685,21 @@
</li>
+ <li class="chapter " data-level="3.7" data-path="../ft_engineering/tfidf.html">
+
+ <a href="../ft_engineering/tfidf.html">
+
+
+ <b>3.7.</b>
+
+ TF-IDF Calculation
+
+ </a>
+
+
+
+ </li>
+
@@ -761,7 +797,7 @@
- <li class="header">Part V - Prediction</li>
+ <li class="header">Part V - Supervised Learning</li>
@@ -780,27 +816,19 @@
</li>
- <li class="chapter " data-level="5.2" data-path="../regression/general.html">
-
- <a href="../regression/general.html">
-
-
- <b>5.2.</b>
-
- Regression
-
- </a>
-
-
- </li>
- <li class="chapter " data-level="5.3" data-path="general.html">
+
+ <li class="header">Part VI - Binary classification</li>
+
+
+
+ <li class="chapter " data-level="6.1" data-path="general.html">
<a href="general.html">
- <b>5.3.</b>
+ <b>6.1.</b>
Binary Classification
@@ -810,21 +838,14 @@
</li>
-
-
-
- <li class="header">Part VI - Binary classification tutorials</li>
-
-
-
- <li class="chapter " data-level="6.1" data-path="a9a.html">
+ <li class="chapter " data-level="6.2" data-path="a9a.html">
<a href="a9a.html">
- <b>6.1.</b>
+ <b>6.2.</b>
- a9a
+ a9a tutorial
</a>
@@ -833,12 +854,12 @@
<ul class="articles">
- <li class="chapter " data-level="6.1.1" data-path="a9a_dataset.html">
+ <li class="chapter " data-level="6.2.1" data-path="a9a_dataset.html">
<a href="a9a_dataset.html">
- <b>6.1.1.</b>
+ <b>6.2.1.</b>
Data preparation
@@ -848,12 +869,12 @@
</li>
- <li class="chapter " data-level="6.1.2" data-path="a9a_lr.html">
+ <li class="chapter " data-level="6.2.2" data-path="a9a_lr.html">
<a href="a9a_lr.html">
- <b>6.1.2.</b>
+ <b>6.2.2.</b>
Logistic Regression
@@ -863,12 +884,12 @@
</li>
- <li class="chapter " data-level="6.1.3" data-path="a9a_minibatch.html">
+ <li class="chapter " data-level="6.2.3" data-path="a9a_minibatch.html">
<a href="a9a_minibatch.html">
- <b>6.1.3.</b>
+ <b>6.2.3.</b>
Mini-batch Gradient Descent
@@ -883,14 +904,14 @@
</li>
- <li class="chapter " data-level="6.2" data-path="news20.html">
+ <li class="chapter " data-level="6.3" data-path="news20.html">
<a href="news20.html">
- <b>6.2.</b>
+ <b>6.3.</b>
- News20
+ News20 tutorial
</a>
@@ -899,12 +920,12 @@
<ul class="articles">
- <li class="chapter " data-level="6.2.1" data-path="news20_dataset.html">
+ <li class="chapter " data-level="6.3.1" data-path="news20_dataset.html">
<a href="news20_dataset.html">
- <b>6.2.1.</b>
+ <b>6.3.1.</b>
Data preparation
@@ -914,12 +935,12 @@
</li>
- <li class="chapter " data-level="6.2.2" data-path="news20_pa.html">
+ <li class="chapter " data-level="6.3.2" data-path="news20_pa.html">
<a href="news20_pa.html">
- <b>6.2.2.</b>
+ <b>6.3.2.</b>
Perceptron, Passive Aggressive
@@ -929,12 +950,12 @@
</li>
- <li class="chapter " data-level="6.2.3" data-path="news20_scw.html">
+ <li class="chapter " data-level="6.3.3" data-path="news20_scw.html">
<a href="news20_scw.html">
- <b>6.2.3.</b>
+ <b>6.3.3.</b>
CW, AROW, SCW
@@ -944,12 +965,12 @@
</li>
- <li class="chapter " data-level="6.2.4" data-path="news20_adagrad.html">
+ <li class="chapter " data-level="6.3.4" data-path="news20_adagrad.html">
<a href="news20_adagrad.html">
- <b>6.2.4.</b>
+ <b>6.3.4.</b>
AdaGradRDA, AdaGrad, AdaDelta
@@ -964,14 +985,14 @@
</li>
- <li class="chapter " data-level="6.3" data-path="kdd2010a.html">
+ <li class="chapter " data-level="6.4" data-path="kdd2010a.html">
<a href="kdd2010a.html">
- <b>6.3.</b>
+ <b>6.4.</b>
- KDD2010a
+ KDD2010a tutorial
</a>
@@ -980,12 +1001,12 @@
<ul class="articles">
- <li class="chapter " data-level="6.3.1" data-path="kdd2010a_dataset.html">
+ <li class="chapter " data-level="6.4.1" data-path="kdd2010a_dataset.html">
<a href="kdd2010a_dataset.html">
- <b>6.3.1.</b>
+ <b>6.4.1.</b>
Data preparation
@@ -995,12 +1016,12 @@
</li>
- <li class="chapter " data-level="6.3.2" data-path="kdd2010a_scw.html">
+ <li class="chapter " data-level="6.4.2" data-path="kdd2010a_scw.html">
<a href="kdd2010a_scw.html">
- <b>6.3.2.</b>
+ <b>6.4.2.</b>
PA, CW, AROW, SCW
@@ -1015,14 +1036,14 @@
</li>
- <li class="chapter " data-level="6.4" data-path="kdd2010b.html">
+ <li class="chapter " data-level="6.5" data-path="kdd2010b.html">
<a href="kdd2010b.html">
- <b>6.4.</b>
+ <b>6.5.</b>
- KDD2010b
+ KDD2010b tutorial
</a>
@@ -1031,12 +1052,12 @@
<ul class="articles">
- <li class="chapter " data-level="6.4.1" data-path="kdd2010b_dataset.html">
+ <li class="chapter " data-level="6.5.1" data-path="kdd2010b_dataset.html">
<a href="kdd2010b_dataset.html">
- <b>6.4.1.</b>
+ <b>6.5.1.</b>
Data preparation
@@ -1046,12 +1067,12 @@
</li>
- <li class="chapter " data-level="6.4.2" data-path="kdd2010b_arow.html">
+ <li class="chapter " data-level="6.5.2" data-path="kdd2010b_arow.html">
<a href="kdd2010b_arow.html">
- <b>6.4.2.</b>
+ <b>6.5.2.</b>
AROW
@@ -1066,14 +1087,14 @@
</li>
- <li class="chapter " data-level="6.5" data-path="webspam.html">
+ <li class="chapter " data-level="6.6" data-path="webspam.html">
<a href="webspam.html">
- <b>6.5.</b>
+ <b>6.6.</b>
- Webspam
+ Webspam tutorial
</a>
@@ -1082,12 +1103,12 @@
<ul class="articles">
- <li class="chapter active" data-level="6.5.1" data-path="webspam_dataset.html">
+ <li class="chapter active" data-level="6.6.1" data-path="webspam_dataset.html">
<a href="webspam_dataset.html">
- <b>6.5.1.</b>
+ <b>6.6.1.</b>
Data pareparation
@@ -1097,12 +1118,12 @@
</li>
- <li class="chapter " data-level="6.5.2" data-path="webspam_scw.html">
+ <li class="chapter " data-level="6.6.2" data-path="webspam_scw.html">
<a href="webspam_scw.html">
- <b>6.5.2.</b>
+ <b>6.6.2.</b>
PA1, AROW, SCW
@@ -1117,14 +1138,14 @@
</li>
- <li class="chapter " data-level="6.6" data-path="titanic_rf.html">
+ <li class="chapter " data-level="6.7" data-path="titanic_rf.html">
<a href="titanic_rf.html">
- <b>6.6.</b>
+ <b>6.7.</b>
- Kaggle Titanic
+ Kaggle Titanic tutorial
</a>
@@ -1135,7 +1156,7 @@
- <li class="header">Part VII - Multiclass classification tutorials</li>
+ <li class="header">Part VII - Multiclass classification</li>
@@ -1146,7 +1167,7 @@
<b>7.1.</b>
- News20 Multiclass
+ News20 Multiclass tutorial
</a>
@@ -1257,7 +1278,7 @@
<b>7.2.</b>
- Iris
+ Iris tutorial
</a>
@@ -1319,18 +1340,33 @@
- <li class="header">Part VIII - Regression tutorials</li>
+ <li class="header">Part VIII - Regression</li>
- <li class="chapter " data-level="8.1" data-path="../regression/e2006.html">
+ <li class="chapter " data-level="8.1" data-path="../regression/general.html">
- <a href="../regression/e2006.html">
+ <a href="../regression/general.html">
<b>8.1.</b>
- E2006-tfidf regression
+ Regression
+
+ </a>
+
+
+
+ </li>
+
+ <li class="chapter " data-level="8.2" data-path="../regression/e2006.html">
+
+ <a href="../regression/e2006.html">
+
+
+ <b>8.2.</b>
+
+ E2006-tfidf regression tutorial
</a>
@@ -1339,12 +1375,12 @@
<ul class="articles">
- <li class="chapter " data-level="8.1.1" data-path="../regression/e2006_dataset.html">
+ <li class="chapter " data-level="8.2.1" data-path="../regression/e2006_dataset.html">
<a href="../regression/e2006_dataset.html">
- <b>8.1.1.</b>
+ <b>8.2.1.</b>
Data preparation
@@ -1354,12 +1390,12 @@
</li>
- <li class="chapter " data-level="8.1.2" data-path="../regression/e2006_arow.html">
+ <li class="chapter " data-level="8.2.2" data-path="../regression/e2006_arow.html">
<a href="../regression/e2006_arow.html">
- <b>8.1.2.</b>
+ <b>8.2.2.</b>
Passive Aggressive, AROW
@@ -1374,14 +1410,14 @@
</li>
- <li class="chapter " data-level="8.2" data-path="../regression/kddcup12tr2.html">
+ <li class="chapter " data-level="8.3" data-path="../regression/kddcup12tr2.html">
<a href="../regression/kddcup12tr2.html">
- <b>8.2.</b>
+ <b>8.3.</b>
- KDDCup 2012 track 2 CTR prediction
+ KDDCup 2012 track 2 CTR prediction tutorial
</a>
@@ -1390,12 +1426,12 @@
<ul class="articles">
- <li class="chapter " data-level="8.2.1" data-path="../regression/kddcup12tr2_dataset.html">
+ <li class="chapter " data-level="8.3.1" data-path="../regression/kddcup12tr2_dataset.html">
<a href="../regression/kddcup12tr2_dataset.html">
- <b>8.2.1.</b>
+ <b>8.3.1.</b>
Data preparation
@@ -1405,12 +1441,12 @@
</li>
- <li class="chapter " data-level="8.2.2" data-path="../regression/kddcup12tr2_lr.html">
+ <li class="chapter " data-level="8.3.2" data-path="../regression/kddcup12tr2_lr.html">
<a href="../regression/kddcup12tr2_lr.html">
- <b>8.2.2.</b>
+ <b>8.3.2.</b>
Logistic Regression, Passive Aggressive
@@ -1420,12 +1456,12 @@
</li>
- <li class="chapter " data-level="8.2.3" data-path="../regression/kddcup12tr2_lr_amplify.html">
+ <li class="chapter " data-level="8.3.3" data-path="../regression/kddcup12tr2_lr_amplify.html">
<a href="../regression/kddcup12tr2_lr_amplify.html">
- <b>8.2.3.</b>
+ <b>8.3.3.</b>
Logistic Regression with Amplifier
@@ -1435,12 +1471,12 @@
</li>
- <li class="chapter " data-level="8.2.4" data-path="../regression/kddcup12tr2_adagrad.html">
+ <li class="chapter " data-level="8.3.4" data-path="../regression/kddcup12tr2_adagrad.html">
<a href="../regression/kddcup12tr2_adagrad.html">
- <b>8.2.4.</b>
+ <b>8.3.4.</b>
AdaGrad, AdaDelta
@@ -2199,7 +2235,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda
<script>
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"Data pareparation","level":"6.5.1","depth":2,"next":{"title":"PA1, AROW, SCW","level":"6.5.2","depth":2,"path":"binaryclass/webspam_scw.md","ref":"binaryclass/webspam_scw.md","articles":[]},"previous":{"title":"Webspam","level":"6.5","depth":1,"path":"binaryclass/webspam.md","ref":"binaryclass/webspam.md","articles":[{"title":"Data pareparation","level":"6.5.1","depth":2,"path":"binaryclass/webspam_dataset.md","ref":"binaryclass/webspam_dataset.md","articles":[]},{"title":"PA1, AROW, SCW","level":"6.5.2","depth":2,"path":"binaryclass/webspam_scw.md","ref":"binaryclass/webspam_scw.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.
css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://git
hub.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"binaryclass/webspam_dataset.md","mtime":"2017-02-08T07:24:57.000Z","type":"markdown"},"gitbook":{"version":"3.
2.2","time":"2017-06-15T10:33:21.138Z"},"basePath":"..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"Data pareparation","level":"6.6.1","depth":2,"next":{"title":"PA1, AROW, SCW","level":"6.6.2","depth":2,"path":"binaryclass/webspam_scw.md","ref":"binaryclass/webspam_scw.md","articles":[]},"previous":{"title":"Webspam tutorial","level":"6.6","depth":1,"path":"binaryclass/webspam.md","ref":"binaryclass/webspam.md","articles":[{"title":"Data pareparation","level":"6.6.1","depth":2,"path":"binaryclass/webspam_dataset.md","ref":"binaryclass/webspam_dataset.md","articles":[]},{"title":"PA1, AROW, SCW","level":"6.6.2","depth":2,"path":"binaryclass/webspam_scw.md","ref":"binaryclass/webspam_scw.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"sty
les/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"ht
tps://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"binaryclass/webspam_dataset.md","mtime":"2016-12-02T08:02:42.000Z","type":"markdown"},"gitbook":{"ver
sion":"3.2.2","time":"2017-06-23T09:59:20.878Z"},"basePath":"..","book":{"language":""}});
});
</script>
</div>
http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/ba518dab/userguide/binaryclass/webspam_scw.html
----------------------------------------------------------------------
diff --git a/userguide/binaryclass/webspam_scw.html b/userguide/binaryclass/webspam_scw.html
index 9881e15..551d072 100644
--- a/userguide/binaryclass/webspam_scw.html
+++ b/userguide/binaryclass/webspam_scw.html
@@ -598,14 +598,30 @@
</li>
- <li class="chapter " data-level="3.5" data-path="../ft_engineering/tfidf.html">
+ <li class="chapter " data-level="3.5" data-path="../ft_engineering/pairing.html">
- <a href="../ft_engineering/tfidf.html">
+ <a href="../ft_engineering/pairing.html">
<b>3.5.</b>
- TF-IDF Calculation
+ FEATURE PAIRING
+
+ </a>
+
+
+
+ <ul class="articles">
+
+
+ <li class="chapter " data-level="3.5.1" data-path="../ft_engineering/polynomial.html">
+
+ <a href="../ft_engineering/polynomial.html">
+
+
+ <b>3.5.1.</b>
+
+ Polynomial Features
</a>
@@ -613,6 +629,11 @@
</li>
+
+ </ul>
+
+ </li>
+
<li class="chapter " data-level="3.6" data-path="../ft_engineering/ft_trans.html">
<a href="../ft_engineering/ft_trans.html">
@@ -664,6 +685,21 @@
</li>
+ <li class="chapter " data-level="3.7" data-path="../ft_engineering/tfidf.html">
+
+ <a href="../ft_engineering/tfidf.html">
+
+
+ <b>3.7.</b>
+
+ TF-IDF Calculation
+
+ </a>
+
+
+
+ </li>
+
@@ -761,7 +797,7 @@
- <li class="header">Part V - Prediction</li>
+ <li class="header">Part V - Supervised Learning</li>
@@ -780,27 +816,19 @@
</li>
- <li class="chapter " data-level="5.2" data-path="../regression/general.html">
-
- <a href="../regression/general.html">
-
-
- <b>5.2.</b>
-
- Regression
-
- </a>
-
-
- </li>
- <li class="chapter " data-level="5.3" data-path="general.html">
+
+ <li class="header">Part VI - Binary classification</li>
+
+
+
+ <li class="chapter " data-level="6.1" data-path="general.html">
<a href="general.html">
- <b>5.3.</b>
+ <b>6.1.</b>
Binary Classification
@@ -810,21 +838,14 @@
</li>
-
-
-
- <li class="header">Part VI - Binary classification tutorials</li>
-
-
-
- <li class="chapter " data-level="6.1" data-path="a9a.html">
+ <li class="chapter " data-level="6.2" data-path="a9a.html">
<a href="a9a.html">
- <b>6.1.</b>
+ <b>6.2.</b>
- a9a
+ a9a tutorial
</a>
@@ -833,12 +854,12 @@
<ul class="articles">
- <li class="chapter " data-level="6.1.1" data-path="a9a_dataset.html">
+ <li class="chapter " data-level="6.2.1" data-path="a9a_dataset.html">
<a href="a9a_dataset.html">
- <b>6.1.1.</b>
+ <b>6.2.1.</b>
Data preparation
@@ -848,12 +869,12 @@
</li>
- <li class="chapter " data-level="6.1.2" data-path="a9a_lr.html">
+ <li class="chapter " data-level="6.2.2" data-path="a9a_lr.html">
<a href="a9a_lr.html">
- <b>6.1.2.</b>
+ <b>6.2.2.</b>
Logistic Regression
@@ -863,12 +884,12 @@
</li>
- <li class="chapter " data-level="6.1.3" data-path="a9a_minibatch.html">
+ <li class="chapter " data-level="6.2.3" data-path="a9a_minibatch.html">
<a href="a9a_minibatch.html">
- <b>6.1.3.</b>
+ <b>6.2.3.</b>
Mini-batch Gradient Descent
@@ -883,14 +904,14 @@
</li>
- <li class="chapter " data-level="6.2" data-path="news20.html">
+ <li class="chapter " data-level="6.3" data-path="news20.html">
<a href="news20.html">
- <b>6.2.</b>
+ <b>6.3.</b>
- News20
+ News20 tutorial
</a>
@@ -899,12 +920,12 @@
<ul class="articles">
- <li class="chapter " data-level="6.2.1" data-path="news20_dataset.html">
+ <li class="chapter " data-level="6.3.1" data-path="news20_dataset.html">
<a href="news20_dataset.html">
- <b>6.2.1.</b>
+ <b>6.3.1.</b>
Data preparation
@@ -914,12 +935,12 @@
</li>
- <li class="chapter " data-level="6.2.2" data-path="news20_pa.html">
+ <li class="chapter " data-level="6.3.2" data-path="news20_pa.html">
<a href="news20_pa.html">
- <b>6.2.2.</b>
+ <b>6.3.2.</b>
Perceptron, Passive Aggressive
@@ -929,12 +950,12 @@
</li>
- <li class="chapter " data-level="6.2.3" data-path="news20_scw.html">
+ <li class="chapter " data-level="6.3.3" data-path="news20_scw.html">
<a href="news20_scw.html">
- <b>6.2.3.</b>
+ <b>6.3.3.</b>
CW, AROW, SCW
@@ -944,12 +965,12 @@
</li>
- <li class="chapter " data-level="6.2.4" data-path="news20_adagrad.html">
+ <li class="chapter " data-level="6.3.4" data-path="news20_adagrad.html">
<a href="news20_adagrad.html">
- <b>6.2.4.</b>
+ <b>6.3.4.</b>
AdaGradRDA, AdaGrad, AdaDelta
@@ -964,14 +985,14 @@
</li>
- <li class="chapter " data-level="6.3" data-path="kdd2010a.html">
+ <li class="chapter " data-level="6.4" data-path="kdd2010a.html">
<a href="kdd2010a.html">
- <b>6.3.</b>
+ <b>6.4.</b>
- KDD2010a
+ KDD2010a tutorial
</a>
@@ -980,12 +1001,12 @@
<ul class="articles">
- <li class="chapter " data-level="6.3.1" data-path="kdd2010a_dataset.html">
+ <li class="chapter " data-level="6.4.1" data-path="kdd2010a_dataset.html">
<a href="kdd2010a_dataset.html">
- <b>6.3.1.</b>
+ <b>6.4.1.</b>
Data preparation
@@ -995,12 +1016,12 @@
</li>
- <li class="chapter " data-level="6.3.2" data-path="kdd2010a_scw.html">
+ <li class="chapter " data-level="6.4.2" data-path="kdd2010a_scw.html">
<a href="kdd2010a_scw.html">
- <b>6.3.2.</b>
+ <b>6.4.2.</b>
PA, CW, AROW, SCW
@@ -1015,14 +1036,14 @@
</li>
- <li class="chapter " data-level="6.4" data-path="kdd2010b.html">
+ <li class="chapter " data-level="6.5" data-path="kdd2010b.html">
<a href="kdd2010b.html">
- <b>6.4.</b>
+ <b>6.5.</b>
- KDD2010b
+ KDD2010b tutorial
</a>
@@ -1031,12 +1052,12 @@
<ul class="articles">
- <li class="chapter " data-level="6.4.1" data-path="kdd2010b_dataset.html">
+ <li class="chapter " data-level="6.5.1" data-path="kdd2010b_dataset.html">
<a href="kdd2010b_dataset.html">
- <b>6.4.1.</b>
+ <b>6.5.1.</b>
Data preparation
@@ -1046,12 +1067,12 @@
</li>
- <li class="chapter " data-level="6.4.2" data-path="kdd2010b_arow.html">
+ <li class="chapter " data-level="6.5.2" data-path="kdd2010b_arow.html">
<a href="kdd2010b_arow.html">
- <b>6.4.2.</b>
+ <b>6.5.2.</b>
AROW
@@ -1066,14 +1087,14 @@
</li>
- <li class="chapter " data-level="6.5" data-path="webspam.html">
+ <li class="chapter " data-level="6.6" data-path="webspam.html">
<a href="webspam.html">
- <b>6.5.</b>
+ <b>6.6.</b>
- Webspam
+ Webspam tutorial
</a>
@@ -1082,12 +1103,12 @@
<ul class="articles">
- <li class="chapter " data-level="6.5.1" data-path="webspam_dataset.html">
+ <li class="chapter " data-level="6.6.1" data-path="webspam_dataset.html">
<a href="webspam_dataset.html">
- <b>6.5.1.</b>
+ <b>6.6.1.</b>
Data pareparation
@@ -1097,12 +1118,12 @@
</li>
- <li class="chapter active" data-level="6.5.2" data-path="webspam_scw.html">
+ <li class="chapter active" data-level="6.6.2" data-path="webspam_scw.html">
<a href="webspam_scw.html">
- <b>6.5.2.</b>
+ <b>6.6.2.</b>
PA1, AROW, SCW
@@ -1117,14 +1138,14 @@
</li>
- <li class="chapter " data-level="6.6" data-path="titanic_rf.html">
+ <li class="chapter " data-level="6.7" data-path="titanic_rf.html">
<a href="titanic_rf.html">
- <b>6.6.</b>
+ <b>6.7.</b>
- Kaggle Titanic
+ Kaggle Titanic tutorial
</a>
@@ -1135,7 +1156,7 @@
- <li class="header">Part VII - Multiclass classification tutorials</li>
+ <li class="header">Part VII - Multiclass classification</li>
@@ -1146,7 +1167,7 @@
<b>7.1.</b>
- News20 Multiclass
+ News20 Multiclass tutorial
</a>
@@ -1257,7 +1278,7 @@
<b>7.2.</b>
- Iris
+ Iris tutorial
</a>
@@ -1319,18 +1340,33 @@
- <li class="header">Part VIII - Regression tutorials</li>
+ <li class="header">Part VIII - Regression</li>
- <li class="chapter " data-level="8.1" data-path="../regression/e2006.html">
+ <li class="chapter " data-level="8.1" data-path="../regression/general.html">
- <a href="../regression/e2006.html">
+ <a href="../regression/general.html">
<b>8.1.</b>
- E2006-tfidf regression
+ Regression
+
+ </a>
+
+
+
+ </li>
+
+ <li class="chapter " data-level="8.2" data-path="../regression/e2006.html">
+
+ <a href="../regression/e2006.html">
+
+
+ <b>8.2.</b>
+
+ E2006-tfidf regression tutorial
</a>
@@ -1339,12 +1375,12 @@
<ul class="articles">
- <li class="chapter " data-level="8.1.1" data-path="../regression/e2006_dataset.html">
+ <li class="chapter " data-level="8.2.1" data-path="../regression/e2006_dataset.html">
<a href="../regression/e2006_dataset.html">
- <b>8.1.1.</b>
+ <b>8.2.1.</b>
Data preparation
@@ -1354,12 +1390,12 @@
</li>
- <li class="chapter " data-level="8.1.2" data-path="../regression/e2006_arow.html">
+ <li class="chapter " data-level="8.2.2" data-path="../regression/e2006_arow.html">
<a href="../regression/e2006_arow.html">
- <b>8.1.2.</b>
+ <b>8.2.2.</b>
Passive Aggressive, AROW
@@ -1374,14 +1410,14 @@
</li>
- <li class="chapter " data-level="8.2" data-path="../regression/kddcup12tr2.html">
+ <li class="chapter " data-level="8.3" data-path="../regression/kddcup12tr2.html">
<a href="../regression/kddcup12tr2.html">
- <b>8.2.</b>
+ <b>8.3.</b>
- KDDCup 2012 track 2 CTR prediction
+ KDDCup 2012 track 2 CTR prediction tutorial
</a>
@@ -1390,12 +1426,12 @@
<ul class="articles">
- <li class="chapter " data-level="8.2.1" data-path="../regression/kddcup12tr2_dataset.html">
+ <li class="chapter " data-level="8.3.1" data-path="../regression/kddcup12tr2_dataset.html">
<a href="../regression/kddcup12tr2_dataset.html">
- <b>8.2.1.</b>
+ <b>8.3.1.</b>
Data preparation
@@ -1405,12 +1441,12 @@
</li>
- <li class="chapter " data-level="8.2.2" data-path="../regression/kddcup12tr2_lr.html">
+ <li class="chapter " data-level="8.3.2" data-path="../regression/kddcup12tr2_lr.html">
<a href="../regression/kddcup12tr2_lr.html">
- <b>8.2.2.</b>
+ <b>8.3.2.</b>
Logistic Regression, Passive Aggressive
@@ -1420,12 +1456,12 @@
</li>
- <li class="chapter " data-level="8.2.3" data-path="../regression/kddcup12tr2_lr_amplify.html">
+ <li class="chapter " data-level="8.3.3" data-path="../regression/kddcup12tr2_lr_amplify.html">
<a href="../regression/kddcup12tr2_lr_amplify.html">
- <b>8.2.3.</b>
+ <b>8.3.3.</b>
Logistic Regression with Amplifier
@@ -1435,12 +1471,12 @@
</li>
- <li class="chapter " data-level="8.2.4" data-path="../regression/kddcup12tr2_adagrad.html">
+ <li class="chapter " data-level="8.3.4" data-path="../regression/kddcup12tr2_adagrad.html">
<a href="../regression/kddcup12tr2_adagrad.html">
- <b>8.2.4.</b>
+ <b>8.3.4.</b>
AdaGrad, AdaDelta
@@ -2259,7 +2295,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda
<script>
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"PA1, AROW, SCW","level":"6.5.2","depth":2,"next":{"title":"Kaggle Titanic","level":"6.6","depth":1,"path":"binaryclass/titanic_rf.md","ref":"binaryclass/titanic_rf.md","articles":[]},"previous":{"title":"Data pareparation","level":"6.5.1","depth":2,"path":"binaryclass/webspam_dataset.md","ref":"binaryclass/webspam_dataset.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"
splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{
"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"binaryclass/webspam_scw.md","mtime":"2017-02-08T07:24:57.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2017-06-15T10:33:21.138Z"},"basePath":"..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"PA1, AROW, SCW","level":"6.6.2","depth":2,"next":{"title":"Kaggle Titanic tutorial","level":"6.7","depth":1,"path":"binaryclass/titanic_rf.md","ref":"binaryclass/titanic_rf.md","articles":[]},"previous":{"title":"Data pareparation","level":"6.6.1","depth":2,"path":"binaryclass/webspam_dataset.md","ref":"binaryclass/webspam_dataset.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hive
mall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"an
chorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"binaryclass/webspam_scw.md","mtime":"2016-12-02T08:02:42.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2017-06-23T09:59:20.878Z"},"basePath":"..","book":{"language":""}});
});
</script>
</div>
http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/ba518dab/userguide/clustering/lda.html
----------------------------------------------------------------------
diff --git a/userguide/clustering/lda.html b/userguide/clustering/lda.html
index 92eea38..a14aa18 100644
--- a/userguide/clustering/lda.html
+++ b/userguide/clustering/lda.html
@@ -598,14 +598,30 @@
</li>
- <li class="chapter " data-level="3.5" data-path="../ft_engineering/tfidf.html">
+ <li class="chapter " data-level="3.5" data-path="../ft_engineering/pairing.html">
- <a href="../ft_engineering/tfidf.html">
+ <a href="../ft_engineering/pairing.html">
<b>3.5.</b>
- TF-IDF Calculation
+ FEATURE PAIRING
+
+ </a>
+
+
+
+ <ul class="articles">
+
+
+ <li class="chapter " data-level="3.5.1" data-path="../ft_engineering/polynomial.html">
+
+ <a href="../ft_engineering/polynomial.html">
+
+
+ <b>3.5.1.</b>
+
+ Polynomial Features
</a>
@@ -613,6 +629,11 @@
</li>
+
+ </ul>
+
+ </li>
+
<li class="chapter " data-level="3.6" data-path="../ft_engineering/ft_trans.html">
<a href="../ft_engineering/ft_trans.html">
@@ -664,6 +685,21 @@
</li>
+ <li class="chapter " data-level="3.7" data-path="../ft_engineering/tfidf.html">
+
+ <a href="../ft_engineering/tfidf.html">
+
+
+ <b>3.7.</b>
+
+ TF-IDF Calculation
+
+ </a>
+
+
+
+ </li>
+
@@ -761,7 +797,7 @@
- <li class="header">Part V - Prediction</li>
+ <li class="header">Part V - Supervised Learning</li>
@@ -780,27 +816,19 @@
</li>
- <li class="chapter " data-level="5.2" data-path="../regression/general.html">
-
- <a href="../regression/general.html">
-
-
- <b>5.2.</b>
-
- Regression
-
- </a>
-
-
- </li>
- <li class="chapter " data-level="5.3" data-path="../binaryclass/general.html">
+
+ <li class="header">Part VI - Binary classification</li>
+
+
+
+ <li class="chapter " data-level="6.1" data-path="../binaryclass/general.html">
<a href="../binaryclass/general.html">
- <b>5.3.</b>
+ <b>6.1.</b>
Binary Classification
@@ -810,21 +838,14 @@
</li>
-
-
-
- <li class="header">Part VI - Binary classification tutorials</li>
-
-
-
- <li class="chapter " data-level="6.1" data-path="../binaryclass/a9a.html">
+ <li class="chapter " data-level="6.2" data-path="../binaryclass/a9a.html">
<a href="../binaryclass/a9a.html">
- <b>6.1.</b>
+ <b>6.2.</b>
- a9a
+ a9a tutorial
</a>
@@ -833,12 +854,12 @@
<ul class="articles">
- <li class="chapter " data-level="6.1.1" data-path="../binaryclass/a9a_dataset.html">
+ <li class="chapter " data-level="6.2.1" data-path="../binaryclass/a9a_dataset.html">
<a href="../binaryclass/a9a_dataset.html">
- <b>6.1.1.</b>
+ <b>6.2.1.</b>
Data preparation
@@ -848,12 +869,12 @@
</li>
- <li class="chapter " data-level="6.1.2" data-path="../binaryclass/a9a_lr.html">
+ <li class="chapter " data-level="6.2.2" data-path="../binaryclass/a9a_lr.html">
<a href="../binaryclass/a9a_lr.html">
- <b>6.1.2.</b>
+ <b>6.2.2.</b>
Logistic Regression
@@ -863,12 +884,12 @@
</li>
- <li class="chapter " data-level="6.1.3" data-path="../binaryclass/a9a_minibatch.html">
+ <li class="chapter " data-level="6.2.3" data-path="../binaryclass/a9a_minibatch.html">
<a href="../binaryclass/a9a_minibatch.html">
- <b>6.1.3.</b>
+ <b>6.2.3.</b>
Mini-batch Gradient Descent
@@ -883,14 +904,14 @@
</li>
- <li class="chapter " data-level="6.2" data-path="../binaryclass/news20.html">
+ <li class="chapter " data-level="6.3" data-path="../binaryclass/news20.html">
<a href="../binaryclass/news20.html">
- <b>6.2.</b>
+ <b>6.3.</b>
- News20
+ News20 tutorial
</a>
@@ -899,12 +920,12 @@
<ul class="articles">
- <li class="chapter " data-level="6.2.1" data-path="../binaryclass/news20_dataset.html">
+ <li class="chapter " data-level="6.3.1" data-path="../binaryclass/news20_dataset.html">
<a href="../binaryclass/news20_dataset.html">
- <b>6.2.1.</b>
+ <b>6.3.1.</b>
Data preparation
@@ -914,12 +935,12 @@
</li>
- <li class="chapter " data-level="6.2.2" data-path="../binaryclass/news20_pa.html">
+ <li class="chapter " data-level="6.3.2" data-path="../binaryclass/news20_pa.html">
<a href="../binaryclass/news20_pa.html">
- <b>6.2.2.</b>
+ <b>6.3.2.</b>
Perceptron, Passive Aggressive
@@ -929,12 +950,12 @@
</li>
- <li class="chapter " data-level="6.2.3" data-path="../binaryclass/news20_scw.html">
+ <li class="chapter " data-level="6.3.3" data-path="../binaryclass/news20_scw.html">
<a href="../binaryclass/news20_scw.html">
- <b>6.2.3.</b>
+ <b>6.3.3.</b>
CW, AROW, SCW
@@ -944,12 +965,12 @@
</li>
- <li class="chapter " data-level="6.2.4" data-path="../binaryclass/news20_adagrad.html">
+ <li class="chapter " data-level="6.3.4" data-path="../binaryclass/news20_adagrad.html">
<a href="../binaryclass/news20_adagrad.html">
- <b>6.2.4.</b>
+ <b>6.3.4.</b>
AdaGradRDA, AdaGrad, AdaDelta
@@ -964,14 +985,14 @@
</li>
- <li class="chapter " data-level="6.3" data-path="../binaryclass/kdd2010a.html">
+ <li class="chapter " data-level="6.4" data-path="../binaryclass/kdd2010a.html">
<a href="../binaryclass/kdd2010a.html">
- <b>6.3.</b>
+ <b>6.4.</b>
- KDD2010a
+ KDD2010a tutorial
</a>
@@ -980,12 +1001,12 @@
<ul class="articles">
- <li class="chapter " data-level="6.3.1" data-path="../binaryclass/kdd2010a_dataset.html">
+ <li class="chapter " data-level="6.4.1" data-path="../binaryclass/kdd2010a_dataset.html">
<a href="../binaryclass/kdd2010a_dataset.html">
- <b>6.3.1.</b>
+ <b>6.4.1.</b>
Data preparation
@@ -995,12 +1016,12 @@
</li>
- <li class="chapter " data-level="6.3.2" data-path="../binaryclass/kdd2010a_scw.html">
+ <li class="chapter " data-level="6.4.2" data-path="../binaryclass/kdd2010a_scw.html">
<a href="../binaryclass/kdd2010a_scw.html">
- <b>6.3.2.</b>
+ <b>6.4.2.</b>
PA, CW, AROW, SCW
@@ -1015,14 +1036,14 @@
</li>
- <li class="chapter " data-level="6.4" data-path="../binaryclass/kdd2010b.html">
+ <li class="chapter " data-level="6.5" data-path="../binaryclass/kdd2010b.html">
<a href="../binaryclass/kdd2010b.html">
- <b>6.4.</b>
+ <b>6.5.</b>
- KDD2010b
+ KDD2010b tutorial
</a>
@@ -1031,12 +1052,12 @@
<ul class="articles">
- <li class="chapter " data-level="6.4.1" data-path="../binaryclass/kdd2010b_dataset.html">
+ <li class="chapter " data-level="6.5.1" data-path="../binaryclass/kdd2010b_dataset.html">
<a href="../binaryclass/kdd2010b_dataset.html">
- <b>6.4.1.</b>
+ <b>6.5.1.</b>
Data preparation
@@ -1046,12 +1067,12 @@
</li>
- <li class="chapter " data-level="6.4.2" data-path="../binaryclass/kdd2010b_arow.html">
+ <li class="chapter " data-level="6.5.2" data-path="../binaryclass/kdd2010b_arow.html">
<a href="../binaryclass/kdd2010b_arow.html">
- <b>6.4.2.</b>
+ <b>6.5.2.</b>
AROW
@@ -1066,14 +1087,14 @@
</li>
- <li class="chapter " data-level="6.5" data-path="../binaryclass/webspam.html">
+ <li class="chapter " data-level="6.6" data-path="../binaryclass/webspam.html">
<a href="../binaryclass/webspam.html">
- <b>6.5.</b>
+ <b>6.6.</b>
- Webspam
+ Webspam tutorial
</a>
@@ -1082,12 +1103,12 @@
<ul class="articles">
- <li class="chapter " data-level="6.5.1" data-path="../binaryclass/webspam_dataset.html">
+ <li class="chapter " data-level="6.6.1" data-path="../binaryclass/webspam_dataset.html">
<a href="../binaryclass/webspam_dataset.html">
- <b>6.5.1.</b>
+ <b>6.6.1.</b>
Data preparation
@@ -1097,12 +1118,12 @@
</li>
- <li class="chapter " data-level="6.5.2" data-path="../binaryclass/webspam_scw.html">
+ <li class="chapter " data-level="6.6.2" data-path="../binaryclass/webspam_scw.html">
<a href="../binaryclass/webspam_scw.html">
- <b>6.5.2.</b>
+ <b>6.6.2.</b>
PA1, AROW, SCW
@@ -1117,14 +1138,14 @@
</li>
- <li class="chapter " data-level="6.6" data-path="../binaryclass/titanic_rf.html">
+ <li class="chapter " data-level="6.7" data-path="../binaryclass/titanic_rf.html">
<a href="../binaryclass/titanic_rf.html">
- <b>6.6.</b>
+ <b>6.7.</b>
- Kaggle Titanic
+ Kaggle Titanic tutorial
</a>
@@ -1135,7 +1156,7 @@
- <li class="header">Part VII - Multiclass classification tutorials</li>
+ <li class="header">Part VII - Multiclass classification</li>
@@ -1146,7 +1167,7 @@
<b>7.1.</b>
- News20 Multiclass
+ News20 Multiclass tutorial
</a>
@@ -1257,7 +1278,7 @@
<b>7.2.</b>
- Iris
+ Iris tutorial
</a>
@@ -1319,18 +1340,33 @@
- <li class="header">Part VIII - Regression tutorials</li>
+ <li class="header">Part VIII - Regression</li>
- <li class="chapter " data-level="8.1" data-path="../regression/e2006.html">
+ <li class="chapter " data-level="8.1" data-path="../regression/general.html">
- <a href="../regression/e2006.html">
+ <a href="../regression/general.html">
<b>8.1.</b>
- E2006-tfidf regression
+ Regression
+
+ </a>
+
+
+
+ </li>
+
+ <li class="chapter " data-level="8.2" data-path="../regression/e2006.html">
+
+ <a href="../regression/e2006.html">
+
+
+ <b>8.2.</b>
+
+ E2006-tfidf regression tutorial
</a>
@@ -1339,12 +1375,12 @@
<ul class="articles">
- <li class="chapter " data-level="8.1.1" data-path="../regression/e2006_dataset.html">
+ <li class="chapter " data-level="8.2.1" data-path="../regression/e2006_dataset.html">
<a href="../regression/e2006_dataset.html">
- <b>8.1.1.</b>
+ <b>8.2.1.</b>
Data preparation
@@ -1354,12 +1390,12 @@
</li>
- <li class="chapter " data-level="8.1.2" data-path="../regression/e2006_arow.html">
+ <li class="chapter " data-level="8.2.2" data-path="../regression/e2006_arow.html">
<a href="../regression/e2006_arow.html">
- <b>8.1.2.</b>
+ <b>8.2.2.</b>
Passive Aggressive, AROW
@@ -1374,14 +1410,14 @@
</li>
- <li class="chapter " data-level="8.2" data-path="../regression/kddcup12tr2.html">
+ <li class="chapter " data-level="8.3" data-path="../regression/kddcup12tr2.html">
<a href="../regression/kddcup12tr2.html">
- <b>8.2.</b>
+ <b>8.3.</b>
- KDDCup 2012 track 2 CTR prediction
+ KDDCup 2012 track 2 CTR prediction tutorial
</a>
@@ -1390,12 +1426,12 @@
<ul class="articles">
- <li class="chapter " data-level="8.2.1" data-path="../regression/kddcup12tr2_dataset.html">
+ <li class="chapter " data-level="8.3.1" data-path="../regression/kddcup12tr2_dataset.html">
<a href="../regression/kddcup12tr2_dataset.html">
- <b>8.2.1.</b>
+ <b>8.3.1.</b>
Data preparation
@@ -1405,12 +1441,12 @@
</li>
- <li class="chapter " data-level="8.2.2" data-path="../regression/kddcup12tr2_lr.html">
+ <li class="chapter " data-level="8.3.2" data-path="../regression/kddcup12tr2_lr.html">
<a href="../regression/kddcup12tr2_lr.html">
- <b>8.2.2.</b>
+ <b>8.3.2.</b>
Logistic Regression, Passive Aggressive
@@ -1420,12 +1456,12 @@
</li>
- <li class="chapter " data-level="8.2.3" data-path="../regression/kddcup12tr2_lr_amplify.html">
+ <li class="chapter " data-level="8.3.3" data-path="../regression/kddcup12tr2_lr_amplify.html">
<a href="../regression/kddcup12tr2_lr_amplify.html">
- <b>8.2.3.</b>
+ <b>8.3.3.</b>
Logistic Regression with Amplifier
@@ -1435,12 +1471,12 @@
</li>
- <li class="chapter " data-level="8.2.4" data-path="../regression/kddcup12tr2_adagrad.html">
+ <li class="chapter " data-level="8.3.4" data-path="../regression/kddcup12tr2_adagrad.html">
<a href="../regression/kddcup12tr2_adagrad.html">
- <b>8.2.4.</b>
+ <b>8.3.4.</b>
AdaGrad, AdaDelta
@@ -2119,13 +2155,15 @@
<span class="hljs-keyword">select</span>
docid,
feature(word, <span class="hljs-keyword">count</span>(word)) <span class="hljs-keyword">as</span> word_count
- <span class="hljs-keyword">from</span> docs t1 LATERAL <span class="hljs-keyword">VIEW</span> explode(tokenize(doc, <span class="hljs-literal">true</span>)) t2 <span class="hljs-keyword">as</span> word
+ <span class="hljs-keyword">from</span>
+ docs t1
+ LATERAL <span class="hljs-keyword">VIEW</span> explode(tokenize(doc, <span class="hljs-literal">true</span>)) t2 <span class="hljs-keyword">as</span> word
<span class="hljs-keyword">where</span>
<span class="hljs-keyword">not</span> is_stopword(word)
<span class="hljs-keyword">group</span> <span class="hljs-keyword">by</span>
docid, word
)
-<span class="hljs-keyword">select</span> docid, collect_set(word_count) <span class="hljs-keyword">as</span> feature
+<span class="hljs-keyword">select</span> docid, collect_list(word_count) <span class="hljs-keyword">as</span> features
<span class="hljs-keyword">from</span> word_counts
<span class="hljs-keyword">group</span> <span class="hljs-keyword">by</span> docid
;
@@ -2134,7 +2172,7 @@
<thead>
<tr>
<th style="text-align:center">docid</th>
-<th style="text-align:left">feature</th>
+<th style="text-align:left">features</th>
</tr>
</thead>
<tbody>
@@ -2160,35 +2198,38 @@
<span class="hljs-keyword">not</span> is_stopword(word)
<span class="hljs-keyword">group</span> <span class="hljs-keyword">by</span>
docid, word
-)
-<span class="hljs-keyword">select</span>
- train_lda(feature, <span class="hljs-string">"-topics 2 -iter 20"</span>) <span class="hljs-keyword">as</span> (label, word, lambda)
-<span class="hljs-keyword">from</span> (
- <span class="hljs-keyword">select</span> docid, collect_set(word_count) <span class="hljs-keyword">as</span> feature
+),
+<span class="hljs-keyword">input</span> <span class="hljs-keyword">as</span> (
+ <span class="hljs-keyword">select</span> docid, collect_list(word_count) <span class="hljs-keyword">as</span> features
<span class="hljs-keyword">from</span> word_counts
<span class="hljs-keyword">group</span> <span class="hljs-keyword">by</span> docid
- <span class="hljs-keyword">order</span> <span class="hljs-keyword">by</span> docid
-) t
+)
+<span class="hljs-keyword">select</span>
+ train_lda(features, <span class="hljs-string">'-topics 2 -iter 20'</span>) <span class="hljs-keyword">as</span> (label, word, lambda)
+<span class="hljs-keyword">from</span>
+ <span class="hljs-keyword">input</span>
;
</code></pre>
<p>Here, an option <code>-topics 2</code> specifies the number of topics we assume in the set of documents.</p>
<p>Notice that <code>order by docid</code> ensures building an LDA model precisely in a single node. If you would like to launch <code>train_lda</code> in parallel, the following query should return a similar (though possibly slightly approximated) result:</p>
<pre><code class="lang-sql">with word_counts as (
<span class="hljs-comment">-- same as above</span>
+),
+input as (
+ <span class="hljs-keyword">select</span> docid, collect_list(f) <span class="hljs-keyword">as</span> features
+ <span class="hljs-keyword">from</span> word_counts
+ <span class="hljs-keyword">group</span> <span class="hljs-keyword">by</span> docid
)
<span class="hljs-keyword">select</span>
label, word, <span class="hljs-keyword">avg</span>(lambda) <span class="hljs-keyword">as</span> lambda
<span class="hljs-keyword">from</span> (
<span class="hljs-keyword">select</span>
- train_lda(feature, <span class="hljs-string">"-topics 2 -iter 20"</span>) <span class="hljs-keyword">as</span> (label, word, lambda)
- <span class="hljs-keyword">from</span> (
- <span class="hljs-keyword">select</span> docid, collect_set(f) <span class="hljs-keyword">as</span> feature
- <span class="hljs-keyword">from</span> word_counts
- <span class="hljs-keyword">group</span> <span class="hljs-keyword">by</span> docid
- ) t1
+ train_lda(features, <span class="hljs-string">'-topics 2 -iter 20'</span>) <span class="hljs-keyword">as</span> (label, word, lambda)
+ <span class="hljs-keyword">from</span>
+ <span class="hljs-keyword">input</span>
) t2
<span class="hljs-keyword">group</span> <span class="hljs-keyword">by</span> label, word
-<span class="hljs-keyword">order</span> <span class="hljs-keyword">by</span> lambda <span class="hljs-keyword">desc</span>
+<span class="hljs-comment">-- order by lambda desc -- ordering is optional</span>
;
</code></pre>
<p>Eventually, a new table <code>lda_model</code> is generated as shown below:</p>
@@ -2303,7 +2344,9 @@
docid,
word,
<span class="hljs-keyword">count</span>(word) <span class="hljs-keyword">as</span> <span class="hljs-keyword">value</span>
- <span class="hljs-keyword">from</span> docs t1 LATERAL <span class="hljs-keyword">VIEW</span> explode(tokenize(doc, <span class="hljs-literal">true</span>)) t2 <span class="hljs-keyword">as</span> word
+ <span class="hljs-keyword">from</span>
+ docs t1
+ LATERAL <span class="hljs-keyword">VIEW</span> explode(tokenize(doc, <span class="hljs-literal">true</span>)) t2 <span class="hljs-keyword">as</span> word
<span class="hljs-keyword">where</span>
<span class="hljs-keyword">not</span> is_stopword(word)
<span class="hljs-keyword">group</span> <span class="hljs-keyword">by</span>
@@ -2311,7 +2354,7 @@
)
<span class="hljs-keyword">select</span>
t.docid,
- lda_predict(t.word, t.<span class="hljs-keyword">value</span>, m.label, m.lambda, <span class="hljs-string">"-topics 2"</span>) <span class="hljs-keyword">as</span> probabilities
+ lda_predict(t.word, t.<span class="hljs-keyword">value</span>, m.label, m.lambda, <span class="hljs-string">'-topics 2'</span>) <span class="hljs-keyword">as</span> probabilities
<span class="hljs-keyword">from</span>
<span class="hljs-keyword">test</span> t
<span class="hljs-keyword">JOIN</span> lda_model m <span class="hljs-keyword">ON</span> (t.word = m.word)
@@ -2340,8 +2383,7 @@
<p>Importantly, the <code>-topics</code> option must be set to the same value that you used for training.</p>
<p>Since the probabilities are sorted in descending order, a label of the most promising topic is easily obtained as:</p>
<pre><code class="lang-sql"><span class="hljs-keyword">select</span> docid, probabilities[<span class="hljs-number">0</span>].label
-<span class="hljs-keyword">from</span> topic
-;
+<span class="hljs-keyword">from</span> topic;
</code></pre>
<table>
<thead>
@@ -2417,7 +2459,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda
<script>
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"Latent Dirichlet Allocation","level":"11.1","depth":1,"next":{"title":"Probabilistic Latent Semantic Analysis","level":"11.2","depth":1,"path":"clustering/plsa.md","ref":"clustering/plsa.md","articles":[]},"previous":{"title":"ChangeFinder: Detecting Outlier and Change-Point Simultaneously","level":"10.3","depth":1,"path":"anomaly/changefinder.md","ref":"anomaly/changefinder.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"u
rl":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","pri
nt":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"clustering/lda.md","mtime":"2017-05-30T05:53:27.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2017-06-15T10:33:21.138Z"},"basePath":"..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"Latent Dirichlet Allocation","level":"11.1","depth":1,"next":{"title":"Probabilistic Latent Semantic Analysis","level":"11.2","depth":1,"path":"clustering/plsa.md","ref":"clustering/plsa.md","articles":[]},"previous":{"title":"ChangeFinder: Detecting Outlier and Change-Point Simultaneously","level":"10.3","depth":1,"path":"anomaly/changefinder.md","ref":"anomaly/changefinder.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"u
rl":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","pri
nt":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"clustering/lda.md","mtime":"2017-06-23T09:56:22.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2017-06-23T09:59:20.878Z"},"basePath":"..","book":{"language":""}});
});
</script>
</div>