You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by nk...@apache.org on 2018/10/15 18:48:49 UTC

[14/51] [partial] madlib-site git commit: Doc: Add v1.15.1 documentation

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/af0e5f14/docs/v1.15.1/group__grp__ordinal.html
----------------------------------------------------------------------
diff --git a/docs/v1.15.1/group__grp__ordinal.html b/docs/v1.15.1/group__grp__ordinal.html
new file mode 100644
index 0000000..c4e9251
--- /dev/null
+++ b/docs/v1.15.1/group__grp__ordinal.html
@@ -0,0 +1,477 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.14"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/>
+<title>MADlib: Ordinal Regression</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
+  $(document).ready(initResizable);
+/* @license-end */</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
+  $(document).ready(function() { init_search(); });
+/* @license-end */
+</script>
+<script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
+    jax: ["input/TeX","output/HTML-CSS"],
+});
+</script><script type="text/javascript" async src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.2/MathJax.js"></script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.15.1</span>
+   </div>
+   <div id="projectbrief">User Documentation for Apache MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.14 -->
+<script type="text/javascript">
+/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+/* @license-end */
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
+$(document).ready(function(){initNavTree('group__grp__ordinal.html','');});
+/* @license-end */
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="headertitle">
+<div class="title">Ordinal Regression<div class="ingroups"><a class="el" href="group__grp__super.html">Supervised Learning</a> &raquo; <a class="el" href="group__grp__regml.html">Regression Models</a></div></div>  </div>
+</div><!--header-->
+<div class="contents">
+<div class="toc"><b>Contents</b> <ul>
+<li class="level1">
+<a href="#train">Training Function</a> </li>
+<li class="level1">
+<a href="#predict">Prediction Function</a> </li>
+<li class="level1">
+<a href="#examples">Examples</a> </li>
+<li class="level1">
+<a href="#background">Model Details</a> </li>
+<li class="level1">
+<a href="#literature">Literature</a> </li>
+<li class="level1">
+<a href="#related">Related Topics</a> </li>
+</ul>
+</div><p>In statistics, ordinal regression is a type of regression analysis used for predicting an ordinal variable, i.e. a variable whose value exists on an arbitrary scale where only the relative ordering between different values is significant. The two most common types of ordinal regression models are ordered logit, which applies to data that meet the proportional odds assumption, and ordered probit.</p>
+<p><a class="anchor" id="train"></a></p><dl class="section user"><dt>Training Function</dt><dd>The ordinal regression training function has the following syntax: <pre class="syntax">
+ordinal(source_table,
+         model_table,
+         dependent_varname,
+         independent_varname,
+         cat_order,
+         link_func,
+         grouping_col,
+         optim_params,
+         verbose
+        )
+</pre></dd></dl>
+<p><b>Arguments</b> </p><dl class="arglist">
+<dt>source_table </dt>
+<dd><p class="startdd">VARCHAR. Name of the table containing the training data.</p>
+<p class="enddd"></p>
+</dd>
+<dt>model_table </dt>
+<dd><p class="startdd">VARCHAR. Name of the generated table containing the model.</p>
+<p>The model table produced by ordinal() contains the following columns:</p>
+<table class="output">
+<tr>
+<th>&lt;...&gt; </th><td><p class="starttd">Grouping columns, if provided in input. This could be multiple columns depending on the <code>grouping_col</code> input. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>coef_threshold </th><td><p class="starttd">FLOAT8[]. Vector of the threshold coefficients in the linear predictor. The threshold coefficients are the intercepts specific to each categorical level. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>std_err_threshold </th><td><p class="starttd">FLOAT8[]. Vector of the threshold standard errors of the threshold coefficients. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>z_stats_threshold </th><td><p class="starttd">FLOAT8[]. Vector of the threshold z-statistics of the threshold coefficients. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>p_values_threshold </th><td><p class="starttd">FLOAT8[]. Vector of the threshold p-values of the threshold coefficients. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>log_likelihood </th><td><p class="starttd">FLOAT8. The log-likelihood \( l(\boldsymbol \beta) \). The value will be the same across categories within the same group. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>coef_feature </th><td><p class="starttd">FLOAT8[]. Vector of the feature coefficients in linear predictor. The feature coefficients are the coefficients for the independent variables. They are the same across categories. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>std_err_feature </th><td><p class="starttd">FLOAT8[]. Vector of the feature standard errors of the feature coefficients. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>z_stats_feature </th><td><p class="starttd">FLOAT8[]. Vector of the feature z-statistics of the feature coefficients. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>p_values_feature </th><td><p class="starttd">FLOAT8[]. Vector of the feature p-values of the feature coefficients. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>num_rows_processed </th><td><p class="starttd">BIGINT. Number of rows processed. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>num_rows_skipped </th><td><p class="starttd">BIGINT. Number of rows skipped due to missing values or failures. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>num_iterations </th><td>INTEGER. Number of iterations actually completed. This can be less than the <code>max_iter</code> value given in <code>optim_params</code> if the algorithm converges to within the <code>tolerance</code> before all iterations are completed.  </td></tr>
+</table>
+<p>A summary table named &lt;model_table&gt;_summary is also created at the same time, which has the following columns: </p><table class="output">
+<tr>
+<th>method </th><td><p class="starttd">VARCHAR. String describing the model: 'ordinal'. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>source_table </th><td><p class="starttd">VARCHAR. Data source table name. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>model_table </th><td><p class="starttd">VARCHAR. Model table name. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>dependent_varname </th><td><p class="starttd">VARCHAR. Expression for dependent variable. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>independent_varname </th><td><p class="starttd">VARCHAR. Expression for independent variables. The independent variables should not include an intercept term. Otherwise there will be an error message indicating that the Hessian matrix is not finite. In that case, the user should drop the intercept and rerun the function again. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>cat_order </th><td><p class="starttd">VARCHAR. String representation of category order. Default is the sorted categories in data using python sort </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>link_func </th><td><p class="starttd">VARCHAR. String that contains link function parameters: 'logit' and 'probit' links are implemented now </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>grouping_col </th><td><p class="starttd">VARCHAR. String representation of grouping columns. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>optimizer_params </th><td><p class="starttd">VARCHAR. String that contains optimizer parameters, and has the form of 'optimizer=..., max_iter=..., tolerance=...'. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>num_all_groups </th><td><p class="starttd">INTEGER. Number of groups in ordinal regression training. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>num_failed_groups </th><td><p class="starttd">INTEGER. Number of failed groups in ordinal regression training. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>total_rows_processed </th><td><p class="starttd">BIGINT. Total number of rows processed in all groups. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>total_rows_skipped </th><td><p class="starttd">BIGINT. Total number of rows skipped in all groups due to missing values or failures. </p>
+<p class="endtd"></p>
+</td></tr>
+</table>
+<p class="enddd"></p>
+</dd>
+<dt>dependent_varname </dt>
+<dd><p class="startdd">VARCHAR. Name of the dependent variable column.</p>
+<p class="enddd"></p>
+</dd>
+<dt>independent_varname </dt>
+<dd><p class="startdd">VARCHAR. Expression list to evaluate for the independent variables. An intercept should not be included here, since the cumulative link model already has an intercept for each category level.</p>
+<p class="enddd"></p>
+</dd>
+<dt>cat_order </dt>
+<dd><p class="startdd">VARCHAR. String that represents the order of the categories. The order is specified by the character '&lt;', e.g. '0&lt;1&lt;2'. </p>
+<p class="enddd"></p>
+</dd>
+<dt>link_func (optional) </dt>
+<dd><p class="startdd">VARCHAR, default: 'logit'. Parameters for link function. Currently, we support logit and probit. </p>
+<p class="enddd"></p>
+</dd>
+<dt>grouping_col (optional) </dt>
+<dd><p class="startdd">VARCHAR, default: NULL. An expression list used to group the input dataset into discrete groups, running one regression per group. Similar to the SQL "GROUP BY" clause. When this value is NULL, no grouping is used and a single model is generated.</p>
+<p class="enddd"></p>
+</dd>
+<dt>optim_params (optional) </dt>
+<dd><p class="startdd">VARCHAR, default: 'max_iter=100,optimizer=irls,tolerance=1e-6'. Parameters for optimizer. Currently, we support tolerance=[tolerance for relative error between log-likelihoods], max_iter=[maximum iterations to run], optimizer=irls.</p>
+<p class="enddd"></p>
+</dd>
+<dt>verbose (optional) </dt>
+<dd>BOOLEAN, default: FALSE. Provides verbose output of the results of training. </dd>
+</dl>
+<dl class="section note"><dt>Note</dt><dd>To calculate the standard errors of the coefficients, we use the square root of the diagonal elements of the expected Fisher information matrix, which is a by-product of iteratively reweighted least squares. This method is used in the original ordinal regression paper by McCullagh (1980). In some software like Stata, the standard error is calculated from the observed information matrix, which is supported by Efron and Hinkley (1978). In R, polr() uses the approximated observed information matrix while the optimization is achieved by a first-order optimization method. Therefore, there will be some differences in standard errors, z-stats and p-values compared with other software.</dd></dl>
+<p><a class="anchor" id="predict"></a></p><dl class="section user"><dt>Prediction Function</dt><dd>Ordinal regression prediction function has the following format: <pre class="syntax">
+ordinal_predict(
+                    model_table,
+                    predict_table_input,
+                    output_table,
+                    predict_type,
+                    verbose
+               )
+</pre> <b>Arguments</b> <dl class="arglist">
+<dt>model_table </dt>
+<dd><p class="startdd">TEXT. Name of the generated table containing the model, which is the output table from ordinal().</p>
+<p class="enddd"></p>
+</dd>
+<dt>predict_table_input </dt>
+<dd><p class="startdd">TEXT. The name of the table containing the data to predict on. The table must contain an id column as the primary key.</p>
+<p class="enddd"></p>
+</dd>
+<dt>output_table </dt>
+<dd><p class="startdd">TEXT. Name of the generated table containing the predicted values.</p>
+<p>The output table produced by ordinal_predict contains the following columns:</p>
+<table class="output">
+<tr>
+<th>id </th><td><p class="starttd">SERIAL. Column to identify the predicted value. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>category </th><td><p class="starttd">TEXT. Available if the predicted type = 'response'. Column contains the predicted categories </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>category_value </th><td>FLOAT8. The predicted probability for the specific category_value.  </td></tr>
+</table>
+<p class="enddd"></p>
+</dd>
+<dt>predict_type </dt>
+<dd><p class="startdd">TEXT. Either 'response' or 'probability'. Using 'response' will give the predicted category with the largest probability. Using 'probability' will give the predicted probabilities for all categories.</p>
+<p class="enddd"></p>
+</dd>
+<dt>verbose </dt>
+<dd>BOOLEAN. Whether verbose output is displayed. </dd>
+</dl>
+</dd></dl>
+<p><a class="anchor" id="examples"></a></p><dl class="section user"><dt>Examples</dt><dd></dd></dl>
+<ol type="1">
+<li>Create the training data table. <pre class="example">
+DROP TABLE IF EXISTS test3;
+CREATE TABLE test3 (
+    feat1 INTEGER,
+    feat2 INTEGER,
+    cat INTEGER
+);
+INSERT INTO test3(feat1, feat2, cat) VALUES
+(1,35,1),
+(2,33,0),
+(3,39,1),
+(1,37,1),
+(2,31,1),
+(3,36,0),
+(2,36,1),
+(2,31,1),
+(2,41,1),
+(2,37,1),
+(1,44,1),
+(3,33,2),
+(1,31,1),
+(2,44,1),
+(1,35,1),
+(1,44,0),
+(1,46,0),
+(2,46,1),
+(2,46,2),
+(3,49,1),
+(2,39,0),
+(2,44,1),
+(1,47,1),
+(1,44,1),
+(1,37,2),
+(3,38,2),
+(1,49,0),
+(2,44,0),
+(3,61,2),
+(1,65,2),
+(3,67,1),
+(3,65,2),
+(1,65,2),
+(2,67,2),
+(1,65,2),
+(1,62,2),
+(3,52,2),
+(3,63,2),
+(2,59,2),
+(3,65,2),
+(2,59,0),
+(3,67,2),
+(3,67,2),
+(3,60,2),
+(3,67,2),
+(3,62,2),
+(2,54,2),
+(3,65,2),
+(3,62,2),
+(2,59,2),
+(3,60,2),
+(3,63,2),
+(3,65,2),
+(2,63,1),
+(2,67,2),
+(2,65,2),
+(2,62,2);
+</pre></li>
+<li>Run the ordinal regression function. <pre class="example">
+DROP TABLE IF EXISTS test3_output;
+DROP TABLE IF EXISTS test3_output_summary;
+SELECT madlib.ordinal('test3',
+                       'test3_output',
+                       'cat',
+                       'ARRAY[feat1, feat2]',
+                       '0&lt;1&lt;2',
+                       'logit'
+                       );
+</pre></li>
+<li>View the regression results. <pre class="example">
+-- Set extended display on for easier reading of output
+\x on
+SELECT * FROM test3_output;
+</pre></li>
+</ol>
+<p>Result: </p><pre class="result">
+-[ RECORD 1 ]------+-------------------------------------------
+coef_threshold     | {4.12831944358935,6.55999442887089}
+std_err_threshold  | {1.3603408170882,1.54843501580999}
+z_stats_threshold  | {3.03476848722806,4.23653195768075}
+p_values_threshold | {0.00240720390579325,2.26998625331282e-05}
+log_likelihood     | -42.1390192418541
+coef_feature       | {0.574822563129293,0.108115645059558}
+std_err_feature    | {0.394064908788145,0.0276025960683975}
+z_stats_feature    | {1.45870020473791,3.91686509456046}
+p_values_feature   | {0.144647639733733,8.9707915817562e-05}
+num_rows_processed | 57
+num_rows_skipped   | 0
+iteration          | 7
+</pre><ol type="1" start="4">
+<li>Predicting dependent variable using ordinal model. (This example uses the original data table to perform the prediction. Typically a different test dataset with the same features as the original training dataset would be used for prediction.)</li>
+</ol>
+<pre class="example">
+\x off
+-- Add the id column for prediction function
+ALTER TABLE test3 ADD COLUMN id SERIAL;
+-- Predict probabilities for all categories using the original data
+SELECT madlib.ordinal_predict('test3_output','test3', 'test3_prd_prob', 'probability');
+-- Display the predicted value
+SELECT * FROM test3_prd_prob;
+</pre><p><a class="anchor" id="background"></a></p><dl class="section user"><dt>Model Details</dt><dd></dd></dl>
+<p>The function ordinal() fits the ordinal response model using a cumulative link model. The ordinal response variable, denoted by \( Y_i \), can fall in \( j = 1, \ldots, J\) categories. Then \( Y_i \) follows a multinomial distribution with parameter \(\pi\) where \(\pi_{ij}\) denotes the probability that the \(i\)th observation falls in response category \(j\). We define the cumulative probabilities as </p><p class="formulaDsp">
+\[ \gamma_{ij} = \Pr(Y_i \le j)= \pi_{i1} +...+ \pi_{ij} . \]
+</p>
+<p> Next we will consider the logit link for illustration purposes. The logit function is defined as \( \mbox{logit}(\pi) = \log[\pi/(1-\pi)] \) and cumulative logits are defined as: </p><p class="formulaDsp">
+\[ \mbox{logit}(\gamma_{ij})=\mbox{logit}(\Pr(Y_i \le j))=\log \frac{\Pr(Y_i \le j)}{1-\Pr(Y_i\le j)}, j=1,...,J-1 \]
+</p>
+<p> so that the cumulative logits are defined for all but the last category.</p>
+<p>A cumulative link model with a logit link, or simply cumulative logit model is a regression model for cumulative logits: </p><p class="formulaDsp">
+\[ \mbox{logit}(\gamma_{ij}) = \theta_j - x^T_i \beta \]
+</p>
+<p> where \(x_i\) is a vector of explanatory variables for the \(i\)th observation and \(\beta\) is the corresponding set of regression parameters. The \(\{\theta_j\}\) parameters provide each cumulative logit (for each \(j\)) with its own intercept. A key point is that the regression part \(x^T_i\beta\) is independent of \(j\), so \(\beta\) has the same effect for each of the J − 1 cumulative logits. Note that \(x^T_i\beta\) does not contain an intercept, since the \(\{\theta_j\}\) act as intercepts. For small values of \(x^T_i\beta\) the response is likely to fall in the first category and for large values of \(x^T_i\beta\) the response is likely to fall in the last category. The horizontal displacements of the curves are given by the values of \(\{\theta_j\}\).</p>
+<p><a class="anchor" id="literature"></a></p><dl class="section user"><dt>Literature</dt><dd></dd></dl>
+<p>A collection of nice write-ups, with valuable pointers into further literature:</p>
+<p>[1] Peter McCullagh: Regression Models for Ordinal Data, Journal of the Royal Statistical Society. Series B (Methodological), Volume 42, Issue 2 (1980), 109-142</p>
+<p>[2] Rune Haubo B Christensen: Analysis of ordinal data with cumulative link models &ndash; estimation with the R-package ordinal. cran.r-project.org/web/packages/ordinal/vignettes/clm_intro.pdf</p>
+<p><a class="anchor" id="related"></a></p><dl class="section user"><dt>Related Topics</dt><dd></dd></dl>
+<p>File <a class="el" href="ordinal_8sql__in.html" title="SQL functions for ordinal regression. ">ordinal.sql_in</a> documenting the ordinal regression functions</p>
+<p><a class="el" href="group__grp__multinom.html">Multinomial Regression</a></p>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Mon Oct 15 2018 11:24:30 for MADlib by
+    <a href="http://www.doxygen.org/index.html">
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.14 </li>
+  </ul>
+</div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/af0e5f14/docs/v1.15.1/group__grp__other__functions.html
----------------------------------------------------------------------
diff --git a/docs/v1.15.1/group__grp__other__functions.html b/docs/v1.15.1/group__grp__other__functions.html
new file mode 100644
index 0000000..bd5d388
--- /dev/null
+++ b/docs/v1.15.1/group__grp__other__functions.html
@@ -0,0 +1,164 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.14"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/>
+<title>MADlib: Utilities</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
+  $(document).ready(initResizable);
+/* @license-end */</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
+  $(document).ready(function() { init_search(); });
+/* @license-end */
+</script>
+<script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
+    jax: ["input/TeX","output/HTML-CSS"],
+});
+</script><script type="text/javascript" async src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.2/MathJax.js"></script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.15.1</span>
+   </div>
+   <div id="projectbrief">User Documentation for Apache MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.14 -->
+<script type="text/javascript">
+/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+/* @license-end */
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
+$(document).ready(function(){initNavTree('group__grp__other__functions.html','');});
+/* @license-end */
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="summary">
+<a href="#groups">Modules</a>  </div>
+  <div class="headertitle">
+<div class="title">Utilities</div>  </div>
+</div><!--header-->
+<div class="contents">
+<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
+<p>Useful utilities for data science workflows. </p>
+<table class="memberdecls">
+<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="groups"></a>
+Modules</h2></td></tr>
+<tr class="memitem:group__grp__cols2vec"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__cols2vec.html">Columns to Vector</a></td></tr>
+<tr class="memdesc:group__grp__cols2vec"><td class="mdescLeft">&#160;</td><td class="mdescRight">Create a new table with all feature columns inserted into a single column as an array. <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:group__grp__utilities"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__utilities.html">Database Functions</a></td></tr>
+<tr class="memdesc:group__grp__utilities"><td class="mdescLeft">&#160;</td><td class="mdescRight">Provides a collection of user-defined functions for performing common tasks in the database. <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:group__grp__linear__solver"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__linear__solver.html">Linear Solvers</a></td></tr>
+<tr class="memdesc:group__grp__linear__solver"><td class="mdescLeft">&#160;</td><td class="mdescRight">Methods that implement solutions for systems of consistent linear equations. <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:group__grp__minibatch__preprocessing"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__minibatch__preprocessing.html">Mini-Batch Preprocessor</a></td></tr>
+<tr class="memdesc:group__grp__minibatch__preprocessing"><td class="mdescLeft">&#160;</td><td class="mdescRight">Utility that prepares input data for use by models that support mini-batch as an optimization option. <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:group__grp__pmml"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__pmml.html">PMML Export</a></td></tr>
+<tr class="memdesc:group__grp__pmml"><td class="mdescLeft">&#160;</td><td class="mdescRight">Implements the PMML XML standard to describe and exchange models produced by data mining and machine learning algorithms. <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:group__grp__text__utilities"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__text__utilities.html">Term Frequency</a></td></tr>
+<tr class="memdesc:group__grp__text__utilities"><td class="mdescLeft">&#160;</td><td class="mdescRight">Provides a collection of functions for performing common tasks related to text analytics. <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:group__grp__vec2cols"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__vec2cols.html">Vector to Columns</a></td></tr>
+<tr class="memdesc:group__grp__vec2cols"><td class="mdescLeft">&#160;</td><td class="mdescRight">Converts a feature array in a single column of an output table into multiple columns. <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+</table>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Mon Oct 15 2018 11:24:30 for MADlib by
+    <a href="http://www.doxygen.org/index.html">
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.14 </li>
+  </ul>
+</div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/af0e5f14/docs/v1.15.1/group__grp__other__functions.js
----------------------------------------------------------------------
diff --git a/docs/v1.15.1/group__grp__other__functions.js b/docs/v1.15.1/group__grp__other__functions.js
new file mode 100644
index 0000000..5d88a12
--- /dev/null
+++ b/docs/v1.15.1/group__grp__other__functions.js
@@ -0,0 +1,10 @@
+var group__grp__other__functions =
+[
+    [ "Columns to Vector", "group__grp__cols2vec.html", null ],
+    [ "Database Functions", "group__grp__utilities.html", null ],
+    [ "Linear Solvers", "group__grp__linear__solver.html", "group__grp__linear__solver" ],
+    [ "Mini-Batch Preprocessor", "group__grp__minibatch__preprocessing.html", null ],
+    [ "PMML Export", "group__grp__pmml.html", null ],
+    [ "Term Frequency", "group__grp__text__utilities.html", null ],
+    [ "Vector to Columns", "group__grp__vec2cols.html", null ]
+];
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/af0e5f14/docs/v1.15.1/group__grp__pagerank.html
----------------------------------------------------------------------
diff --git a/docs/v1.15.1/group__grp__pagerank.html b/docs/v1.15.1/group__grp__pagerank.html
new file mode 100644
index 0000000..da56fd4
--- /dev/null
+++ b/docs/v1.15.1/group__grp__pagerank.html
@@ -0,0 +1,376 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.14"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/>
+<title>MADlib: PageRank</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
+  $(document).ready(initResizable);
+/* @license-end */</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
+  $(document).ready(function() { init_search(); });
+/* @license-end */
+</script>
+<script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
+    jax: ["input/TeX","output/HTML-CSS"],
+});
+</script><script type="text/javascript" async src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.2/MathJax.js"></script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.15.1</span>
+   </div>
+   <div id="projectbrief">User Documentation for Apache MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.14 -->
+<script type="text/javascript">
+/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+/* @license-end */
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
+$(document).ready(function(){initNavTree('group__grp__pagerank.html','');});
+/* @license-end */
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="headertitle">
+<div class="title">PageRank<div class="ingroups"><a class="el" href="group__grp__graph.html">Graph</a></div></div>  </div>
+</div><!--header-->
+<div class="contents">
+<div class="toc"><b>Contents</b> <ul>
+<li>
+<a href="#pagerank">PageRank</a> </li>
+<li>
+<a href="#examples">Examples</a> </li>
+<li>
+<a href="#literature">Literature</a> </li>
+</ul>
+</div><p>Given a graph, the PageRank algorithm outputs a probability distribution representing the likelihood that a person randomly traversing the graph will arrive at any particular vertex. This algorithm was originally used by Google to rank websites where the World Wide Web was modeled as a directed graph with the vertices representing the websites. The PageRank algorithm initially proposed by Larry Page and Sergey Brin is implemented here [1].</p>
+<p>We also implement personalized PageRank, in which a notion of importance provides personalization to a query. For example, importance scores can be biased according to a specified set of vertices in the graph that are of interest or special in some way [2].</p>
+<p><a class="anchor" id="pagerank"></a></p><dl class="section user"><dt>PageRank</dt><dd><pre class="syntax">
+pagerank( vertex_table,
+          vertex_id,
+          edge_table,
+          edge_args,
+          out_table,
+          damping_factor,
+          max_iter,
+          threshold,
+          grouping_cols,
+          personalization_vertices
+        )
+</pre></dd></dl>
+<p><b>Arguments</b> </p><dl class="arglist">
+<dt>vertex_table </dt>
+<dd><p class="startdd">TEXT. Name of the table containing the vertex data for the graph. Must contain the column specified in the 'vertex_id' parameter below.</p>
+<p class="enddd"></p>
+</dd>
+<dt>vertex_id </dt>
+<dd><p class="startdd">TEXT, default = 'id'. Name of the column in 'vertex_table' containing vertex ids. The vertex ids are of type INTEGER with no duplicates. They do not need to be contiguous.</p>
+<p class="enddd"></p>
+</dd>
+<dt>edge_table </dt>
+<dd><p class="startdd">TEXT. Name of the table containing the edge data. The edge table must contain columns for source vertex and destination vertex.</p>
+<p class="enddd"></p>
+</dd>
+<dt>edge_args </dt>
+<dd><p class="startdd">TEXT. A comma-delimited string containing multiple named arguments of the form "name=value". The following parameters are supported for this string argument:</p><ul>
+<li>src (INTEGER): Name of the column containing the source vertex ids in the edge table. Default column name is 'src'.</li>
+<li>dest (INTEGER): Name of the column containing the destination vertex ids in the edge table. Default column name is 'dest'.</li>
+</ul>
+<p class="enddd"></p>
+</dd>
+<dt>out_table </dt>
+<dd><p class="startdd">TEXT. Name of the table to store the result of PageRank. It will contain a row for every vertex from 'vertex_table' with the following columns:</p><ul>
+<li>vertex_id : The id of a vertex. Will use the input parameter 'vertex_id' for column naming.</li>
+<li>pagerank : The vertex's PageRank.</li>
+<li>grouping_cols : Grouping column (if any) values associated with the vertex_id.</li>
+</ul>
+<p>A summary table is also created that contains information regarding the number of iterations required for convergence. It is named by adding the suffix '_summary' to the 'out_table' parameter.</p>
+<p class="enddd"></p>
+</dd>
+<dt>damping_factor (optional) </dt>
+<dd><p class="startdd">FLOAT8, default 0.85. The probability, at any step, that a user will continue following the links in a random surfer model.</p>
+<p class="enddd"></p>
+</dd>
+<dt>max_iter (optional) </dt>
+<dd><p class="startdd">INTEGER, default: 100. The maximum number of iterations allowed.</p>
+<p class="enddd"></p>
+</dd>
+<dt>threshold (optional) </dt>
+<dd><p class="startdd">FLOAT8, default: 1/(number of vertices * 1000). If the difference between the PageRank of every vertex of two consecutive iterations is smaller than 'threshold', or the iteration number is larger than 'max_iter', the computation stops. If you set the threshold to zero, then you will force the algorithm to run for the full number of iterations specified in 'max_iter'. It is advisable to set threshold to a value lower than 1/(number of vertices in the graph) since the PageRank value of nodes is initialized to that value.</p>
+<p class="enddd"></p>
+</dd>
+<dt>grouping_cols (optional) </dt>
+<dd>TEXT, default: NULL. A single column or a list of comma-separated columns that divides the input data into discrete groups, resulting in one distribution per group. When this value is NULL, no grouping is used and a single model is generated for all data. <dl class="section note"><dt>Note</dt><dd>Expressions are not currently supported for 'grouping_cols'.</dd></dl>
+</dd>
+<dt>personalization_vertices (optional) </dt>
+<dd>INTEGER[], default: NULL. A comma-separated list of vertices or nodes for personalized PageRank. When this parameter is provided, personalized PageRank will run. In the absence of this parameter, regular PageRank will run. </dd>
+</dl>
+<p><a class="anchor" id="examples"></a></p><dl class="section user"><dt>Examples</dt><dd></dd></dl>
+<ol type="1">
+<li>Create vertex and edge tables to represent the graph: <pre class="syntax">
+DROP TABLE IF EXISTS vertex, edge;
+CREATE TABLE vertex(
+        id INTEGER
+        );
+CREATE TABLE edge(
+        src INTEGER,
+        dest INTEGER,
+        user_id INTEGER
+        );
+INSERT INTO vertex VALUES
+(0),
+(1),
+(2),
+(3),
+(4),
+(5),
+(6);
+INSERT INTO edge VALUES
+(0, 1, 1),
+(0, 2, 1),
+(0, 4, 1),
+(1, 2, 1),
+(1, 3, 1),
+(2, 3, 1),
+(2, 5, 1),
+(2, 6, 1),
+(3, 0, 1),
+(4, 0, 1),
+(5, 6, 1),
+(6, 3, 1),
+(0, 1, 2),
+(0, 2, 2),
+(0, 4, 2),
+(1, 2, 2),
+(1, 3, 2),
+(2, 3, 2),
+(3, 0, 2),
+(4, 0, 2),
+(5, 6, 2),
+(6, 3, 2);
+</pre></li>
+<li>Running PageRank with default values for optional parameters: <pre class="syntax">
+DROP TABLE IF EXISTS pagerank_out, pagerank_out_summary;
+SELECT madlib.pagerank(
+                       'vertex',             -- Vertex table
+                       'id',                 -- Vertex id column
+                       'edge',               -- Edge table
+                       'src=src, dest=dest', -- Comma delimited string of edge arguments
+                       'pagerank_out');      -- Output table of PageRank
+SELECT * FROM pagerank_out ORDER BY pagerank DESC;
+</pre> <pre class="result">
+ id |      pagerank
+----+-------------------
+  0 |  0.28753749341184
+  3 |  0.21016988901855
+  2 |  0.14662683454062
+  4 |  0.10289614384217
+  1 |  0.10289614384217
+  6 |  0.09728637768887
+  5 |  0.05258711765692
+(7 rows)
+</pre> <pre class="syntax">
+SELECT * FROM pagerank_out_summary;
+</pre> <pre class="result">
+ __iterations__
+ ----------------+
+             16
+(1 row)
+</pre></li>
+<li>Running PageRank with a damping factor of 0.5 results in different final values: <pre class="syntax">
+DROP TABLE IF EXISTS pagerank_out, pagerank_out_summary;
+SELECT madlib.pagerank(
+                         'vertex',             -- Vertex table
+                         'id',                 -- Vertex id column
+                         'edge',               -- Edge table
+                         'src=src, dest=dest', -- Comma delimited string of edge arguments
+                         'pagerank_out',       -- Output table of PageRank
+                         0.5);                 -- Damping factor
+SELECT * FROM pagerank_out ORDER BY pagerank DESC;
+</pre> <pre class="result">
+ id |      pagerank
+----+--------------------
+  0 |  0.225477161441199
+  3 |  0.199090328586664
+  2 |  0.136261327206477
+  6 |  0.132691559968224
+  4 |  0.109009291409508
+  1 |  0.109009291409508
+  5 | 0.0884610399788161
+(7 rows)
+</pre></li>
+<li>Now compute the PageRank of vertices associated with each user using the grouping feature: <pre class="syntax">
+DROP TABLE IF EXISTS pagerank_out, pagerank_out_summary;
+SELECT madlib.pagerank(
+                         'vertex',             -- Vertex table
+                         'id',                 -- Vertex id column
+                         'edge',               -- Edge table
+                         'src=src, dest=dest', -- Comma delimited string of edge arguments
+                         'pagerank_out',       -- Output table of PageRank
+                         NULL,                 -- Default damping factor (0.85)
+                         NULL,                 -- Default max iters (100)
+                         0.00000001,           -- Threshold
+                         'user_id');           -- Grouping column name
+SELECT * FROM pagerank_out ORDER BY user_id, pagerank DESC;
+</pre> <pre class="result">
+ user_id | id |      pagerank
+---------+----+--------------------
+       1 |  0 |  0.27825488388552
+       1 |  3 |  0.20188114667075
+       1 |  2 |  0.14288112346059
+       1 |  6 |  0.11453637832147
+       1 |  1 |  0.10026745615438
+       1 |  4 |  0.10026745615438
+       1 |  5 |  0.06191155535288
+       2 |  0 |  0.31854625004173
+       2 |  3 |  0.23786686773343
+       2 |  2 |  0.15914876489397
+       2 |  1 |  0.11168334437971
+       2 |  4 |  0.11168334437971
+       2 |  6 |  0.03964285714285
+       2 |  5 |  0.02142857142857
+(14 rows)
+</pre> <pre class="syntax">
+SELECT * FROM pagerank_out_summary ORDER BY user_id;
+</pre> <pre class="result">
+ user_id | __iterations__
+---------+----------------
+       1 |             27
+       2 |             31
+(2 rows)
+</pre></li>
+<li>Personalized PageRank. Here we specify {2,4} as the personalization vertices. This parameter could be specified as ARRAY[2,4] as well. <pre class="syntax">
+DROP TABLE IF EXISTS pagerank_out, pagerank_out_summary;
+SELECT madlib.pagerank(
+                       'vertex',             -- Vertex table
+                       'id',                 -- Vertex id column
+                       'edge',               -- Edge table
+                       'src=src, dest=dest', -- Comma delimited string of edge arguments
+                       'pagerank_out',       -- Output table of PageRank
+                        NULL,                -- Default damping factor (0.85)
+                        NULL,                -- Default max iters (100)
+                        NULL,                -- Default Threshold
+                        NULL,                -- No Grouping
+                       '{2,4}');             -- Personalization vertices
+SELECT * FROM pagerank_out ORDER BY pagerank DESC;
+</pre> <pre class="result">
+ id |      pagerank
+----+--------------------
+  0 |  0.565232961966315
+  2 |  0.378139420991773
+  3 |  0.355003292266017
+  4 |  0.310111215897626
+  1 |  0.160111215897626
+  6 |  0.148615315574136
+  5 | 0.0803403307142321
+(7 rows)
+</pre> <pre class="syntax">
+SELECT * FROM pagerank_out_summary;
+</pre> <pre class="result">
+ __iterations__
+ ----------------+
+             37
+(1 row)
+</pre></li>
+</ol>
+<p><a class="anchor" id="literature"></a></p><dl class="section user"><dt>Literature</dt><dd></dd></dl>
+<p>[1] Brin, S. and Page, L. (1998), "The anatomy of a large-scale hypertextual Web search engine", Computer Networks and ISDN Systems. 30: 107–117, <a href="http://infolab.stanford.edu/pub/papers/google.pdf">http://infolab.stanford.edu/pub/papers/google.pdf</a></p>
+<p>[2] Jeh, Glen and Widom, Jennifer. "Scaling Personalized Web Search", Proceedings of the 12th international conference on World Wide Web, Pages 271-279 Budapest, Hungary, May 20-24, 2003, <a href="http://ilpubs.stanford.edu:8090/530/1/2002-12.pdf">http://ilpubs.stanford.edu:8090/530/1/2002-12.pdf</a> </p>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Mon Oct 15 2018 11:24:30 for MADlib by
+    <a href="http://www.doxygen.org/index.html">
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.14 </li>
+  </ul>
+</div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/af0e5f14/docs/v1.15.1/group__grp__path.html
----------------------------------------------------------------------
diff --git a/docs/v1.15.1/group__grp__path.html b/docs/v1.15.1/group__grp__path.html
new file mode 100644
index 0000000..56f52bd
--- /dev/null
+++ b/docs/v1.15.1/group__grp__path.html
@@ -0,0 +1,488 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.14"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/>
+<title>MADlib: Path</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
+  $(document).ready(initResizable);
+/* @license-end */</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
+  $(document).ready(function() { init_search(); });
+/* @license-end */
+</script>
+<script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
+    jax: ["input/TeX","output/HTML-CSS"],
+});
+</script><script type="text/javascript" async src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.2/MathJax.js"></script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.15.1</span>
+   </div>
+   <div id="projectbrief">User Documentation for Apache MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.14 -->
+<script type="text/javascript">
+/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+/* @license-end */
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
+$(document).ready(function(){initNavTree('group__grp__path.html','');});
+/* @license-end */
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="headertitle">
+<div class="title">Path<div class="ingroups"><a class="el" href="group__grp__datatrans.html">Data Types and Transformations</a></div></div>  </div>
+</div><!--header-->
+<div class="contents">
+<div class="toc"><b>Contents</b> <ul>
+<li>
+<a href="#syntax">Function Syntax</a> </li>
+<li>
+<a href="#examples">Examples</a> </li>
+<li>
+<a href="#nomenclature">Nomenclature</a> </li>
+<li>
+<a href="#literature">Literature</a> </li>
+</ul>
+</div><p>The goal of the MADlib path function is to perform regular pattern matching over a sequence of rows, and to extract useful information about the pattern matches. The useful information could be a simple count of matches or something more involved like aggregations or window functions.</p>
+<p>Symbols are used to identify particular rows of interest. Then, standard PostgreSQL pattern matching using symbols can be applied to identify patterns across the rows of interest. (This is similar in concept to regular expressions which match patterns within strings of text.)</p>
+<p>For example, a symbol can be defined for purchase events by on-line shoppers. Then, preceding events that led to the purchase can be identified and operated on, perhaps to find the common actions that resulted in a purchase. Or conversely, to find actions that resulted in an exit without a purchase having been made.</p>
+<p>Steps on how to use path functions:</p>
+<ol type="1">
+<li>Partition input rows.</li>
+<li>Order the partitions.</li>
+<li>Define symbols to match rows of interest.</li>
+<li>Define regular expression of symbols and operators to define patterns to match in your ordered partitions.</li>
+<li>Define an aggregate function to compute for each pattern match.</li>
+<li>If desired, output the pattern matches for inspection or to operate on them with subsequent queries.</li>
+</ol>
+<p><a class="anchor" id="syntax"></a></p><dl class="section user"><dt>Function Syntax</dt><dd><pre class="syntax">
+path(
+    source_table,
+    output_table,
+    partition_expr,
+    order_expr,
+    symbol,
+    pattern,
+    aggregate_func,
+    persist_rows,
+    overlapping_patterns
+)
+</pre></dd></dl>
+<p><b>Arguments</b> </p><dl class="arglist">
+<dt>source_table </dt>
+<dd><p class="startdd">VARCHAR. Name of the source table, containing data for path analysis.</p>
+<p class="enddd"></p>
+</dd>
+<dt>output_table </dt>
+<dd><p class="startdd">VARCHAR. Name of the result table.</p>
+<p class="enddd"></p>
+</dd>
+<dt>partition_expr </dt>
+<dd><p class="startdd">VARCHAR. The 'partition_expr' can be a single column or a list of comma-separated columns/expressions to divide all rows into groups, or partitions. Matching is applied across the rows that fall into the same partition. This can be NULL or '' to indicate the matching is to be applied to the whole table.</p>
+<p class="enddd"></p>
+</dd>
+<dt>order_expr </dt>
+<dd><p class="startdd">VARCHAR. This expression controls the order in which rows are processed or matched in a partition. For example, time is a common way to order partitions. </p>
+<p class="enddd"></p>
+</dd>
+<dt>symbol </dt>
+<dd><p class="startdd">VARCHAR. Symbols enable you to express patterns of interest in a simple way (see definition of ‘pattern’ argument below). A symbol identifies a row of a particular type that you’re searching for as part of a pattern match. Symbol definition uses the standard PostgreSQL assignment statement 'identifier := expression;' [1]. A given row can only match one symbol. If a row matches multiple symbols, the symbol that comes first in the symbol definition list will take precedence. </p>
+<p class="enddd"></p>
+</dd>
+<dt>pattern </dt>
+<dd><p class="startdd">VARCHAR. The 'pattern' clause defines the pattern that the path algorithm searches for. You express the pattern using symbols and operators following regular PostgreSQL pattern matching syntax and rules [2].</p>
+<p><a class="anchor" id="note"></a></p><dl class="section note"><dt>Note</dt><dd>Symbols defined using more than one (1) character need to be enclosed in parentheses '()' when referenced in the 'pattern' argument. For example:<ul>
+<li>a symbol defined as 'a' in the 'symbol' argument can be used directly in the 'pattern' argument</li>
+<li>a symbol defined as 'abc' in the 'symbol' argument must be written as '(abc)' in the 'pattern' argument</li>
+</ul>
+</dd></dl>
+<p>The following pattern matching metacharacters are supported: </p><ul>
+<li>
+| denotes alternation (either of two alternatives).  </li>
+<li>
+? denotes repetition of the previous item zero or one time.  </li>
+<li>
+* denotes repetition of the previous item zero or more times.  </li>
+<li>
++ denotes repetition of the previous item one or more times.  </li>
+<li>
+{m} denotes repetition of the previous item exactly m times.  </li>
+<li>
+{m,} denotes repetition of the previous item m or more times.  </li>
+<li>
+{m,n} denotes repetition of the previous item at least m and not more than n times.  </li>
+<li>
+Parentheses () can be used to group items into a single logical item. </li>
+</ul>
+<p class="enddd"></p>
+</dd>
+<dt>aggregate_func (optional) </dt>
+<dd><p class="startdd">VARCHAR, default NULL. A comma-separated list of aggregates to be applied to the pattern matches [3]. You can think of this input parameter as being like a SELECT clause. Please note that window functions cannot currently be used in the parameter 'aggregate_func'. If you want to use a window function [4], output the pattern matches and write a SQL query with a window function over the output tuples (see 'persist_rows' parameter below).</p>
+<p>If you just want to output the pattern matched rows and not compute any aggregates, you can put NULL or '' in the 'aggregate_func' parameter. </p>
+<p class="enddd"></p>
+</dd>
+<dt>persist_rows (optional) </dt>
+<dd><p class="startdd">BOOLEAN, default FALSE. If TRUE the matched rows are persisted in a separate output table. This table is named as &lt;output_table&gt;_tuples (the string "_tuples" is added as suffix to the value of <em>output_table</em>). </p>
+<p class="enddd"></p>
+</dd>
+<dt>overlapping_patterns (optional) </dt>
+<dd><p class="startdd">BOOLEAN, default FALSE. If TRUE find every occurrence of the pattern in the partition, regardless of whether it might have been part of a previously found match. </p>
+<p class="enddd"></p>
+</dd>
+</dl>
+<p><a class="anchor" id="examples"></a></p><dl class="section user"><dt>Examples</dt><dd></dd></dl>
+<p>The data set describes shopper behavior on a notional web site that sells beer and wine. A beacon fires an event to a log file when the shopper visits different pages on the site: landing page, beer selection page, wine selection page, and checkout. Other pages on the site like help pages show up in the logs as well. Let’s assume that the log has been sessionized.</p>
+<ol type="1">
+<li>Create the data table: <pre class="example">
+DROP TABLE IF EXISTS eventlog;
+CREATE TABLE eventlog (event_timestamp TIMESTAMP,
+            user_id INT,
+            session_id INT,
+            page TEXT,
+            revenue FLOAT);
+INSERT INTO eventlog VALUES
+('04/15/2015 01:03:00', 100821, 100, 'LANDING', 0),
+('04/15/2015 01:04:00', 100821, 100, 'WINE', 0),
+('04/15/2015 01:05:00', 100821, 100, 'CHECKOUT', 39),
+('04/15/2015 02:06:00', 100821, 101, 'WINE', 0),
+('04/15/2015 02:09:00', 100821, 101, 'WINE', 0),
+('04/15/2015 01:15:00', 101121, 102, 'LANDING', 0),
+('04/15/2015 01:16:00', 101121, 102, 'WINE', 0),
+('04/15/2015 01:17:00', 101121, 102, 'CHECKOUT', 15),
+('04/15/2015 01:18:00', 101121, 102, 'LANDING', 0),
+('04/15/2015 01:19:00', 101121, 102, 'HELP', 0),
+('04/15/2015 01:21:00', 101121, 102, 'WINE', 0),
+('04/15/2015 01:22:00', 101121, 102, 'CHECKOUT', 23),
+('04/15/2015 02:15:00', 101331, 103, 'LANDING', 0),
+('04/15/2015 02:16:00', 101331, 103, 'WINE', 0),
+('04/15/2015 02:17:00', 101331, 103, 'HELP', 0),
+('04/15/2015 02:18:00', 101331, 103, 'WINE', 0),
+('04/15/2015 02:19:00', 101331, 103, 'CHECKOUT', 16),
+('04/15/2015 02:22:00', 101443, 104, 'BEER', 0),
+('04/15/2015 02:25:00', 101443, 104, 'CHECKOUT', 12),
+('04/15/2015 02:29:00', 101881, 105, 'LANDING', 0),
+('04/15/2015 02:30:00', 101881, 105, 'BEER', 0),
+('04/15/2015 01:05:00', 102201, 106, 'LANDING', 0),
+('04/15/2015 01:06:00', 102201, 106, 'HELP', 0),
+('04/15/2015 01:09:00', 102201, 106, 'LANDING', 0),
+('04/15/2015 02:15:00', 102201, 107, 'WINE', 0),
+('04/15/2015 02:16:00', 102201, 107, 'BEER', 0),
+('04/15/2015 02:17:00', 102201, 107, 'WINE', 0),
+('04/15/2015 02:18:00', 102871, 108, 'BEER', 0),
+('04/15/2015 02:19:00', 102871, 108, 'WINE', 0),
+('04/15/2015 02:22:00', 102871, 108, 'CHECKOUT', 21),
+('04/15/2015 02:25:00', 102871, 108, 'LANDING', 0),
+('04/15/2015 02:17:00', 103711, 109, 'BEER', 0),
+('04/15/2015 02:18:00', 103711, 109, 'LANDING', 0),
+('04/15/2015 02:19:00', 103711, 109, 'WINE', 0);
+</pre></li>
+<li>Calculate the revenue by checkout: <pre class="example">
+DROP TABLE IF EXISTS path_output, path_output_tuples;
+SELECT madlib.path(
+     'eventlog',                -- Name of input table
+     'path_output',             -- Table name to store path results
+     'session_id',              -- Partition input table by session
+     'event_timestamp ASC',     -- Order partitions in input table by time
+     'buy:=page=''CHECKOUT''',  -- Define a symbol for checkout events
+     '(buy)',                   -- Pattern search: purchase
+     'sum(revenue) as checkout_rev',    -- Aggregate:  sum revenue by checkout
+     TRUE                       -- Persist matches
+     );
+SELECT * FROM path_output ORDER BY session_id, match_id;
+</pre> Result: <pre class="result">
+ session_id | match_id | checkout_rev
+------------+----------+--------------
+        100 |        1 |           39
+        102 |        1 |           15
+        102 |        2 |           23
+        103 |        1 |           16
+        104 |        1 |           12
+        108 |        1 |           21
+(6 rows)
+</pre> Note that there are 2 checkouts within session 102, which is apparent from the 'match_id' column. This serves to illustrate that the 'aggregate_func' operates on a <em>per pattern match</em> basis, not on a <em>per partition</em> basis. If in fact we wanted revenue by partition ('session_id' in this example), then we could do: <pre class="example">
+SELECT session_id, sum(checkout_rev) FROM path_output GROUP BY session_id ORDER BY session_id;
+</pre> Result: <pre class="result">
+ session_id | sum
+------------+-----
+        100 |  39
+        102 |  38
+        103 |  16
+        104 |  12
+        108 |  21
+(5 rows)
+</pre> Since we set TRUE for 'persist_rows', we can view the associated pattern matches: <pre class="example">
+SELECT * FROM path_output_tuples ORDER BY session_id ASC, event_timestamp ASC;
+</pre> Result: <pre class="result">
+   event_timestamp   | user_id | session_id |   page   | revenue | symbol | match_id
+---------------------+---------+------------+----------+---------+--------+----------
+ 2015-04-15 01:05:00 |  100821 |        100 | CHECKOUT |      39 | buy    |        1
+ 2015-04-15 01:17:00 |  101121 |        102 | CHECKOUT |      15 | buy    |        1
+ 2015-04-15 01:22:00 |  101121 |        102 | CHECKOUT |      23 | buy    |        2
+ 2015-04-15 02:19:00 |  101331 |        103 | CHECKOUT |      16 | buy    |        1
+ 2015-04-15 02:25:00 |  101443 |        104 | CHECKOUT |      12 | buy    |        1
+ 2015-04-15 02:22:00 |  102871 |        108 | CHECKOUT |      21 | buy    |        1
+(6 rows)
+</pre> Notice that the 'symbol' and 'match_id' columns are added to the right of the matched rows.</li>
+<li>We are interested in sessions with an order placed within 4 pages of entering the shopping site via the landing page. We represent this by the regular expression: '(land)[^(land)(buy)]{0,2}(buy)'. In other words, a visit to the landing page, followed by 0 to 2 non-entry, non-sale pages, followed by a purchase. The SQL is as follows: <pre class="example">
+DROP TABLE IF EXISTS path_output, path_output_tuples;
+SELECT madlib.path(
+     'eventlog',                -- Name of input table
+     'path_output',             -- Table name to store path results
+     'session_id',              -- Partition input table by session
+     'event_timestamp ASC',     -- Order partitions in input table by time
+     'land:=page=''LANDING'',
+        wine:=page=''WINE'',
+        beer:=page=''BEER'',
+        buy:=page=''CHECKOUT'',
+        other:=page&lt;&gt;''LANDING'' AND page&lt;&gt;''WINE'' AND page&lt;&gt;''BEER'' AND  page&lt;&gt;''CHECKOUT''',    -- Symbols for  page types
+      '(land)[^(land)(buy)]{0,2}(buy)', -- Purchase within 4 pages of entering site
+     'sum(revenue) as checkout_rev',    -- Aggregate:  sum revenue by checkout
+     TRUE                       -- Persist matches
+     );
+SELECT * FROM path_output ORDER BY session_id, match_id;
+</pre> Result: <pre class="result">
+ session_id | match_id | checkout_rev
+------------+----------+--------------
+        100 |        1 |           39
+        102 |        1 |           15
+        102 |        2 |           23
+(3 rows)
+</pre> Now view the associated pattern matches: <pre class="example">
+SELECT * FROM path_output_tuples ORDER BY session_id ASC, event_timestamp ASC;
+</pre> Result: <pre class="result">
+   event_timestamp   | user_id | session_id |   page   | revenue | symbol | match_id
+---------------------+---------+------------+----------+---------+--------+----------
+ 2015-04-15 01:03:00 |  100821 |        100 | LANDING  |       0 | land   |        1
+ 2015-04-15 01:04:00 |  100821 |        100 | WINE     |       0 | wine   |        1
+ 2015-04-15 01:05:00 |  100821 |        100 | CHECKOUT |      39 | buy    |        1
+ 2015-04-15 01:15:00 |  101121 |        102 | LANDING  |       0 | land   |        1
+ 2015-04-15 01:16:00 |  101121 |        102 | WINE     |       0 | wine   |        1
+ 2015-04-15 01:17:00 |  101121 |        102 | CHECKOUT |      15 | buy    |        1
+ 2015-04-15 01:18:00 |  101121 |        102 | LANDING  |       0 | land   |        2
+ 2015-04-15 01:19:00 |  101121 |        102 | HELP     |       0 | other  |        2
+ 2015-04-15 01:21:00 |  101121 |        102 | WINE     |       0 | wine   |        2
+ 2015-04-15 01:22:00 |  101121 |        102 | CHECKOUT |      23 | buy    |        2
+(10 rows)
+</pre></li>
+<li>We may want to use a window function instead of an aggregate. Currently, only aggregates are supported in the core path function in the parameter 'aggregate_func'. However, you can write window functions on the output tuples to achieve the desired result. Continuing the previous example, let’s say we want to compute average revenue for checkouts within 4 pages of entering the shopping site via the landing page: <pre class="example">
+SELECT DATE(event_timestamp), user_id, session_id, revenue,
+    avg(revenue) OVER (PARTITION BY DATE(event_timestamp)) as avg_checkout_rev
+    FROM path_output_tuples
+    WHERE page='CHECKOUT'
+    ORDER BY user_id, session_id;
+</pre> Result: <pre class="result">
+    date    | user_id | session_id | revenue | avg_checkout_rev
+------------+---------+------------+---------+------------------
+ 2015-04-15 |  100821 |        100 |      39 | 25.6666666666667
+ 2015-04-15 |  101121 |        102 |      15 | 25.6666666666667
+ 2015-04-15 |  101121 |        102 |      23 | 25.6666666666667
+(3 rows)
+</pre> Here we are partitioning the window function by day because we want daily averages, although our sample data set only has a single day.</li>
+<li>Now we want to do a golden path analysis to find the most successful shopper paths through the site. Since our data set is small, we decide this means the most frequently viewed page just before a checkout is made: <pre class="example">
+DROP TABLE IF EXISTS path_output, path_output_tuples;
+SELECT madlib.path(
+     'eventlog',                -- Name of input table
+     'path_output',             -- Table name to store path results
+     'session_id',              -- Partition input table by session
+     'event_timestamp ASC',     -- Order partitions in input table by time
+     'land:=page=''LANDING'',
+        wine:=page=''WINE'',
+        beer:=page=''BEER'',
+        buy:=page=''CHECKOUT'',
+        other:=page&lt;&gt;''LANDING'' AND page&lt;&gt;''WINE'' AND page&lt;&gt;''BEER'' AND  page&lt;&gt;''CHECKOUT''',    -- Symbols for  page types
+      '[^(buy)](buy)',          -- Pattern to match
+     'array_agg(page ORDER BY session_id ASC, event_timestamp ASC) as page_path',    -- Build array with shopper paths
+     FALSE                       -- Don't persist matches
+     );
+</pre> Now count the common paths and print the most frequent: <pre class="example">
+SELECT count(*), page_path from
+    (SELECT * FROM path_output) q
+GROUP BY page_path
+ORDER BY count(*) DESC
+LIMIT 10;
+</pre> Result: <pre class="result">
+ count |    page_path
+-------+-----------------
+     5 | {WINE,CHECKOUT}
+     1 | {BEER,CHECKOUT}
+(2 rows)
+</pre> There are only 2 different paths. The wine page is viewed more frequently than the beer page just before checkout.</li>
+<li>To demonstrate the use of 'overlapping_patterns', consider a pattern with at least one page followed by and ending with a checkout: <pre class="example">
+DROP TABLE IF EXISTS path_output, path_output_tuples;
+SELECT madlib.path(
+     'eventlog',                    -- Name of the table
+     'path_output',                 -- Table name to store the path results
+     'session_id',                  -- Partition by session
+     'event_timestamp ASC',         -- Order partitions in input table by time
+     $$ nobuy:=page&lt;&gt;'CHECKOUT',
+        buy:=page='CHECKOUT'
+     $$,  -- Definition of symbols used in the pattern definition
+     '(nobuy)+(buy)',         -- At least one page followed by and ending with a CHECKOUT.
+     'array_agg(page ORDER BY session_id ASC, event_timestamp ASC) as page_path',
+     FALSE,                        -- Don't persist matches
+     TRUE                          -- Turn on overlapping patterns
+     );
+SELECT * FROM path_output ORDER BY session_id, match_id;
+</pre> Result with overlap turned on: <pre class="result">
+ session_id | match_id |             page_path
+------------+----------+-----------------------------------
+        100 |        1 | {LANDING,WINE,CHECKOUT}
+        100 |        2 | {WINE,CHECKOUT}
+        102 |        1 | {LANDING,WINE,CHECKOUT}
+        102 |        2 | {WINE,CHECKOUT}
+        102 |        3 | {LANDING,HELP,WINE,CHECKOUT}
+        102 |        4 | {HELP,WINE,CHECKOUT}
+        102 |        5 | {WINE,CHECKOUT}
+        103 |        1 | {LANDING,WINE,HELP,WINE,CHECKOUT}
+        103 |        2 | {WINE,HELP,WINE,CHECKOUT}
+        103 |        3 | {HELP,WINE,CHECKOUT}
+        103 |        4 | {WINE,CHECKOUT}
+        104 |        1 | {BEER,CHECKOUT}
+        108 |        1 | {BEER,WINE,CHECKOUT}
+        108 |        2 | {WINE,CHECKOUT}
+(14 rows)
+</pre> With overlap turned off, the result would be: <pre class="result">
+ session_id | match_id |             page_path
+------------+----------+-----------------------------------
+        100 |        1 | {LANDING,WINE,CHECKOUT}
+        102 |        1 | {LANDING,WINE,CHECKOUT}
+        102 |        2 | {LANDING,HELP,WINE,CHECKOUT}
+        103 |        1 | {LANDING,WINE,HELP,WINE,CHECKOUT}
+        104 |        1 | {BEER,CHECKOUT}
+        108 |        1 | {BEER,WINE,CHECKOUT}
+(6 rows)
+</pre></li>
+</ol>
+<p><a class="anchor" id="note"></a></p><dl class="section note"><dt>Note</dt><dd>Please note some current limitations of the path algorithm.<ul>
+<li>Window functions cannot currently be used in the parameter 'aggregate_func'. Instead, output the pattern matches and write a SQL query with a window function over the output tuples.</li>
+<li>A given row can only match one symbol. If a row matches multiple symbols, the symbol that comes <em>first</em> in the symbol definition list will take precedence.</li>
+<li>Maximum number of symbols that can be defined is 35.</li>
+<li>The columns 'match_id' and 'symbol' are generated by the path algorithm. If coincidentally you have columns in your input data named 'match_id' or 'symbol', the system generated column names will be changed to "__madlib_path_match_id__" and "__madlib_path_symbol__".</li>
+</ul>
+</dd></dl>
+<p><a class="anchor" id="nomenclature"></a></p><dl class="section user"><dt>Nomenclature</dt><dd></dd></dl>
+<p>Partition</p><ul>
+<li>scope of rows to be searched for pattern match</li>
+<li>typical examples: user id, session id, portfolio id</li>
+</ul>
+<p>Order</p><ul>
+<li>sort order of input rows in partition</li>
+<li>typical example: time</li>
+</ul>
+<p>Symbol</p><ul>
+<li>a row of a particular type that you’re searching for, that you want to include in a pattern</li>
+</ul>
+<p>Pattern</p><ul>
+<li>regular PostgreSQL pattern match expression of symbols and operators that you want to match across rows</li>
+</ul>
+<p>Pattern match</p><ul>
+<li>rows that result from a pattern match expression of symbols</li>
+<li>can be multiple matches per partition</li>
+</ul>
+<p><a class="anchor" id="literature"></a></p><dl class="section user"><dt>Literature</dt><dd></dd></dl>
+<p>NOTE: The following links refer to documentation resources for the current PostgreSQL database version. Depending upon your database platform version, you may need to change "current" references in the links to your database version.</p>
+<p>If your database platform uses the Greenplum Database (or related variants), please check with the project community and/or your database vendor to identify the PostgreSQL version it is based on.</p>
+<p>[1] PostgreSQL basic statements/assignment operator, <a href="http://www.postgresql.org/docs/current/static/plpgsql-statements.html">http://www.postgresql.org/docs/current/static/plpgsql-statements.html</a></p>
+<p>[2] PostgreSQL pattern matching, <a href="http://www.postgresql.org/docs/current/static/functions-matching.html">http://www.postgresql.org/docs/current/static/functions-matching.html</a></p>
+<p>[3] PostgreSQL aggregate functions, <a href="http://www.postgresql.org/docs/current/static/tutorial-agg.html">http://www.postgresql.org/docs/current/static/tutorial-agg.html</a></p>
+<p>[4] PostgreSQL window functions, <a href="http://www.postgresql.org/docs/current/static/tutorial-window.html">http://www.postgresql.org/docs/current/static/tutorial-window.html</a> </p>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Mon Oct 15 2018 11:24:30 for MADlib by
+    <a href="http://www.doxygen.org/index.html">
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.14 </li>
+  </ul>
+</div>
+</body>
+</html>