You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ok...@apache.org on 2017/12/28 22:51:52 UTC

[18/51] [abbrv] [partial] madlib-site git commit: Additional updates for 1.13 release

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/6c103d3e/docs/v1.13/group__grp__robust.html
----------------------------------------------------------------------
diff --git a/docs/v1.13/group__grp__robust.html b/docs/v1.13/group__grp__robust.html
new file mode 100644
index 0000000..5746ecf
--- /dev/null
+++ b/docs/v1.13/group__grp__robust.html
@@ -0,0 +1,440 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.13"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/>
+<title>MADlib: Robust Variance</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+  $(document).ready(initResizable);
+</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { init_search(); });
+</script>
+<script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
+    jax: ["input/TeX","output/HTML-CSS"],
+});
+</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.13</span>
+   </div>
+   <div id="projectbrief">User Documentation for MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.13 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+$(document).ready(function(){initNavTree('group__grp__robust.html','');});
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="headertitle">
+<div class="title">Robust Variance<div class="ingroups"><a class="el" href="group__grp__super.html">Supervised Learning</a> &raquo; <a class="el" href="group__grp__regml.html">Regression Models</a></div></div>  </div>
+</div><!--header-->
+<div class="contents">
+<div class="toc"><b>Contents</b> <ul>
+<li class="level1">
+<a href="#train_linregr">Robust Linear Regression Training Function</a> </li>
+<li class="level1">
+<a href="#train_logregr">Robust Logistic Regression Training Function</a> </li>
+<li class="level1">
+<a href="#train_mlogregr">Robust Multinomial Logistic Regression Training Function</a> </li>
+<li class="level1">
+<a href="#robust_variance_coxph">Robust Variance Function For Cox Proportional Hazards</a> </li>
+<li class="level1">
+<a href="#examples">Examples</a> </li>
+<li class="level1">
+<a href="#background">Technical Background</a> </li>
+<li class="level1">
+<a href="#literature">Literature</a> </li>
+<li class="level1">
+<a href="#related">Related Topics</a> </li>
+</ul>
+</div><p>The functions in this module calculate robust variance (Huber-White estimates) for linear regression, logistic regression, multinomial logistic regression, and Cox proportional hazards. They are useful in calculating variances in a dataset with potentially noisy outliers. The Huber-White estimator implemented here is identical to the "HC0" sandwich operator in the R module "sandwich".</p>
+<p>The interfaces for robust linear, logistic, and multinomial logistic regression are similar. Each regression type has its own training function. The regression results are saved in an output table with small differences, depending on the regression type.</p>
+<dl class="section warning"><dt>Warning</dt><dd>Please note that the interface for Cox proportional hazards, unlike the interface of other regression methods, accepts an output model table produced by <a class="el" href="cox__prop__hazards_8sql__in.html#a737450bbfe0f10204b0074a9d45b0cef">coxph_train()</a> function.</dd></dl>
+<p><a class="anchor" id="train_linregr"></a></p><dl class="section user"><dt>Robust Linear Regression Training Function</dt><dd></dd></dl>
+<p>The <a class="el" href="robust_8sql__in.html#a390473d2fd45e268f0fc13ca971b49b4">robust_variance_linregr()</a> function has the following syntax: </p><pre class="syntax">
+robust_variance_linregr( source_table,
+                         out_table,
+                         dependent_varname,
+                         independent_varname,
+                         grouping_cols
+                       )
+</pre> <dl class="arglist">
+<dt>source_table </dt>
+<dd>VARCHAR. The name of the table containing the training data. </dd>
+<dt>out_table </dt>
+<dd><p class="startdd">VARCHAR. Name of the generated table containing the output model. The output table contains the following columns. </p><table class="output">
+<tr>
+<th>coef </th><td>DOUBLE PRECISION[]. Vector of the coefficients of the regression.  </td></tr>
+<tr>
+<th>std_err </th><td>DOUBLE PRECISION[]. Vector of the standard error of the coefficients.  </td></tr>
+<tr>
+<th>t_stats </th><td>DOUBLE PRECISION[]. Vector of the t-stats of the coefficients.  </td></tr>
+<tr>
+<th>p_values </th><td>DOUBLE PRECISION[]. Vector of the p-values of the coefficients.  </td></tr>
+</table>
+<p class="enddd">A summary table named &lt;out_table&gt;_summary is also created, which is the same as the summary table created by linregr_train function. Please refer to the documentation for linear regression for details.  </p>
+</dd>
+<dt>dependent_varname </dt>
+<dd>VARCHAR. The name of the column containing the dependent variable. </dd>
+<dt>independent_varname </dt>
+<dd>VARCHAR. Expression list to evaluate for the independent variables. An intercept variable is not assumed. It is common to provide an explicit intercept term by including a single constant 1 term in the independent variable list.  </dd>
+<dt>grouping_cols (optional) </dt>
+<dd>VARCHAR, default: NULL. An expression list used to group the input dataset into discrete groups, running one regression per group. Similar to the SQL "GROUP BY" clause. When this value is NULL, no grouping is used and a single result model is generated. Default value: NULL.  </dd>
+</dl>
+<p><a class="anchor" id="train_logregr"></a></p><dl class="section user"><dt>Robust Logistic Regression Training Function</dt><dd></dd></dl>
+<p>The <a class="el" href="robust_8sql__in.html#abc20ec2c5e74f268e7727c33a4bb9054">robust_variance_logregr()</a> function has the following syntax: </p><pre class="syntax">
+robust_variance_logregr( source_table,
+                         out_table,
+                         dependent_varname,
+                         independent_varname,
+                         grouping_cols,
+                         max_iter,
+                         optimizer,
+                         tolerance,
+                         verbose_mode
+                       )
+</pre> <dl class="arglist">
+<dt>source_table </dt>
+<dd>VARCHAR. The name of the table containing the training data. </dd>
+<dt>out_table </dt>
+<dd><p class="startdd">VARCHAR. Name of the generated table containing the output model. The output table has the following columns: </p><table class="output">
+<tr>
+<th>coef </th><td>Vector of the coefficients of the regression.  </td></tr>
+<tr>
+<th>std_err </th><td>Vector of the standard error of the coefficients.  </td></tr>
+<tr>
+<th>z_stats </th><td>Vector of the z-stats of the coefficients.  </td></tr>
+<tr>
+<th>p_values </th><td>Vector of the p-values of the coefficients.  </td></tr>
+</table>
+<p class="enddd">A summary table named &lt;out_table&gt;_summary is also created, which is the same as the summary table created by logregr_train function. Please refer to the documentation for logistic regression for details.  </p>
+</dd>
+<dt>dependent_varname </dt>
+<dd>VARCHAR. The name of the column containing the dependent variable. </dd>
+<dt>independent_varname </dt>
+<dd>VARCHAR. Expression list to evaluate for the independent variables. An intercept variable is not assumed. It is common to provide an explicit intercept term by including a single constant 1 term in the independent variable list. </dd>
+<dt>grouping_cols (optional) </dt>
+<dd>VARCHAR, default: NULL. An expression list used to group the input dataset into discrete groups, running one regression per group. Similar to the SQL "GROUP BY" clause. When this value is NULL, no grouping is used and a single result model is generated.  </dd>
+<dt>max_iter (optional) </dt>
+<dd>INTEGER, default: 20. The maximum number of iterations that are allowed. </dd>
+<dt>optimizer (optional) </dt>
+<dd>VARCHAR, default: 'fista'. Name of optimizer, either 'fista' or 'igd'. </dd>
+<dt>tolerance (optional) </dt>
+<dd>DOUBLE PRECISION, default: 1e-6. The criteria to end iterations. Both the 'fista' and 'igd' optimizers compute the average difference between the coefficients of two consecutive iterations, and when the difference is smaller than tolerance or the iteration number is larger than max_iter, the computation stops.  </dd>
+<dt>verbose_mode (optional) </dt>
+<dd>BOOLEAN, default: FALSE. Whether the regression fit should print any warning messages.  </dd>
+</dl>
+<p><a class="anchor" id="train_mlogregr"></a></p><dl class="section user"><dt>Robust Multinomial Logistic Regression Training Function</dt><dd></dd></dl>
+<p>The <a class="el" href="robust_8sql__in.html#a1f27c072a4ef885a55825f75d12b3bd8">robust_variance_mlogregr()</a> function has the following syntax: </p><pre class="syntax">
+robust_variance_mlogregr( source_table,
+                          out_table,
+                          dependent_varname,
+                          independent_varname,
+                          ref_category,
+                          grouping_cols,
+                          optimizer_params,
+                          verbose_mode
+                        )
+</pre> <dl class="arglist">
+<dt>source_table </dt>
+<dd>VARCHAR. The name of the table containing training data, properly qualified. </dd>
+<dt>out_table </dt>
+<dd><p class="startdd">VARCHAR. The name of the table where the regression model will be stored. The output table has the following columns: </p><table class="output">
+<tr>
+<th>category </th><td>The category.  </td></tr>
+<tr>
+<th>ref_category </th><td>The reference category used for modeling.  </td></tr>
+<tr>
+<th>coef </th><td>Vector of the coefficients of the regression.  </td></tr>
+<tr>
+<th>std_err </th><td>Vector of the standard error of the coefficients.  </td></tr>
+<tr>
+<th>z_stats </th><td>Vector of the z-stats of the coefficients.  </td></tr>
+<tr>
+<th>p_values </th><td>Vector of the p-values of the coefficients.  </td></tr>
+</table>
+<p class="enddd">A summary table named &lt;out_table&gt;_summary is also created, which is the same as the summary table created by mlogregr_train function. Please refer to the documentation for multinomial logistic regression for details.  </p>
+</dd>
+<dt>dependent_varname </dt>
+<dd>VARCHAR. The name of the column containing the dependent variable. </dd>
+<dt>independent_varname </dt>
+<dd>VARCHAR. Expression list to evaluate for the independent variables. An intercept variable is not assumed. It is common to provide an explicit intercept term by including a single constant 1 term in the independent variable list. The <em>independent_varname</em> can be the name of a column that contains an array of numeric values. It can also be a string with the format 'ARRAY[1, x1, x2, x3]', where <em>x1</em>, <em>x2</em> and <em>x3</em> are each column names. </dd>
+<dt>ref_category (optional) </dt>
+<dd>INTEGER, default: 0. The reference category. </dd>
+<dt>grouping_cols (optional) </dt>
+<dd>VARCHAR, default: NULL. <em>Not currently implemented. Any non-NULL value is ignored.</em> An expression list used to group the input dataset into discrete groups, running one regression per group. Similar to the SQL "GROUP BY" clause. When this value is NULL, no grouping is used and a single result model is generated. </dd>
+<dt>optimizer_params (optional) </dt>
+<dd>TEXT, default: NULL, which uses the default values of optimizer parameters: max_iter=20, optimizer='newton', tolerance=1e-4. It should be a string that contains pairs of 'key=value' separated by commas. </dd>
+<dt>verbose_mode (optional) </dt>
+<dd>BOOLEAN, default: FALSE. <em>Not currently implemented.</em> TRUE if the regression fit should print warning messages. </dd>
+</dl>
+<p><a class="anchor" id="robust_variance_coxph"></a></p><dl class="section user"><dt>Robust Variance Function For Cox Proportional Hazards</dt><dd></dd></dl>
+<p>The <a class="el" href="clustered__variance__coxph_8sql__in.html#abaeae5d6cd30db4b06a49d24d714812e">robust_variance_coxph()</a> function has the following syntax: </p><pre class="syntax">
+robust_variance_coxph(model_table, output_table)
+</pre><p><b>Arguments</b> </p><dl class="arglist">
+<dt>model_table </dt>
+<dd>TEXT. The name of the model table, which is exactly the same as the 'output_table' parameter of the <a class="el" href="cox__prop__hazards_8sql__in.html#a737450bbfe0f10204b0074a9d45b0cef" title="Compute cox-regression coefficients and diagnostic statistics. ">coxph_train()</a> function. </dd>
+<dt>output_table </dt>
+<dd>TEXT. The name of the table where the output is saved. It has the following columns: <table class="output">
+<tr>
+<th>coef </th><td>FLOAT8[]. Vector of the coefficients.  </td></tr>
+<tr>
+<th>loglikelihood </th><td>FLOAT8. Log-likelihood value of the MLE estimate.  </td></tr>
+<tr>
+<th>std_err </th><td>FLOAT8[]. Vector of the standard error of the coefficients.  </td></tr>
+<tr>
+<th>robust_se </th><td>FLOAT8[]. Vector of the robust standard errors of the coefficients.  </td></tr>
+<tr>
+<th>robust_z </th><td>FLOAT8[]. Vector of the robust z-stats of the coefficients.  </td></tr>
+<tr>
+<th>robust_p </th><td>FLOAT8[]. Vector of the robust p-values of the coefficients.  </td></tr>
+<tr>
+<th>hessian </th><td>FLOAT8[]. The Hessian matrix.  </td></tr>
+</table>
+</dd>
+</dl>
+<p><a class="anchor" id="examples"></a></p><dl class="section user"><dt>Examples</dt><dd></dd></dl>
+<p><b> Logistic Regression Example </b></p><ol type="1">
+<li>View online help for the logistic regression training function. <pre class="example">
+SELECT madlib.robust_variance_logregr();
+</pre></li>
+<li>Create the training data table. <pre class="example">
+DROP TABLE IF EXISTS patients;
+CREATE TABLE patients (id INTEGER NOT NULL, second_attack INTEGER,
+    treatment INTEGER, trait_anxiety INTEGER);
+COPY patients FROM STDIN WITH DELIMITER '|';
+  1 |             1 |         1 |            70
+  3 |             1 |         1 |            50
+  5 |             1 |         0 |            40
+  7 |             1 |         0 |            75
+  9 |             1 |         0 |            70
+ 11 |             0 |         1 |            65
+ 13 |             0 |         1 |            45
+ 15 |             0 |         1 |            40
+ 17 |             0 |         0 |            55
+ 19 |             0 |         0 |            50
+  2 |             1 |         1 |            80
+  4 |             1 |         0 |            60
+  6 |             1 |         0 |            65
+  8 |             1 |         0 |            80
+ 10 |             1 |         0 |            60
+ 12 |             0 |         1 |            50
+ 14 |             0 |         1 |            35
+ 16 |             0 |         1 |            50
+ 18 |             0 |         0 |            45
+ 20 |             0 |         0 |            60
+\.
+</pre></li>
+<li>Run the logistic regression training function and compute the robust logistic variance of the regression: <pre class="example">
+DROP TABLE IF EXISTS patients_logregr;
+SELECT madlib.robust_variance_logregr( 'patients',
+                                       'patients_logregr',
+                                       'second_attack',
+                                       'ARRAY[1, treatment, trait_anxiety]'
+                                     );
+</pre></li>
+<li>View the regression results. <pre class="example">
+\x on
+Expanded display is on.
+SELECT * FROM patients_logregr;
+</pre> Result: <pre class="result">
+&#160;-[ RECORD 1 ]-------------------------------------------------------
+ coef     | {-6.36346994178179,-1.02410605239327,0.119044916668605}
+ std_err  | {3.45872062333648,1.1716192578234,0.0534328864185018}
+ z_stats  | {-1.83983346294192,-0.874094587943036,2.22793348156809}
+ p_values | {0.0657926909738889,0.382066744585541,0.0258849510757339}
+</pre> Alternatively, unnest the arrays in the results for easier reading of output. <pre class="example">
+\x off
+SELECT unnest(array['intercept', 'treatment', 'trait_anxiety' ]) as attribute,
+       unnest(coef) as coefficient,
+       unnest(std_err) as standard_error,
+       unnest(z_stats) as z_stat,
+       unnest(p_values) as pvalue
+FROM patients_logregr;
+</pre></li>
+</ol>
+<p><b> Cox Proportional Hazards Example </b></p><ol type="1">
+<li>View online help for the robust Cox Proportional hazards training method. <pre class="example">
+SELECT madlib.robust_variance_coxph();
+</pre></li>
+<li>Create an input data set. <pre class="example">
+DROP TABLE IF EXISTS sample_data;
+CREATE TABLE sample_data (
+    id INTEGER NOT NULL,
+    grp DOUBLE PRECISION,
+    wbc DOUBLE PRECISION,
+    timedeath INTEGER,
+    status BOOLEAN
+);
+COPY sample_data FROM STDIN DELIMITER '|';
+  0 |   0 | 1.45 |        35 | t
+  1 |   0 | 1.47 |        34 | t
+  3 |   0 |  2.2 |        32 | t
+  4 |   0 | 1.78 |        25 | t
+  5 |   0 | 2.57 |        23 | t
+  6 |   0 | 2.32 |        22 | t
+  7 |   0 | 2.01 |        20 | t
+  8 |   0 | 2.05 |        19 | t
+  9 |   0 | 2.16 |        17 | t
+ 10 |   0 |  3.6 |        16 | t
+ 11 |   1 |  2.3 |        15 | t
+ 12 |   0 | 2.88 |        13 | t
+ 13 |   1 |  1.5 |        12 | t
+ 14 |   0 |  2.6 |        11 | t
+ 15 |   0 |  2.7 |        10 | t
+ 16 |   0 |  2.8 |         9 | t
+ 17 |   1 | 2.32 |         8 | t
+ 18 |   0 | 4.43 |         7 | t
+ 19 |   0 | 2.31 |         6 | t
+ 20 |   1 | 3.49 |         5 | t
+ 21 |   1 | 2.42 |         4 | t
+ 22 |   1 | 4.01 |         3 | t
+ 23 |   1 | 4.91 |         2 | t
+ 24 |   1 |    5 |         1 | t
+\.
+</pre></li>
+<li>Run the Cox regression function. <pre class="example">
+SELECT madlib.coxph_train( 'sample_data',
+                           'sample_cox',
+                           'timedeath',
+                           'ARRAY[grp,wbc]',
+                           'status'
+                         );
+</pre></li>
+<li>Run the Robust Cox regression function. <pre class="example">
+SELECT madlib.robust_variance_coxph( 'sample_cox',
+                           'sample_robust_cox'
+                         );
+</pre></li>
+<li>View the results of the robust Cox regression. <pre class="example">
+\x on
+SELECT * FROM sample_robust_cox;
+</pre> Results: <pre class="result">
+-[ RECORD 1 ]-+----------------------------------------------------------------------------
+coef          | {2.54407073265105,1.67172094780081}
+loglikelihood | -37.8532498733452
+std_err       | {0.677180599295459,0.387195514577754}
+robust_se     | {0.621095581073685,0.274773521439328}
+robust_z      | {4.09610180811965,6.08399579058399}
+robust_p      | {4.2016521208424e-05,1.17223683104729e-09}
+hessian       | {{2.78043065745405,-2.25848560642669},{-2.25848560642669,8.50472838284265}}
+</pre></li>
+</ol>
+<p><a class="anchor" id="background"></a></p><dl class="section user"><dt>Technical Background</dt><dd></dd></dl>
+<p>When doing regression analysis, we are sometimes interested in the variance of the computed coefficients \( \boldsymbol c \). While the built-in regression functions provide variance estimates, we may prefer a <em>robust</em> variance estimate.</p>
+<p>The robust variance calculation can be expressed in a sandwich formation, which is the form </p><p class="formulaDsp">
+\[ S( \boldsymbol c) = B( \boldsymbol c) M( \boldsymbol c) B( \boldsymbol c) \]
+</p>
+<p> where \( B( \boldsymbol c)\) and \( M( \boldsymbol c)\) are matrices. The \( B( \boldsymbol c) \) matrix, also known as the bread, is relatively straightforward, and can be computed as </p><p class="formulaDsp">
+\[ B( \boldsymbol c) = n\left(\sum_i^n -H(y_i, x_i, \boldsymbol c) \right)^{-1} \]
+</p>
+<p> where \( H \) is the hessian matrix.</p>
+<p>The \( M( \boldsymbol c)\) matrix has several variations, each with different robustness properties. The form implemented here is the Huber-White sandwich operator, which takes the form </p><p class="formulaDsp">
+\[ M_{H} =\frac{1}{n} \sum_i^n \psi(y_i,x_i, \boldsymbol c)^T \psi(y_i,x_i, \boldsymbol c). \]
+</p>
+<p>The above method for calculating robust variance (Huber-White estimates) is implemented for linear regression, logistic regression, and multinomial logistic regression. It is useful in calculating variances in a dataset with potentially noisy outliers. The Huber-White estimator implemented here is identical to the "HC0" sandwich operator in the R module "sandwich".</p>
+<p>When multinomial logistic regression is computed before the multinomial robust regression, it uses a default reference category of zero and the regression coefficients are included in the output table. The regression coefficients in the output are in the same order as the multinomial logistic regression function, which is described below. For a problem with \( K \) independent variables \( (1, ..., K) \) and \( J \) categories \( (0, ..., J-1) \), let \( {m_{k,j}} \) denote the coefficient for independent variable \( k \) and category \( j \) . The output is \( {m_{k_1, j_0}, m_{k_1, j_1} \ldots m_{k_1, j_{J-1}}, m_{k_2, j_0}, m_{k_2, j_1} \ldots m_{k_K, j_{J-1}}} \). The order is NOT CONSISTENT with the multinomial regression marginal effect calculation with function <em>marginal_mlogregr</em>. This is deliberate because the interfaces of all multinomial regressions (robust, clustered, ...) will be moved to match that used in marginal.</p>
+<p>The robust variance of Cox proportional hazards is more complex because coefficients are trained by maximizing a partial log-likelihood. Therefore, one cannot directly use the formula for \( M( \boldsymbol c) \) as in the Huber-White robust estimator. Extra terms are needed. See [4] for details.</p>
+<p><a class="anchor" id="literature"></a></p><dl class="section user"><dt>Literature</dt><dd></dd></dl>
+<p>[1] vce(cluster) function in STATA: <a href="http://www.stata.com/help.cgi?vce_option">http://www.stata.com/help.cgi?vce_option</a></p>
+<p>[2] clustered estimators in R: <a href="http://people.su.se/~ma/clustering.pdf">http://people.su.se/~ma/clustering.pdf</a></p>
+<p>[3] Achim Zeileis: Object-oriented Computation of Sandwich Estimators. Research Report Series / Department of Statistics and Mathematics, 37. Department of Statistics and Mathematics, WU Vienna University of Economics and Business, Vienna. <a href="http://cran.r-project.org/web/packages/sandwich/vignettes/sandwich-OOP.pdf">http://cran.r-project.org/web/packages/sandwich/vignettes/sandwich-OOP.pdf</a></p>
+<p>[4] D. Y. Lin and L. J. Wei, <em>The Robust Inference for the Cox Proportional Hazards Model</em>, Journal of the American Statistical Association, Vol. 84, No. 408, p.1074 (1989).</p>
+<p><a class="anchor" id="related"></a></p><dl class="section user"><dt>Related Topics</dt><dd>File <a class="el" href="robust_8sql__in.html" title="SQL functions for robust variance linear and logistic regression. ">robust.sql_in</a> documenting the SQL functions. File <a class="el" href="robust__variance__coxph_8sql__in.html" title="SQL functions for robust cox proportional hazards regression. ">robust_variance_coxph.sql_in</a> documenting more of the SQL functions.</dd></dl>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Wed Dec 27 2017 19:05:57 for MADlib by
+    <a href="http://www.doxygen.org/index.html">
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.13 </li>
+  </ul>
+</div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/6c103d3e/docs/v1.13/group__grp__sample.html
----------------------------------------------------------------------
diff --git a/docs/v1.13/group__grp__sample.html b/docs/v1.13/group__grp__sample.html
new file mode 100644
index 0000000..03f1351
--- /dev/null
+++ b/docs/v1.13/group__grp__sample.html
@@ -0,0 +1,150 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.13"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/>
+<title>MADlib: Random Sampling</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+  $(document).ready(initResizable);
+</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { init_search(); });
+</script>
+<script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
+    jax: ["input/TeX","output/HTML-CSS"],
+});
+</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.13</span>
+   </div>
+   <div id="projectbrief">User Documentation for MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.13 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+$(document).ready(function(){initNavTree('group__grp__sample.html','');});
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="headertitle">
+<div class="title">Random Sampling<div class="ingroups"><a class="el" href="group__grp__early__stage.html">Early Stage Development</a></div></div>  </div>
+</div><!--header-->
+<div class="contents">
+<div class="toc"><b>Contents</b> <ul>
+<li>
+<a href="#func_list">Functions</a> </li>
+<li>
+<a href="#related">Related Topics</a> </li>
+</ul>
+</div><dl class="section warning"><dt>Warning</dt><dd><em> This MADlib method is still in early stage development. There may be some issues that will be addressed in a future version. Interface and implementation are subject to change. </em></dd></dl>
+<p>The random sampling module consists of useful utility functions for sampling operations. These functions can be used while implementing new algorithms.</p>
+<p><a class="anchor" id="syntax"></a></p><dl class="section user"><dt>Functions</dt><dd></dd></dl>
+<p>Sample a single row according to weights. </p><pre class="syntax">
+weighted_sample( value,
+                 weight
+               )
+</pre><p><b>Arguments</b> </p><dl class="arglist">
+<dt>value </dt>
+<dd>BIGINT or FLOAT8[]. Value of row. Uniqueness is not enforced. If a value occurs multiple times, the probability of sampling this value is proportional to the sum of its weights.  </dd>
+<dt>weight </dt>
+<dd>FLOAT8. Weight for row. A negative value here is treated as zero weight.  </dd>
+</dl>
+<p>Refer to the file sample.sql_in for documentation on each of the utility functions.</p>
+<p><a class="anchor" id="related"></a></p><dl class="section user"><dt>Related Topics</dt><dd></dd></dl>
+<dl class="section see"><dt>See also</dt><dd>File <a class="el" href="sample_8sql__in.html" title="SQL functions for random sampling. ">sample.sql_in</a> documenting the SQL functions. </dd></dl>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Wed Dec 27 2017 19:05:58 for MADlib by
+    <a href="http://www.doxygen.org/index.html">
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.13 </li>
+  </ul>
+</div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/6c103d3e/docs/v1.13/group__grp__sampling.html
----------------------------------------------------------------------
diff --git a/docs/v1.13/group__grp__sampling.html b/docs/v1.13/group__grp__sampling.html
new file mode 100644
index 0000000..3cc3294
--- /dev/null
+++ b/docs/v1.13/group__grp__sampling.html
@@ -0,0 +1,139 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.13"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/>
+<title>MADlib: Sampling</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+  $(document).ready(initResizable);
+</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { init_search(); });
+</script>
+<script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
+    jax: ["input/TeX","output/HTML-CSS"],
+});
+</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.13</span>
+   </div>
+   <div id="projectbrief">User Documentation for MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.13 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+$(document).ready(function(){initNavTree('group__grp__sampling.html','');});
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="summary">
+<a href="#groups">Modules</a>  </div>
+  <div class="headertitle">
+<div class="title">Sampling<div class="ingroups"><a class="el" href="group__grp__utility__functions.html">Utility Functions</a></div></div>  </div>
+</div><!--header-->
+<div class="contents">
+<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
+<p>A collection of methods for sampling from a population. </p>
+<table class="memberdecls">
+<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="groups"></a>
+Modules</h2></td></tr>
+<tr class="memitem:group__grp__strs"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__strs.html">Stratified Sampling</a></td></tr>
+<tr class="memdesc:group__grp__strs"><td class="mdescLeft">&#160;</td><td class="mdescRight">A method for independently sampling subpopulations (strata). <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+</table>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Wed Dec 27 2017 19:05:58 for MADlib by
+    <a href="http://www.doxygen.org/index.html">
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.13 </li>
+  </ul>
+</div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/6c103d3e/docs/v1.13/group__grp__sampling.js
----------------------------------------------------------------------
diff --git a/docs/v1.13/group__grp__sampling.js b/docs/v1.13/group__grp__sampling.js
new file mode 100644
index 0000000..0c0c508
--- /dev/null
+++ b/docs/v1.13/group__grp__sampling.js
@@ -0,0 +1,4 @@
+var group__grp__sampling =
+[
+    [ "Stratified Sampling", "group__grp__strs.html", null ]
+];
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/6c103d3e/docs/v1.13/group__grp__sessionize.html
----------------------------------------------------------------------
diff --git a/docs/v1.13/group__grp__sessionize.html b/docs/v1.13/group__grp__sessionize.html
new file mode 100644
index 0000000..52f8e67
--- /dev/null
+++ b/docs/v1.13/group__grp__sessionize.html
@@ -0,0 +1,276 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.13"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/>
+<title>MADlib: Sessionize</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+  $(document).ready(initResizable);
+</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { init_search(); });
+</script>
+<script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
+    jax: ["input/TeX","output/HTML-CSS"],
+});
+</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.13</span>
+   </div>
+   <div id="projectbrief">User Documentation for MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.13 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+$(document).ready(function(){initNavTree('group__grp__sessionize.html','');});
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="headertitle">
+<div class="title">Sessionize<div class="ingroups"><a class="el" href="group__grp__utility__functions.html">Utility Functions</a></div></div>  </div>
+</div><!--header-->
+<div class="contents">
+<div class="toc"><b>Contents</b> <ul>
+<li>
+<a href="#syntax">Function Syntax</a> </li>
+<li>
+<a href="#examples">Examples</a> </li>
+<li>
+<a href="#literature">Literature</a> </li>
+</ul>
+</div><p>The MADlib sessionize function performs time-oriented session reconstruction on a data set comprising a sequence of events. A defined period of inactivity indicates the end of one session and beginning of the next session. Sessions can be useful in many domains including web analytics [1], network security, manufacturing, finance, and operational analytics.</p>
+<p><a class="anchor" id="syntax"></a></p><dl class="section user"><dt>Function Syntax</dt><dd><pre class="syntax">
+sessionize(
+   source_table,
+   output_table,
+   partition_expr,
+   time_stamp,
+   max_time,
+   output_cols,
+   create_view
+)
+</pre></dd></dl>
+<p><b>Arguments</b> </p><dl class="arglist">
+<dt>source_table </dt>
+<dd><p class="startdd">VARCHAR. Name of the source table that contains the data to be sessionized.</p>
+<p class="enddd"></p>
+</dd>
+<dt>output_table </dt>
+<dd><p class="startdd">VARCHAR. Name of the output view or table. (The parameter create_view described below defines whether the output is actually a view or a table.) In addition to the columns in the source_table, the output also contains a new column called session_id: </p><ul>
+<li>
+session_id=1,2,...n where n is the number of the session in the partition. </li>
+</ul>
+<p class="enddd"></p>
+</dd>
+<dt>partition_expr </dt>
+<dd><p class="startdd">VARCHAR. The 'partition_expr' is a single column or a list of comma-separated columns/expressions to divide all rows into groups, or partitions. Sessionization is applied across the rows that fall into the same partition. This parameter can be set to NULL or '' to indicate the sessionization operation is to be applied to the whole input table.</p>
+<p class="enddd"></p>
+</dd>
+<dt>time_stamp </dt>
+<dd><p class="startdd">VARCHAR. The time stamp column name that is used for sessionization calculation. Note that the time_stamp column will be sorted in ascending order before the session reconstruction is done within a partition.</p>
+<p class="enddd"></p>
+</dd>
+<dt>max_time </dt>
+<dd><p class="startdd">INTERVAL. Maximum delta time (i.e., time out) between subsequent events that define a session. If the elapsed time between subsequent events is longer than max_time, a new session is created.</p>
+<p class="enddd"><a class="anchor" id="note"></a></p><dl class="section note"><dt>Note</dt><dd>Note that max_time is of time type INTERVAL which is a PostgreSQL way of describing elapsed time. For more information on INTERVAL please refer to reference [2].</dd></dl>
+</dd>
+<dt>output_cols (optional) </dt>
+<dd><p class="startdd">VARCHAR. An optional comma separated list of columns to be written to the output_table. Must be a valid SELECT expression. This is set to '*' by default, which means all columns in the input table will be written to the output_table plus the session_id column. Note that this parameter could include a list containing the partition_expr or any other expressions of interest. E.g., '*, expr1, expr2, etc.' where this means output all columns from the input table plus the expressions listed plus the session_id column.</p>
+<p class="enddd"></p>
+</dd>
+<dt>create_view (optional) </dt>
+<dd>BOOLEAN default: TRUE. Determines whether to create a view or materialize the output as a table. If you only need session info once, creating a view could be significantly faster than materializing as a table. Please note that if you set create_view to NULL (allowed by PostgreSQL) it will get set to the default value of TRUE. </dd>
+</dl>
+<p><a class="anchor" id="examples"></a></p><dl class="section user"><dt>Examples</dt><dd></dd></dl>
+<p>The data set describes shopper behavior on a notional web site that sells beer and wine. A beacon fires an event to a log file when the shopper visits different pages on the site: landing page, beer selection page, wine selection page, and checkout. Each user is identified by a user id, and every time a page is visited, the page and time stamp are logged.</p>
+<p>Create the data table:</p>
+<pre class="example">
+DROP TABLE IF EXISTS eventlog CASCADE;  -- Using CASCADE in case you are running through this example more than once (views used below)
+CREATE TABLE eventlog (event_timestamp TIMESTAMP,
+            user_id INT,
+            page TEXT,
+            revenue FLOAT);
+INSERT INTO eventlog VALUES
+('04/15/2015 02:19:00', 101331, 'CHECKOUT', 16),
+('04/15/2015 02:17:00', 202201, 'WINE', 0),
+('04/15/2015 03:18:00', 202201, 'BEER', 0),
+('04/15/2015 01:03:00', 100821, 'LANDING', 0),
+('04/15/2015 01:04:00', 100821, 'WINE', 0),
+('04/15/2015 01:05:00', 100821, 'CHECKOUT', 39),
+('04/15/2015 02:06:00', 100821, 'WINE', 0),
+('04/15/2015 02:09:00', 100821, 'WINE', 0),
+('04/15/2015 02:15:00', 101331, 'LANDING', 0),
+('04/15/2015 02:16:00', 101331, 'WINE', 0),
+('04/15/2015 02:17:00', 101331, 'HELP', 0),
+('04/15/2015 02:18:00', 101331, 'WINE', 0),
+('04/15/2015 02:29:00', 201881, 'LANDING', 0),
+('04/15/2015 02:30:00', 201881, 'BEER', 0),
+('04/15/2015 01:05:00', 202201, 'LANDING', 0),
+('04/15/2015 01:06:00', 202201, 'HELP', 0),
+('04/15/2015 01:09:00', 202201, 'LANDING', 0),
+('04/15/2015 02:15:00', 202201, 'WINE', 0),
+('04/15/2015 02:16:00', 202201, 'BEER', 0),
+('04/15/2015 03:19:00', 202201, 'WINE', 0),
+('04/15/2015 03:22:00', 202201, 'CHECKOUT', 21);
+</pre><p>Sessionize the table by each user_id: </p><pre class="example">
+ DROP VIEW IF EXISTS sessionize_output_view;
+ SELECT madlib.sessionize(
+     'eventlog',             -- Name of input table
+     'sessionize_output_view',   -- View to store sessionize results
+     'user_id',             -- Partition input table by user id
+     'event_timestamp',      -- Time column used to compute sessions
+     '0:30:0'                -- Use 30 minute time out to define sessions
+    );
+SELECT * FROM sessionize_output_view ORDER BY user_id, event_timestamp;
+</pre><p>Result: </p><pre class="result">
+   event_timestamp   | user_id |   page   | revenue | session_id 
+---------------------+---------+----------+---------+------------
+ 2015-04-15 01:03:00 |  100821 | LANDING  |       0 |          1
+ 2015-04-15 01:04:00 |  100821 | WINE     |       0 |          1
+ 2015-04-15 01:05:00 |  100821 | CHECKOUT |      39 |          1
+ 2015-04-15 02:06:00 |  100821 | WINE     |       0 |          2
+ 2015-04-15 02:09:00 |  100821 | WINE     |       0 |          2
+ 2015-04-15 02:15:00 |  101331 | LANDING  |       0 |          1
+ 2015-04-15 02:16:00 |  101331 | WINE     |       0 |          1
+ 2015-04-15 02:17:00 |  101331 | HELP     |       0 |          1
+ 2015-04-15 02:18:00 |  101331 | WINE     |       0 |          1
+ 2015-04-15 02:19:00 |  101331 | CHECKOUT |      16 |          1
+ 2015-04-15 02:29:00 |  201881 | LANDING  |       0 |          1
+ 2015-04-15 02:30:00 |  201881 | BEER     |       0 |          1
+ 2015-04-15 01:05:00 |  202201 | LANDING  |       0 |          1
+ 2015-04-15 01:06:00 |  202201 | HELP     |       0 |          1
+ 2015-04-15 01:09:00 |  202201 | LANDING  |       0 |          1
+ 2015-04-15 02:15:00 |  202201 | WINE     |       0 |          2
+ 2015-04-15 02:16:00 |  202201 | BEER     |       0 |          2
+ 2015-04-15 02:17:00 |  202201 | WINE     |       0 |          2
+ 2015-04-15 03:18:00 |  202201 | BEER     |       0 |          3
+ 2015-04-15 03:19:00 |  202201 | WINE     |       0 |          3
+ 2015-04-15 03:22:00 |  202201 | CHECKOUT |      21 |          3
+(21 rows)
+</pre><p>Now let's say we want to see 3 minute sessions by a group of users with a certain range of user IDs. To do this, we need to sessionize the table based on a partition expression. Also, we want to persist a table output with a reduced set of columns in the table. </p><pre class="example">
+ DROP TABLE IF EXISTS sessionize_output_table;
+ SELECT madlib.sessionize(
+     'eventlog',                    -- Name of input table
+     'sessionize_output_table',     -- Table to store sessionize results
+     'user_id &lt; 200000',            -- Partition input table by subset of users
+     'event_timestamp',             -- Order partitions in input table by time
+     '180',                         -- Use 180 second time out to define sessions (same as '0:03:0')
+     'event_timestamp, user_id, user_id &lt; 200000 AS "Department-A1"',    -- Select only user_id and event_timestamp columns, along with the session id as output
+     'f'                            -- create a table
+     );
+ SELECT * FROM sessionize_output_table WHERE "Department-A1"='TRUE' ORDER BY event_timestamp;
+</pre><p>Result showing 2 users and 3 total sessions across the group: </p><pre class="result">
+   event_timestamp   | user_id | Department-A1 | session_id 
+---------------------+---------+---------------+------------
+ 2015-04-15 01:03:00 |  100821 | t             |          1
+ 2015-04-15 01:04:00 |  100821 | t             |          1
+ 2015-04-15 01:05:00 |  100821 | t             |          1
+ 2015-04-15 02:06:00 |  100821 | t             |          2
+ 2015-04-15 02:09:00 |  100821 | t             |          2
+ 2015-04-15 02:15:00 |  101331 | t             |          3
+ 2015-04-15 02:16:00 |  101331 | t             |          3
+ 2015-04-15 02:17:00 |  101331 | t             |          3
+ 2015-04-15 02:18:00 |  101331 | t             |          3
+ 2015-04-15 02:19:00 |  101331 | t             |          3
+(10 rows)
+</pre><p><a class="anchor" id="literature"></a></p><dl class="section user"><dt>Literature</dt><dd></dd></dl>
+<p>NOTE: The following PostgreSQL link refers to documentation resources for the current PostgreSQL database version. Depending upon your database platform version, you may need to change the "current" reference in the link to your database version.</p>
+<p>If your database platform uses the Greenplum Database (or related variants), please check with the project community and/or your database vendor to identify the PostgreSQL version it is based on.</p>
+<p>[1] Sessions in web analytics <a href="https://en.wikipedia.org/wiki/Session_(web_analytics)">https://en.wikipedia.org/wiki/Session_(web_analytics)</a></p>
+<p>[2] PostgreSQL date/time types <a href="https://www.postgresql.org/docs/current/static/datatype-datetime.html">https://www.postgresql.org/docs/current/static/datatype-datetime.html</a> </p>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Wed Dec 27 2017 19:05:58 for MADlib by
+    <a href="http://www.doxygen.org/index.html">
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.13 </li>
+  </ul>
+</div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/6c103d3e/docs/v1.13/group__grp__sketches.html
----------------------------------------------------------------------
diff --git a/docs/v1.13/group__grp__sketches.html b/docs/v1.13/group__grp__sketches.html
new file mode 100644
index 0000000..5d4785e
--- /dev/null
+++ b/docs/v1.13/group__grp__sketches.html
@@ -0,0 +1,159 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.13"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/>
+<title>MADlib: Cardinality Estimators</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+  $(document).ready(initResizable);
+</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { init_search(); });
+</script>
+<script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
+    jax: ["input/TeX","output/HTML-CSS"],
+});
+</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ ></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.13</span>
+   </div>
+   <div id="projectbrief">User Documentation for MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.13 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+$(document).ready(function(){initNavTree('group__grp__sketches.html','');});
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="summary">
+<a href="#groups">Modules</a>  </div>
+  <div class="headertitle">
+<div class="title">Cardinality Estimators<div class="ingroups"><a class="el" href="group__grp__stats.html">Statistics</a> &raquo; <a class="el" href="group__grp__desc__stats.html">Descriptive Statistics</a></div></div>  </div>
+</div><!--header-->
+<div class="contents">
+<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
+<p>Sketches (sometimes called "synopsis data structures") are small randomized in-memory data structures that capture statistical properties of a large set of values (e.g., a column of a table). Sketches can be formed in a single pass of the data, and used to approximate a variety of descriptive statistics.</p>
+<p>We implement sketches as SQL User-Defined Aggregates (UDAs). Because they are single-pass, small-space and parallelized, a single query can use many sketches to gather summary statistics on many columns of a table efficiently.</p>
+<p>This module currently implements user-defined aggregates based on three main sketch methods:</p><ul>
+<li><em>Count-Min (CM)</em> sketches, which can be used to approximate a number of descriptive statistics including<ul>
+<li><code>COUNT</code> of rows whose column value matches a given value in a set</li>
+<li><code>COUNT</code> of rows whose column value falls in a range (*)</li>
+<li>order statistics including <em>median</em> and <em>centiles</em> (*)</li>
+<li><em>histograms</em>: both <em>equi-width</em> and <em>equi-depth</em> (*)</li>
+</ul>
+</li>
+<li><em>Flajolet-Martin (FM)</em> sketches for approximating <code>COUNT(DISTINCT)</code>.</li>
+<li><em>Most Frequent Value (MFV)</em> sketches, which output the most frequently occurring values in a column, along with their associated counts.</li>
+</ul>
+<p><em>Note:</em> Features marked with a star (*) only work for discrete types that can be cast to int8.</p>
+<p>The sketch methods consist of a number of SQL UDAs (user-defined aggregates) and UDFs (user-defined functions), to be used directly in SQL queries. </p>
+<table class="memberdecls">
+<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="groups"></a>
+Modules</h2></td></tr>
+<tr class="memitem:group__grp__countmin"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__countmin.html">CountMin (Cormode-Muthukrishnan)</a></td></tr>
+<tr class="memdesc:group__grp__countmin"><td class="mdescLeft">&#160;</td><td class="mdescRight">Implements Cormode-Muthukrishnan <em>CountMin</em> sketches on integer values as a user-defined aggregate. <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:group__grp__fmsketch"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__fmsketch.html">FM (Flajolet-Martin)</a></td></tr>
+<tr class="memdesc:group__grp__fmsketch"><td class="mdescLeft">&#160;</td><td class="mdescRight">Implements Flajolet-Martin's distinct count estimation as a user-defined aggregate. <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:group__grp__mfvsketch"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__mfvsketch.html">MFV (Most Frequent Values)</a></td></tr>
+<tr class="memdesc:group__grp__mfvsketch"><td class="mdescLeft">&#160;</td><td class="mdescRight">Implements the most frequent values variant of the CountMin sketch as a user-defined aggregate. <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+</table>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Wed Dec 27 2017 19:05:57 for MADlib by
+    <a href="http://www.doxygen.org/index.html">
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.13 </li>
+  </ul>
+</div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/6c103d3e/docs/v1.13/group__grp__sketches.js
----------------------------------------------------------------------
diff --git a/docs/v1.13/group__grp__sketches.js b/docs/v1.13/group__grp__sketches.js
new file mode 100644
index 0000000..1e443dd
--- /dev/null
+++ b/docs/v1.13/group__grp__sketches.js
@@ -0,0 +1,6 @@
+var group__grp__sketches =
+[
+    [ "CountMin (Cormode-Muthukrishnan)", "group__grp__countmin.html", null ],
+    [ "FM (Flajolet-Martin)", "group__grp__fmsketch.html", null ],
+    [ "MFV (Most Frequent Values)", "group__grp__mfvsketch.html", null ]
+];
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/6c103d3e/docs/v1.13/group__grp__sparse__linear__solver.html
----------------------------------------------------------------------
diff --git a/docs/v1.13/group__grp__sparse__linear__solver.html b/docs/v1.13/group__grp__sparse__linear__solver.html
new file mode 100644
index 0000000..2ea9333
--- /dev/null
+++ b/docs/v1.13/group__grp__sparse__linear__solver.html
@@ -0,0 +1,354 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.13"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/>
+<title>MADlib: Sparse Linear Systems</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+  $(document).ready(initResizable);
+</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { init_search(); });
+</script>
+<script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
+    jax: ["input/TeX","output/HTML-CSS"],
+});
+</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ ></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.13</span>
+   </div>
+   <div id="projectbrief">User Documentation for MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.13 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+$(document).ready(function(){initNavTree('group__grp__sparse__linear__solver.html','');});
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="headertitle">
+<div class="title">Sparse Linear Systems<div class="ingroups"><a class="el" href="group__grp__utility__functions.html">Utility Functions</a> &raquo; <a class="el" href="group__grp__linear__solver.html">Linear Solvers</a></div></div>  </div>
+</div><!--header-->
+<div class="contents">
+<div class="toc"><b>Contents</b> <ul>
+<li class="level1">
+<a href="#sls_usage">Solution Function</a> </li>
+<li class="level1">
+<a href="#sls_opt_params">Optimizer Parameters</a> </li>
+<li class="level1">
+<a href="#sls_output">Output Tables</a> </li>
+<li class="level1">
+<a href="#sls_examples">Examples</a> </li>
+<li>
+<a href="#related">Related Topics</a> </li>
+</ul>
+</div><p>The sparse linear systems module implements solution methods for systems of consistent linear equations. Systems of linear equations take the form: </p><p class="formulaDsp">
+\[ Ax = b \]
+</p>
+<p>where \(x \in \mathbb{R}^{n}\), \(A \in \mathbb{R}^{m \times n} \) and \(b \in \mathbb{R}^{m}\). This module accepts sparse matrix input formats for \(A\) and \(b\). We assume that there are no rows of \(A\) where all elements are zero.</p>
+<dl class="section note"><dt>Note</dt><dd>Algorithms will fail if there is a row of the input matrix containing all zeros.</dd></dl>
+<p>The algorithms implemented in this module can handle large sparse square linear systems. Currently, the algorithms implemented in this module solve the linear system using direct or iterative methods.</p>
+<p><a class="anchor" id="sls_usage"></a></p><dl class="section user"><dt>Sparse Linear Systems Solution Function</dt><dd></dd></dl>
+<pre class="syntax">
+linear_solver_sparse( tbl_source_lhs,
+                      tbl_source_rhs,
+                      tbl_result,
+                      lhs_row_id,
+                      lhs_col_id,
+                      lhs_value,
+                      rhs_row_id,
+                      rhs_value,
+                      num_vars,
+                      grouping_cols := NULL,
+                      optimizer := 'direct',
+                      optimizer_params := 'algorithm = llt'
+                    )
+</pre><p> <b>Arguments</b> </p><dl class="arglist">
+<dt>tbl_source_lhs </dt>
+<dd><p class="startdd">The name of the table containing the left hand side matrix. For the LHS matrix, the input data is expected to be of the following form: </p><pre>
+{TABLE|VIEW} <em>sourceName</em> (
+    ...
+    <em>row_id</em> FLOAT8,
+    <em>col_id</em> FLOAT8,
+    <em>value</em> FLOAT8,
+    ...
+)</pre><p> Each row represents a single equation. The <em>rhs</em> columns refer to the right hand side of the equations and the <em>lhs</em> columns refer to the multipliers on the variables on the left hand side of the same equations. </p>
+<p class="enddd"></p>
+</dd>
+<dt>tbl_source_rhs </dt>
+<dd><p class="startdd">TEXT. The name of the table containing the right hand side vector. For the RHS matrix, the input data is expected to be of the following form: </p><pre>
+{TABLE|VIEW} <em>sourceName</em> (
+    ...
+    <em>row_id</em> FLOAT8,
+    <em>value</em> FLOAT8,
+    ...
+)</pre><p> Each row represents a single equation. The <em>rhs</em> columns refer to the right hand side of the equations while the <em>lhs</em> columns refer to the multipliers on the variables on the left hand side of the same equations. </p>
+<p class="enddd"></p>
+</dd>
+<dt>tbl_result </dt>
+<dd><p class="startdd">TEXT. The name of the table where the output is saved. Output is stored in the table named by the <em>tbl_result</em> argument. The table contains the following columns: </p><table class="output">
+<tr>
+<th>solution </th><td>FLOAT8[]. The solution is an array with the variables in the same order as that provided as input in the 'left_hand_side' column name of the 'source_table'   </td></tr>
+<tr>
+<th>residual_norm </th><td>FLOAT8. Scaled residual norm, defined as \( \frac{|Ax - b|}{|b|} \). This value is an indication of the accuracy of the solution.   </td></tr>
+<tr>
+<th>iters </th><td>INTEGER. Number of iterations required by the algorithm (only applicable for iterative algorithms). The output is NULL for 'direct' methods.   </td></tr>
+</table>
+<p class="enddd"></p>
+</dd>
+<dt>lhs_row_id </dt>
+<dd>TEXT. The name of the column storing the 'row id' of the equations. <dl class="section note"><dt>Note</dt><dd>For a system with N equations, the row_id's must be a continuous range of integers from \( 0 \ldots N-1 \).</dd></dl>
+</dd>
+<dt>lhs_col_id </dt>
+<dd><p class="startdd">TEXT. The name of the column (in tbl_source_lhs) storing the 'col id' of the equations.</p>
+<p class="enddd"></p>
+</dd>
+<dt>lhs_value </dt>
+<dd><p class="startdd">TEXT. The name of the column (in tbl_source_lhs) storing the 'value' of the equations.</p>
+<p class="enddd"></p>
+</dd>
+<dt>rhs_row_id </dt>
+<dd><p class="startdd">TEXT. The name of the column (in tbl_source_rhs) storing the 'row id' of the equations.</p>
+<p class="enddd"></p>
+</dd>
+<dt>rhs_value </dt>
+<dd><p class="startdd">TEXT. The name of the column (in tbl_source_rhs) storing the 'value' of the equations.</p>
+<p class="enddd"></p>
+</dd>
+<dt>num_vars </dt>
+<dd><p class="startdd">INTEGER. The number of variables in the linear system equations.</p>
+<p class="enddd"></p>
+</dd>
+<dt>grouping_cols (optional)  </dt>
+<dd>TEXT, default: NULL. Group by column names. <dl class="section note"><dt>Note</dt><dd>The grouping feature is currently not implemented and this parameter is only a placeholder.</dd></dl>
+</dd>
+<dt>optimizer (optional)  </dt>
+<dd><p class="startdd">TEXT, default: 'direct'. Type of optimizer.</p>
+<p class="enddd"></p>
+</dd>
+<dt>optimizer_params (optional) </dt>
+<dd>TEXT, default: NULL. Optimizer specific parameters. </dd>
+</dl>
+<p><a class="anchor" id="sls_opt_params"></a></p><dl class="section user"><dt>Optimizer Parameters</dt><dd></dd></dl>
+<p>For each optimizer, there are specific parameters that can be tuned for better performance.</p>
+<dl class="arglist">
+<dt>algorithm (default: ldlt) </dt>
+<dd><p class="startdd"></p>
+<p>There are several algorithms that can be classified as 'direct' methods of solving linear systems. MADlib functions provide various algorithmic options for users.</p>
+<p>The following table provides a guideline on the choice of algorithm based on conditions on the A matrix, speed of the algorithms and numerical stability.</p>
+<pre class="fragment">    Algorithm          | Conditions on A  | Speed | Memory
+    ----------------------------------------------------------
+    llt                | Sym. Pos Def     |  ++   |  ---
+    ldlt               | Sym. Pos Def     |  ++   |  ---
+
+    For speed '++' is faster than '+', which is faster than '-'.
+    For accuracy '+++' is better than '++'.
+    For memory, '-' uses less memory than '--'.
+
+    Note: ldlt is often preferred over llt
+</pre><p>There are several algorithms that can be classified as 'iterative' methods of solving linear systems. MADlib functions provide various algorithmic options for users.</p>
+<p>The following table provides a guideline on the choice of algorithm based on conditions on the A matrix, speed of the algorithms and numerical stability.</p>
+<pre class="fragment">    Algorithm            | Conditions on A  | Speed | Memory | Convergence
+    ----------------------------------------------------------------------
+    cg-mem               | Sym. Pos Def     |  +++  |   -    |    ++
+    bicgstab-mem         | Square           |  ++   |   -    |    +
+    precond-cg-mem       | Sym. Pos Def     |  ++   |   -    |    +++
+    precond-bicgstab-mem | Square           |  +    |   -    |    ++
+
+    For memory, '-' uses less memory than '--'.
+    For speed, '++' is faster than '+'.
+</pre><p>Algorithm Details: </p><table class="output">
+<tr>
+<th>cg-mem</th><td>In memory conjugate gradient with diagonal preconditioners.  </td></tr>
+<tr>
+<th>bicgstab-mem</th><td>Bi-conjugate gradient (equivalent to performing CG on the least squares formulation of Ax=b) with incomplete LU preconditioners.  </td></tr>
+<tr>
+<th>precond-cg-mem</th><td>In memory conjugate gradient with diagonal preconditioners.  </td></tr>
+<tr>
+<th>precond-bicgstab-mem</th><td>Bi-conjugate gradient (equivalent to performing CG on the least squares formulation of Ax=b) with incomplete LU preconditioners.  </td></tr>
+</table>
+<p class="enddd"></p>
+</dd>
+<dt>toler (default: 1e-5) </dt>
+<dd><p class="startdd">Termination tolerance (applicable only for iterative methods) which determines the stopping criterion (with respect to residual norm) for iterative methods. </p>
+<p class="enddd"></p>
+</dd>
+</dl>
+<p><a class="anchor" id="sls_examples"></a></p><dl class="section user"><dt>Examples</dt><dd></dd></dl>
+<ol type="1">
+<li>View online help for the sparse linear systems solver function. <pre class="example">
+SELECT madlib.linear_solver_sparse();
+</pre></li>
+<li>Create the sample data set. <pre class="example">
+DROP TABLE IF EXISTS sparse_linear_systems_lhs;
+CREATE TABLE sparse_linear_systems_lhs (
+    rid INTEGER NOT NULL,
+    cid  INTEGER,
+    val DOUBLE PRECISION
+);
+DROP TABLE IF EXISTS sparse_linear_systems_rhs;
+CREATE TABLE sparse_linear_systems_rhs (
+    rid INTEGER NOT NULL,
+    val DOUBLE PRECISION
+);
+INSERT INTO sparse_linear_systems_lhs(rid, cid, val) VALUES
+(0, 0, 1),
+(1, 1, 1),
+(2, 2, 1),
+(3, 3, 1);
+INSERT INTO sparse_linear_systems_rhs(rid, val) VALUES
+(0, 10),
+(1, 20),
+(2, 30);
+</pre></li>
+<li>Solve the linear systems with default parameters. <pre class="example">
+SELECT madlib.linear_solver_sparse( 'sparse_linear_systems_lhs',
+                                    'sparse_linear_systems_rhs',
+                                    'output_table',
+                                    'rid',
+                                    'cid',
+                                    'val',
+                                    'rid',
+                                    'val',
+                                    4
+                                  );
+</pre></li>
+<li>View the contents of the output table. <pre class="example">
+\x on
+SELECT * FROM output_table;
+</pre> Result: <pre class="result">
+--------------------+-------------------------------------
+solution            | {10,20,30,0}
+residual_norm       | 0
+iters               | NULL
+</pre></li>
+<li>Choose a different algorithm than the default algorithm. <pre class="example">
+DROP TABLE IF EXISTS output_table;
+SELECT madlib.linear_solver_sparse( 'sparse_linear_systems_lhs',
+                                    'sparse_linear_systems_rhs',
+                                    'output_table',
+                                    'rid',
+                                    'cid',
+                                    'val',
+                                    'rid',
+                                    'val',
+                                    4,
+                                    NULL,
+                                    'direct',
+                                    'algorithm=llt'
+                                  );
+</pre></li>
+<li>Choose an iterative algorithm instead of the default direct algorithm. <pre class="example">
+DROP TABLE IF EXISTS output_table;
+SELECT madlib.linear_solver_sparse(
+                                    'sparse_linear_systems_lhs',
+                                    'sparse_linear_systems_rhs',
+                                    'output_table',
+                                    'rid',
+                                    'cid',
+                                    'val',
+                                    'rid',
+                                    'val',
+                                    4,
+                                    NULL,
+                                    'iterative',
+                                    'algorithm=cg-mem, toler=1e-5'
+                                  );
+</pre></li>
+</ol>
+<p><a class="anchor" id="related"></a></p><dl class="section user"><dt>Related Topics</dt><dd>File sparse_linear_systems.sql_in documenting the SQL functions.</dd></dl>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Wed Dec 27 2017 19:05:57 for MADlib by
+    <a href="http://www.doxygen.org/index.html">
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.13 </li>
+  </ul>
+</div>
+</body>
+</html>