You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ok...@apache.org on 2017/12/28 22:52:04 UTC

[30/51] [abbrv] [partial] madlib-site git commit: Additional updates for 1.13 release

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/6c103d3e/docs/v1.13/group__grp__decision__tree.html
----------------------------------------------------------------------
diff --git a/docs/v1.13/group__grp__decision__tree.html b/docs/v1.13/group__grp__decision__tree.html
new file mode 100644
index 0000000..4ddb055
--- /dev/null
+++ b/docs/v1.13/group__grp__decision__tree.html
@@ -0,0 +1,833 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.13"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/>
+<title>MADlib: Decision Tree</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+  $(document).ready(initResizable);
+</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { init_search(); });
+</script>
+<script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
+    jax: ["input/TeX","output/HTML-CSS"],
+});
+</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.13</span>
+   </div>
+   <div id="projectbrief">User Documentation for MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.13 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+$(document).ready(function(){initNavTree('group__grp__decision__tree.html','');});
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="headertitle">
+<div class="title">Decision Tree<div class="ingroups"><a class="el" href="group__grp__super.html">Supervised Learning</a> &raquo; <a class="el" href="group__grp__tree.html">Tree Methods</a></div></div>  </div>
+</div><!--header-->
+<div class="contents">
+<div class="toc"><b>Contents</b><ul>
+<li class="level1">
+<a href="#train">Training Function</a> </li>
+<li class="level1">
+<a href="#predict">Prediction Function</a> </li>
+<li class="level1">
+<a href="#display">Display Function</a> </li>
+<li class="level1">
+<a href="#examples">Examples</a> </li>
+<li class="level1">
+<a href="#related">Related Topics</a> </li>
+</ul>
+</div><p>A decision tree is a supervised learning method that can be used for classification and regression. It consists of a structure in which internal nodes represent tests on attributes, and the branches from nodes represent the result of those tests. Each leaf node is a class label and the paths from root to leaf nodes define the set of classification or regression rules.</p>
+<p><a class="anchor" id="train"></a></p><dl class="section user"><dt>Training Function</dt><dd>We implement the decision tree using the CART algorithm, introduced by Breiman et al. [1]. The training function has the following syntax: <pre class="syntax">
+tree_train(
+    training_table_name,
+    output_table_name,
+    id_col_name,
+    dependent_variable,
+    list_of_features,
+    list_of_features_to_exclude,
+    split_criterion,
+    grouping_cols,
+    weights,
+    max_depth,
+    min_split,
+    min_bucket,
+    num_splits,
+    pruning_params,
+    null_handling_params,
+    verbosity
+    )
+</pre> <b>Arguments</b> <dl class="arglist">
+<dt>training_table_name </dt>
+<dd><p class="startdd">TEXT. The name of the table containing the training data.</p>
+<p class="enddd"></p>
+</dd>
+<dt>output_table_name </dt>
+<dd><p class="startdd">TEXT. The name of the generated table containing the model. If a table with the same name already exists, then the function will return an error. A summary table named <em>&lt;output_table_name&gt;_summary</em> is also created at the same time. </p>
+<p class="enddd"></p>
+</dd>
+<dt>id_col_name </dt>
+<dd><p class="startdd">TEXT. Name of the column containing id information in the training data. This is a mandatory argument and is used for prediction and cross-validation. The values are expected to be unique for each row. </p>
+<p class="enddd"></p>
+</dd>
+<dt>dependent_variable </dt>
+<dd><p class="startdd">TEXT. Name of the column that contains the output (response) for training. Boolean, integer and text types are considered classification outputs, while double precision values are considered regression outputs. The response variable for a classification tree can be multinomial, but the time and space complexity of the training function increases linearly as the number of response classes increases.</p>
+<p class="enddd"></p>
+</dd>
+<dt>list_of_features </dt>
+<dd><p class="startdd">TEXT. Comma-separated string of column names or expressions to use as predictors. Can also be a '*' implying all columns are to be used as predictors (except for the ones included in the next argument that lists exclusions). The types of the features can be mixed - boolean, integer, and text columns are considered categorical and double precision columns are considered continuous. Categorical variables are not encoded and used as is for the training.</p>
+<p>Array columns can also be included in the list, where the array is expanded to treat each element of the array as a feature.</p>
+<p>It is important to note that not every combination of the levels of a categorical variable is checked when evaluating a split. The levels of the non-integer categorical variable are ordered by the entropy of the variable in predicting the response. The split at each node is evaluated between these ordered levels. Integer categorical variables, however, are simply ordered by their value. </p>
+<p class="enddd"></p>
+</dd>
+<dt>list_of_features_to_exclude </dt>
+<dd><p class="startdd">TEXT. Comma-separated string of column names to exclude from the predictors list. If the <em>dependent_variable</em> is an expression (including cast of a column name), then this list should include the columns present in the <em>dependent_variable</em> expression, otherwise those columns will be included in the features. The names in this parameter should be identical to the names used in the table and quoted appropriately. </p>
+<p class="enddd"></p>
+</dd>
+<dt>split_criterion </dt>
+<dd><p class="startdd">TEXT, default = 'gini' for classification, 'mse' for regression. Impurity function to compute the feature to use for the split. Supported criteria are 'gini', 'entropy', 'misclassification' for classification trees. For regression trees, split_criterion of 'mse' is always used (irrespective of the input for this argument). </p>
+<p class="enddd"></p>
+</dd>
+<dt>grouping_cols (optional) </dt>
+<dd><p class="startdd">TEXT, default: NULL. Comma-separated list of column names to group the data by. This will result in multiple decision trees, one for each group. </p>
+<p class="enddd"></p>
+</dd>
+<dt>weights (optional) </dt>
+<dd><p class="startdd">TEXT. Column name containing numerical weights for each observation. This can be used to handle the case of unbalanced data sets. If this parameter is not set, all observations (tuples) are treated equally with a weight of 1.0.</p>
+<p class="enddd"></p>
+</dd>
+<dt>max_depth (optional) </dt>
+<dd><p class="startdd">INTEGER, default: 7. Maximum depth of any node of the final tree, with the root node counted as depth 0. A deeper tree can lead to better prediction but will also result in longer processing time and higher memory usage.</p>
+<p class="enddd"></p>
+</dd>
+<dt>min_split (optional) </dt>
+<dd><p class="startdd">INTEGER, default: 20. Minimum number of observations that must exist in a node for a split to be attempted. The best value for this parameter depends on the number of tuples in the dataset.</p>
+<p class="enddd"></p>
+</dd>
+<dt>min_bucket (optional) </dt>
+<dd><p class="startdd">INTEGER, default: min_split/3. Minimum number of observations in any terminal node. If only one of min_bucket or min_split is specified, min_split is set to min_bucket*3 or min_bucket to min_split/3, as appropriate.</p>
+<p class="enddd"></p>
+</dd>
+<dt>num_splits (optional) </dt>
+<dd><p class="startdd">INTEGER, default: 20. Continuous-valued features are binned into discrete quantiles to compute split boundaries. This global parameter is used to compute the resolution of splits for continuous features. A higher number of bins will lead to better prediction, but will also result in longer processing time and higher memory usage.</p>
+<p class="enddd"></p>
+</dd>
+<dt>pruning_params (optional) </dt>
+<dd><p class="startdd">TEXT. Comma-separated string of key-value pairs giving the parameters for pruning the tree. The parameters currently accepted are: </p><table class="output">
+<tr>
+<th>cp </th><td><p class="starttd">Default: 0. A split on a node is attempted only if it decreases the overall lack of fit by a factor of 'cp', else the split is pruned away. This value is used to create an initial tree before running cross-validation (see below).</p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>n_folds </th><td><p class="starttd">Default: 0 (i.e. no cross-validation). Number of cross-validation folds to use to compute the best value of <em>cp</em>. To perform cross-validation, a positive value of <em>n_folds</em> (greater than 2) should be given. An additional output table <em>&lt;model_table&gt;_cv</em> is created containing the values of evaluated <em>cp</em> and the cross-validation error. The tree returned in the output table corresponds to the <em>cp</em> with the lowest cross-validation error (we pick the maximum <em>cp</em> if multiple values have same error).</p>
+<p>The list of <em>cp</em> values is automatically computed by parsing through the tree initially trained on the complete dataset. The tree output is a subset of this initial tree corresponding to the best computed <em>cp</em>.</p>
+<p class="endtd"></p>
+</td></tr>
+</table>
+<p class="enddd"></p>
+</dd>
+<dt>null_handling_params (optional) </dt>
+<dd><p class="startdd">TEXT. Comma-separated string of key-value pairs controlling the behavior of various features handling missing values. </p><table class="output">
+<tr>
+<th>max_surrogates </th><td>Default: 0. Number of surrogates to store for each node. One of the approaches to handling NULLs is to use surrogate splits for each node. A surrogate variable is another predictor variable that is associated (correlated) with the primary split variable. The surrogate variable comes into use when the primary predictor value is NULL.  </td></tr>
+<tr>
+<th>null_as_category </th><td><p class="starttd">Default: FALSE. Whether to treat NULL as a special categorical value.</p>
+<p class="endtd">If this is set to TRUE, NULL values are considered a categorical value and placed at the end of the ordering of categorical levels. Placing it at the end ensures that NULL is never used as a value to split a node on. This parameter is ignored for continuous-valued features.   </p>
+</td></tr>
+</table>
+<p class="enddd"></p>
+</dd>
+<dt>verbosity (optional) </dt>
+<dd>BOOLEAN, default: FALSE. Provides verbose output of the training result. </dd>
+</dl>
+</dd></dl>
+<p><b>Output</b> </p><dl class="arglist">
+</dl>
+<p>The model table produced by the training function contains the following columns:</p>
+<table class="output">
+<tr>
+<th>&lt;...&gt; </th><td>Grouping columns, if provided as input, in the same types as the training table. This could be multiple columns depending on the <code>grouping_cols</code> input.  </td></tr>
+<tr>
+<th>tree </th><td>BYTEA8. Trained decision tree model stored in a binary format.  </td></tr>
+<tr>
+<th>cat_levels_in_text </th><td>TEXT[]. Ordered levels of categorical variables.  </td></tr>
+<tr>
+<th>cat_n_levels </th><td><p class="starttd">INTEGER[]. Number of levels for each categorical variable. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>tree_depth </th><td><p class="starttd">INTEGER. The maximum depth of the tree obtained after training (root has depth 0). </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>pruning_cp </th><td><p class="starttd">DOUBLE PRECISION. The cost-complexity parameter used for pruning the trained tree(s). This would be different from the 'input_cp' value if cross-validation is used.  </p>
+<p class="endtd"></p>
+</td></tr>
+</table>
+<p>A summary table named <em>&lt;output_table_name&gt;_summary</em> is also created at the same time, which has the following columns: </p><table class="output">
+<tr>
+<th>method </th><td><p class="starttd">TEXT. 'tree_train' </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>is_classification </th><td><p class="starttd">BOOLEAN. TRUE if the decision trees are for classification, FALSE if for regression. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>source_table </th><td><p class="starttd">TEXT. The data source table name. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>model_table </th><td><p class="starttd">TEXT. The model table name. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>id_col_name </th><td><p class="starttd">TEXT. The ID column name. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>dependent_varname </th><td><p class="starttd">TEXT. The dependent variable. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>independent_varname </th><td><p class="starttd">TEXT. The independent variables. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>cat_features </th><td><p class="starttd">TEXT. The list of categorical feature names as a comma-separated string. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>con_features </th><td><p class="starttd">TEXT. The list of continuous feature names as a comma-separated string. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>grouping_col </th><td><p class="starttd">TEXT. Names of grouping columns. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>num_all_groups </th><td><p class="starttd">INTEGER. Number of groups in decision tree training. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>num_failed_groups </th><td><p class="starttd">INTEGER. Number of failed groups in decision tree training. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>total_rows_processed </th><td><p class="starttd">BIGINT. Total number of rows processed in all groups. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>total_rows_skipped </th><td><p class="starttd">BIGINT. Total number of rows skipped in all groups due to missing values or failures. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>dependent_var_levels </th><td><p class="starttd">TEXT. For classification, the distinct levels of the dependent variable. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>dependent_var_type </th><td><p class="starttd">TEXT. The type of dependent variable. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>input_cp </th><td><p class="starttd">DOUBLE PRECISION. The complexity parameter (cp) used for pruning the trained tree(s) before cross-validation is run. This is the same as the cp value input using the <em>pruning_params</em>. </p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th>independent_var_types </th><td>TEXT. A comma separated string for the types of independent variables.  </td></tr>
+</table>
+<dl class="section note"><dt>Note</dt><dd><ul>
+<li>Many of the parameters are designed to be similar to the popular R package 'rpart'. An important distinction between rpart and the MADlib function is that for both response and feature variables, MADlib considers integer values as categorical values, while rpart considers them as continuous. To use integers as continuous, cast them to double precision.</li>
+<li>Integer values are ordered by value for computing the split boundaries. Cast to TEXT if the entropy-based ordering method is desired.</li>
+<li>When using no surrogates (<em>max_surrogates</em>=0), all rows containing NULL values for any of the features used for training will be ignored from training and prediction.</li>
+<li>When cross-validation is not used (<em>n_folds</em>=0), each tree output is pruned by the input cost-complexity (<em>cp</em>). With cross-validation, the input <em>cp</em> is the minimum value of all the explored values of 'cp'. During cross-validation, we train an initial tree using the provided <em>cp</em> and explore all possible sub-trees (up to a single-node tree) to compute the optimal sub-tree. The optimal sub-tree and the 'cp' corresponding to this optimal sub-tree are placed in the <em>output_table</em>, with the columns named as <em>tree</em> and <em>pruning_cp</em> respectively.</li>
+<li>The main parameters that affect memory usage are: depth of tree (‘max_depth’), number of features, number of values per categorical feature, and number of bins for continuous features (‘num_splits’). If you are hitting memory limits, consider reducing one or more of these parameters.</li>
+</ul>
+</dd></dl>
+<p><a class="anchor" id="predict"></a></p><dl class="section user"><dt>Prediction Function</dt><dd>The prediction function estimates the conditional mean given a new predictor. It has the following syntax: <pre class="syntax">
+tree_predict(tree_model,
+             new_data_table,
+             output_table,
+             type)
+</pre></dd></dl>
+<p><b>Arguments</b> </p><dl class="arglist">
+<dt>tree_model </dt>
+<dd><p class="startdd">TEXT. Name of the table containing the decision tree model. This should be the output table returned from <em>tree_train.</em></p>
+<p class="enddd"></p>
+</dd>
+<dt>new_data_table </dt>
+<dd><p class="startdd">TEXT. Name of the table containing prediction data. This table is expected to contain the same features that were used during training. The table should also contain <em>id_col_name</em> used for identifying each row.</p>
+<p class="enddd"></p>
+</dd>
+<dt>output_table </dt>
+<dd><p class="startdd">TEXT. Name of the table to output prediction results. If this table already exists, an error is returned. The table contains the <em>id_col_name</em> column giving the 'id' for each prediction and the prediction columns for the dependent variable.</p>
+<p>If <em>type</em> = 'response', then the table has a single additional column with the prediction value of the response. The type of this column depends on the type of the response variable used during training.</p>
+<p>If <em>type</em> = 'prob', then the table has multiple additional columns, one for each possible value of the response variable. The columns are labeled as 'estimated_prob_<em>dep_value</em>', where <em>dep_value</em> represents each value of the response variable.</p>
+<p class="enddd"></p>
+</dd>
+<dt>type </dt>
+<dd>TEXT, optional, default: 'response'. For regression trees, the output is always the predicted value of the dependent variable. For classification trees, the <em>type</em> variable can be 'response', giving the classification prediction as output, or 'prob', giving the class probabilities as output. For each value of the dependent variable, a column with the probabilities is added to the output table.  </dd>
+</dl>
+<dl class="section note"><dt>Note</dt><dd>If the <em>new_data_table</em> contains categories of categorical variables not seen in the training data, the prediction for that row will be NULL.</dd></dl>
+<p><a class="anchor" id="display"></a></p><dl class="section user"><dt>Display Function</dt><dd>The display function outputs a graph representation of the decision tree. The output can either be in the popular 'dot' format that can be visualized using various programs including those in the GraphViz package, or in a simple text format. The details of the text format are output with the tree. <pre class="syntax">
+tree_display(tree_model, dot_format, verbosity)
+</pre></dd></dl>
+<p>An additional display function is provided to output the surrogate splits chosen for each internal node: </p><pre class="syntax">
+tree_surr_display(tree_model)
+</pre><p>The output contains the list of surrogate splits for each internal node. The nodes are sorted in ascending order by id. This is equivalent to viewing the tree in a breadth-first manner. For each surrogate, we output the surrogate split (variable and threshold) and also give the number of rows that were common between the primary split and the surrogate split. Finally, the number of rows present in the majority branch of the primary split is also shown. Only surrogates that perform better than this majority branch are included in the surrogate list. When the primary variable has a NULL value, the surrogate variables are used in order to compute the split for that node. If all surrogate variables are NULL, then the majority branch is used to compute the split for a tuple.</p>
+<p><b>Arguments</b> </p><dl class="arglist">
+<dt>tree_model </dt>
+<dd>TEXT. Name of the table containing the decision tree model. </dd>
+<dt>dot_format </dt>
+<dd>BOOLEAN, default = TRUE. Output can either be in a dot format or a text format. If TRUE, the result is in the dot format, else output is in text format. </dd>
+<dt>verbosity </dt>
+<dd>BOOLEAN, default = FALSE. If set to TRUE, the dot format output will contain additional information (impurity, sample size, number of weighted rows for each response variable, classification or prediction if the tree was pruned at this level) </dd>
+</dl>
+<p>The output is always returned as a 'TEXT'. For the dot format, the output can be redirected to a file on the client side and then rendered using visualization programs.</p>
+<p>To export the dot format result to an external file, use the method below (use unaligned table output mode for psql with the '-A' flag, and inside the psql client, both '\t' and '\o' should be used):</p>
+<pre class="example">
+&gt; # under bash
+&gt; psql -A my_database
+# -- in psql now
+# \t
+# \o test.dot -- export to a file
+# select madlib.tree_display('tree_out');
+# \o
+# \t
+</pre><p>After the dot file has been generated, use third-party plotting software to plot the trees in a nice format: </p><pre class="example">
+&gt; # under bash, convert the dot file into a PDF file
+&gt; dot -Tpdf test.dot &gt; test.pdf
+&gt; xpdf test.pdf&amp;
+</pre><p>Please see the examples below for more details on the contents of the tree output formats.</p>
+<p><a class="anchor" id="examples"></a></p><dl class="section user"><dt>Examples</dt><dd><h4>Decision Tree Classification Example</h4>
+</dd></dl>
+<ol type="1">
+<li>Prepare input data: <pre class="example">
+DROP TABLE IF EXISTS dt_golf CASCADE;
+CREATE TABLE dt_golf (
+    id integer NOT NULL,
+    "OUTLOOK" text,
+    temperature double precision,
+    humidity double precision,
+    "Cont_features" double precision[],
+    cat_features text[],
+    windy boolean,
+    class text
+);
+</pre> <pre class="example">
+COPY dt_golf (id,"OUTLOOK",temperature,humidity,"Cont_features",cat_features,windy,class) FROM stdin WITH DELIMITER '|';
+1|sunny|85|85|{85, 85}|{'a', 'b'}|false|'Don't Play'
+2|sunny|80|90|{80, 90}|{'a', 'b'}|true|'Don't Play'
+3|overcast|83|78|{83, 78}|{'a', 'b'}|false|'Play'
+4|rain|70|96|{70, 96}|{'c', 'd'}|false|'Play'
+5|rain|68|80|{68, 80}|{'a', 'b'}|false|'Play'
+6|rain|65|70|{65, 70}|{'a', 'b'}|true|'Don't Play'
+7|overcast|64|65|{64, 65}|{'a', 'b'}|true|'Play'
+8|sunny|72|95|{72, 95}|{'c', 'd'}|false|'Don't Play'
+9|sunny|69|70|{69, 70}|{'a', 'b'}|false|'Play'
+10|rain|75|80|{75, 80}|{'a', 'b'}|false|'Play'
+11|sunny|75|70|{75, 70}|{'c', 'd'}|true|'Play'
+12|overcast|72|90|{72, 90}|{'a', 'b'}|true|'Play'
+13|overcast|81|75|{81, 75}|{'a', 'b'}|false|'Play'
+14|rain|71|80|{71, 80}|{'a', 'b'}|true|'Don't Play'
+\.
+</pre></li>
+<li>Run the decision tree training function: <pre class="example">
+DROP TABLE IF EXISTS train_output, train_output_summary;
+SELECT madlib.tree_train('dt_golf',         -- source table
+                         'train_output',    -- output model table
+                         'id',              -- id column
+                         'class',           -- response
+                         '"OUTLOOK", "Cont_features", windy',   -- features
+                         NULL::text,        -- exclude columns
+                         'gini',            -- split criterion
+                         NULL::text,        -- no grouping
+                         NULL::text,        -- no weights, all observations treated equally
+                         5,                 -- max depth
+                         3,                 -- min split
+                         1,                 -- min bucket
+                         10                 -- number of bins per continuous variable
+                         );
+</pre></li>
+<li>Predict output categories for the same data that was used for input: <pre class="example">
+DROP TABLE IF EXISTS prediction_results;
+SELECT madlib.tree_predict('train_output',          -- tree model
+                           'dt_golf',               -- new data table
+                           'prediction_results',    -- output table
+                           'response');             -- show prediction
+SELECT g.id, class, estimated_class FROM prediction_results p, dt_golf g where p.id = g.id ORDER BY g.id;
+</pre> Result: <pre class="result">
+id |    class     | estimated_class
+----+--------------+-----------------
+  1 | 'Don't Play' | 'Don't Play'
+  2 | 'Don't Play' | 'Don't Play'
+  3 | 'Play'       | 'Play'
+  4 | 'Play'       | 'Play'
+  5 | 'Play'       | 'Play'
+  6 | 'Don't Play' | 'Don't Play'
+  7 | 'Play'       | 'Play'
+  8 | 'Don't Play' | 'Don't Play'
+  9 | 'Play'       | 'Play'
+ 10 | 'Play'       | 'Play'
+ 11 | 'Play'       | 'Play'
+ 12 | 'Play'       | 'Play'
+ 13 | 'Play'       | 'Play'
+ 14 | 'Don't Play' | 'Don't Play'
+(14 rows)
+</pre></li>
+<li>Create a text display of the tree: <pre class="example">
+SELECT madlib.tree_display('train_output', FALSE);
+</pre> Result: <pre class="result">
+&#160;-------------------------------------
+&#160;- Each node represented by 'id' inside ().
+&#160;- Each internal nodes has the split condition at the end, while each
+        leaf node has a * at the end.
+&#160;- For each internal node (i), its child nodes are indented by 1 level
+        with ids (2i+1) for True node and (2i+2) for False node.
+&#160;- Number of (weighted) rows for each response variable inside [].'
+        The response label order is given as ['"\'Don\'t Play\'"', '"\'Play\'"'].
+        For each leaf, the prediction is given after the '--&gt;'
+&#160;-------------------------------------
+(0)[5 9]  "OUTLOOK" in {overcast}
+   (1)[0 4]  * --&gt; "'Play'"
+   (2)[5 5]  "Cont_features"[1] &lt;= 75
+      (5)[3 5]  "Cont_features"[1] &lt;= 65
+         (11)[1 0]  * --&gt; "'Don't Play'"
+         (12)[2 5]  "Cont_features"[1] &lt;= 70
+            (25)[0 3]  * --&gt; "'Play'"
+            (26)[2 2]  "Cont_features"[1] &lt;= 72
+               (53)[2 0]  * --&gt; "'Don't Play'"
+               (54)[0 2]  * --&gt; "'Play'"
+      (6)[2 0]  * --&gt; "'Don't Play'"
+&#160;-------------------------------------
+</pre> Here are some more details on how to interpret the tree display above...<ul>
+<li>Node numbering starts at 0 for the root node and would be contiguous 1,2...n if the tree was completely full (no pruning).</li>
+<li>Since the tree has been pruned, the node numbering is not contiguous.</li>
+<li>The order of values [x y] indicates the number of weighted rows that correspond to ["Don't play" "Play"] <em>before</em> the node test. For example, at (root) node 0, there are 5 rows that are "Don't play" and 9 rows that are "Play" in the raw data.</li>
+<li>If we apply the test of "OUTLOOK" being overcast, then the True result is leaf node 1 which is "Play". There are 0 "Don't play" rows and 4 "Play" rows that correspond to this case (overcast). The remaining 5 "Don't play" rows and 5 "Play" rows are then tested at node 2 on "Cont_features[1]"&lt;=75. And so on down the tree.</li>
+</ul>
+</li>
+<li>Create a dot format display of the tree: <pre class="example">
+SELECT madlib.tree_display('train_output', TRUE);
+</pre> Result: <pre class="result">
+digraph "Classification tree for dt_golf" {
+         subgraph "cluster0"{
+         label=""
+"g0_0" [label="\"OUTLOOK" &lt;= overcast", shape=ellipse];
+"g0_0" -&gt; "g0_1"[label="yes"];
+"g0_1" [label=""'Play'"",shape=box];
+"g0_0" -&gt; "g0_2"[label="no"];
+"g0_2" [label=""Cont_features"[1] &lt;= 75", shape=ellipse];
+"g0_2" -&gt; "g0_5"[label="yes"];
+"g0_2" -&gt; "g0_6"[label="no"];
+"g0_6" [label=""'Don't Play'"",shape=box];
+"g0_5" [label=""Cont_features"[1] &lt;= 65", shape=ellipse];
+"g0_5" -&gt; "g0_11"[label="yes"];
+"g0_11" [label=""'Don't Play'"",shape=box];
+"g0_5" -&gt; "g0_12"[label="no"];
+"g0_12" [label=""Cont_features"[1] &lt;= 70", shape=ellipse];
+"g0_12" -&gt; "g0_25"[label="yes"];
+"g0_25" [label=""'Play'"",shape=box];
+"g0_12" -&gt; "g0_26"[label="no"];
+"g0_26" [label=""Cont_features"[1] &lt;= 72", shape=ellipse];
+"g0_26" -&gt; "g0_53"[label="yes"];
+"g0_53" [label=""'Don't Play'"",shape=box];
+"g0_26" -&gt; "g0_54"[label="no"];
+"g0_54" [label=""'Play'"",shape=box];
+&#160;&#160;&#160;} //--- end of subgraph------------
+&#160;} //---end of digraph---------
+</pre></li>
+<li>Now create a dot format display of the tree with additional information: <pre class="example">
+SELECT madlib.tree_display('train_output', TRUE, TRUE);
+</pre> Result: <pre class="result">
+digraph "Classification tree for dt_golf" {
+         subgraph "cluster0"{
+         label=""
+"g0_0" [label="\"OUTLOOK" &lt;= overcast\n impurity = 0.459184\n samples = 14\n value = [5 9
+]\n class = "'Play'"", shape=ellipse];
+"g0_0" -&gt; "g0_1"[label="yes"];
+"g0_1" [label=""'Play'"\n impurity = 0\n samples = 4\n value = [0 4
+]",shape=box];
+"g0_0" -&gt; "g0_2"[label="no"];
+"g0_2" [label=""Cont_features"[1] &lt;= 75\n impurity = 0.5\n samples = 10\n value = [5 5
+]\n class = "'Don't Play'"", shape=ellipse];
+"g0_2" -&gt; "g0_5"[label="yes"];
+"g0_2" -&gt; "g0_6"[label="no"];
+"g0_6" [label=""'Don't Play'"\n impurity = 0\n samples = 2\n value = [2 0
+]",shape=box];
+"g0_5" [label=""Cont_features"[1] &lt;= 65\n impurity = 0.46875\n samples = 8\n value = [3 5
+]\n class = "'Play'"", shape=ellipse];
+"g0_5" -&gt; "g0_11"[label="yes"];
+"g0_11" [label=""'Don't Play'"\n impurity = 0\n samples = 1\n value = [1 0
+]",shape=box];
+"g0_5" -&gt; "g0_12"[label="no"];
+"g0_12" [label=""Cont_features"[1] &lt;= 70\n impurity = 0.408163\n samples = 7\n value = [2 5
+]\n class = "'Play'"", shape=ellipse];
+"g0_12" -&gt; "g0_25"[label="yes"];
+"g0_25" [label=""'Play'"\n impurity = 0\n samples = 3\n value = [0 3
+]",shape=box];
+"g0_12" -&gt; "g0_26"[label="no"];
+"g0_26" [label=""Cont_features"[1] &lt;= 72\n impurity = 0.5\n samples = 4\n value = [2 2
+]\n class = "'Don't Play'"", shape=ellipse];
+"g0_26" -&gt; "g0_53"[label="yes"];
+"g0_53" [label=""'Don't Play'"\n impurity = 0\n samples = 2\n value = [2 0
+]",shape=box];
+"g0_26" -&gt; "g0_54"[label="no"];
+"g0_54" [label=""'Play'"\n impurity = 0\n samples = 2\n value = [0 2
+]",shape=box];
+&#160;&#160;&#160;} //--- end of subgraph------------
+&#160;} //---end of digraph---------
+</pre> The additional information in each node is: impurity, sample size, number of weighted rows for each response variable, and classification if the tree was pruned at this level.</li>
+</ol>
+<h4>Decision Tree Regression Example</h4>
+<ol type="1">
+<li>Prepare input data. <pre class="example">
+DROP TABLE IF EXISTS mt_cars;
+CREATE TABLE mt_cars (
+    id integer NOT NULL,
+    mpg double precision,
+    cyl integer,
+    disp double precision,
+    hp integer,
+    drat double precision,
+    wt double precision,
+    qsec double precision,
+    vs integer,
+    am integer,
+    gear integer,
+    carb integer
+);
+</pre> <pre class="example">
+COPY mt_cars (id,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb) FROM stdin WITH DELIMITER '|' NULL 'null';
+1|18.7|8|360|175|3.15|3.44|17.02|0|0|3|2
+2|21|6|160|110|3.9|2.62|16.46|0|1|4|4
+3|24.4|4|146.7|62|3.69|3.19|20|1|0|4|2
+4|21|6|160|110|3.9|2.875|17.02|0|1|4|4
+5|17.8|6|167.6|123|3.92|3.44|18.9|1|0|4|4
+6|16.4|8|275.8|180|3.078|4.07|17.4|0|0|3|3
+7|22.8|4|108|93|3.85|2.32|18.61|1|1|4|1
+8|17.3|8|275.8|180|3.078|3.73|17.6|0|0|3|3
+9|21.4|null|258|110|3.08|3.215|19.44|1|0|3|1
+10|15.2|8|275.8|180|3.078|3.78|18|0|0|3|3
+11|18.1|6|225|105|2.768|3.46|20.22|1|0|3|1
+12|32.4|4|78.7|66|4.08|2.20|19.47|1|1|4|1
+13|14.3|8|360|245|3.21|3.578|15.84|0|0|3|4
+14|22.8|4|140.8|95|3.92|3.15|22.9|1|0|4|2
+15|30.4|4|75.7|52|4.93|1.615|18.52|1|1|4|2
+16|19.2|6|167.6|123|3.92|3.44|18.3|1|0|4|4
+17|33.9|4|71.14|65|4.22|1.835|19.9|1|1|4|1
+18|15.2|null|304|150|3.15|3.435|17.3|0|0|3|2
+19|10.4|8|472|205|2.93|5.25|17.98|0|0|3|4
+20|27.3|4|79|66|4.08|1.935|18.9|1|1|4|1
+21|10.4|8|460|215|3|5.424|17.82|0|0|3|4
+22|26|4|120.3|91|4.43|2.14|16.7|0|1|5|2
+23|14.7|8|440|230|3.23|5.345|17.42|0|0|3|4
+24|30.4|4|95.14|113|3.77|1.513|16.9|1|1|5|2
+25|21.5|4|120.1|97|3.70|2.465|20.01|1|0|3|1
+26|15.8|8|351|264|4.22|3.17|14.5|0|1|5|4
+27|15.5|8|318|150|2.768|3.52|16.87|0|0|3|2
+28|15|8|301|335|3.54|3.578|14.6|0|1|5|8
+29|13.3|8|350|245|3.73|3.84|15.41|0|0|3|4
+30|19.2|8|400|175|3.08|3.845|17.05|0|0|3|2
+31|19.7|6|145|175|3.62|2.77|15.5|0|1|5|6
+32|21.4|4|121|109|4.11|2.78|18.6|1|1|4|2
+\.
+</pre></li>
+<li>Run the decision tree training function: <pre class="example">
+DROP TABLE IF EXISTS train_output, train_output_summary;
+SELECT madlib.tree_train('mt_cars',         -- source table
+                         'train_output',    -- output model table
+                         'id',              -- id column
+                         'mpg',             -- dependent variable
+                         '*',               -- features
+                         'id, hp, drat, am, gear, carb',  -- exclude columns
+                         'mse',             -- split criterion
+                         NULL::text,        -- no grouping
+                         NULL::text,        -- no weights, all observations treated equally
+                         10,                -- max depth
+                         8,                 -- min split
+                         3,                 -- number of bins per continuous variable
+                         10,                -- number of splits
+                         NULL,              -- pruning parameters
+                         'max_surrogates=2' -- number of surrogates
+                         );
+</pre></li>
+<li>Display the decision tree in basic text format: <pre class="example">
+SELECT madlib.tree_display('train_output', FALSE);
+</pre> Result: <pre class="result">
+&#160; -------------------------------------
+&#160;- Each node represented by 'id' inside ().
+&#160;- Each internal nodes has the split condition at the end, while each
+&#160;    leaf node has a * at the end.
+&#160;- For each internal node (i), its child nodes are indented by 1 level
+&#160;    with ids (2i+1) for True node and (2i+2) for False node.
+&#160;- Number of rows and average response value inside []. For a leaf node, this is the prediction.
+&#160;-------------------------------------
+ (0)[32, 20.0906]  cyl in {8,6}
+    (1)[21, 16.6476]  disp &lt;= 258
+       (3)[7, 19.7429]  *
+       (4)[14, 15.1]  qsec &lt;= 17.42
+          (9)[10, 15.81]  qsec &lt;= 16.9
+             (19)[5, 14.78]  *
+             (20)[5, 16.84]  *
+          (10)[4, 13.325]  *
+    (2)[11, 26.6636]  wt &lt;= 2.2
+       (5)[6, 30.0667]  *
+       (6)[5, 22.58]  *
+ &#160;-------------------------------------
+(1 row)
+</pre></li>
+<li>Display the surrogates in the decision tree: <pre class="example">
+SELECT madlib.tree_surr_display('train_output');
+</pre> Result: <pre class="result">
+&#160;-------------------------------------
+       Surrogates for internal nodes
+&#160;-------------------------------------
+ (0) cyl in {8,6}
+      1: disp &gt; 146.7    [common rows = 29]
+      2: vs in {0}    [common rows = 26]
+      [Majority branch = 19 ]
+ (1) disp &lt;= 258
+      1: cyl in {6,4}    [common rows = 19]
+      2: vs in {1}    [common rows = 18]
+      [Majority branch = 14 ]
+ (2) wt &lt;= 2.2
+      1: disp &lt;= 108    [common rows = 9]
+      2: qsec &lt;= 18.52    [common rows = 8]
+      [Majority branch = 6 ]
+ (4) qsec &lt;= 17.42
+      1: disp &gt; 275.8    [common rows = 11]
+      2: vs in {0}    [common rows = 10]
+      [Majority branch = 10 ]
+ (9) qsec &lt;= 16.9
+      1: wt &lt;= 3.84    [common rows = 8]
+      2: disp &lt;= 360    [common rows = 7]
+      [Majority branch = 5 ]
+&#160;-------------------------------------
+(1 row)
+</pre> <dl class="section note"><dt>Note</dt><dd>The 'cyl' feature above has two tuples with null values. In the prediction example below, the surrogate splits for the <em>cyl in {8, 6}</em> split are used to predict those two tuples (<em>id = 9</em> and <em>id = 18</em>). The splits are used in descending order until a surrogate variable is found that is not NULL. In this case, the two tuples have non-NULL values for <em>disp</em>, hence the <em>disp &gt; 146.7</em> split is used to make the prediction. If all the surrogate variables had been NULL then the majority branch would have been followed.</dd></dl>
+</li>
+<li>Predict regression output for the same data and compare with original: <pre class="example">
+DROP TABLE IF EXISTS prediction_results;
+SELECT madlib.tree_predict('train_output',
+                           'mt_cars',
+                           'prediction_results',
+                           'response');
+SELECT s.id, mpg, estimated_mpg FROM prediction_results p, mt_cars s where s.id = p.id ORDER BY id;
+</pre> Result: <pre class="result">
+  id | mpg  |  estimated_mpg
+----+------+------------------
+  1 | 18.7 |            16.84
+  2 |   21 | 19.7428571428571
+  3 | 24.4 |            22.58
+  4 |   21 | 19.7428571428571
+  5 | 17.8 | 19.7428571428571
+  6 | 16.4 |            16.84
+  7 | 22.8 |            22.58
+  8 | 17.3 |           13.325
+  9 | 21.4 | 19.7428571428571
+ 10 | 15.2 |           13.325
+ 11 | 18.1 | 19.7428571428571
+ 12 | 32.4 | 30.0666666666667
+ 13 | 14.3 |            14.78
+ 14 | 22.8 |            22.58
+ 15 | 30.4 | 30.0666666666667
+ 16 | 19.2 | 19.7428571428571
+ 17 | 33.9 | 30.0666666666667
+ 18 | 15.2 |            16.84
+ 19 | 10.4 |           13.325
+ 20 | 27.3 | 30.0666666666667
+ 21 | 10.4 |           13.325
+ 22 |   26 | 30.0666666666667
+ 23 | 14.7 |            16.84
+ 24 | 30.4 | 30.0666666666667
+ 25 | 21.5 |            22.58
+ 26 | 15.8 |            14.78
+ 27 | 15.5 |            14.78
+ 28 |   15 |            14.78
+ 29 | 13.3 |            14.78
+ 30 | 19.2 |            16.84
+ 31 | 19.7 | 19.7428571428571
+ 32 | 21.4 |            22.58
+(32 rows)
+</pre></li>
+</ol>
+<h4>NULL Handling Example</h4>
+<ol type="1">
+<li>Create toy example to illustrate null-as-category handling <pre class="example">
+DROP TABLE IF EXISTS null_handling_example;
+CREATE TABLE null_handling_example (
+    id integer,
+    country text,
+    city text,
+    weather text,
+    response text
+);
+&#160;
+INSERT INTO null_handling_example VALUES
+(1,null,null,null,'a'),
+(2,'US',null,null,'b'),
+(3,'US','NY',null,'c'),
+(4,'US','NY','rainy','d');
+&#160;
+DROP TABLE IF EXISTS train_output, train_output_summary;
+SELECT madlib.tree_train('null_handling_example',         -- source table
+                         'train_output',                  -- output model table
+                         'id',                            -- id column
+                         'response',                      -- dependent variable
+                         'country, weather, city',        -- features
+                         NULL,                            -- features to exclude
+                         'gini',                          -- split criterion
+                         NULL::text,                      -- no grouping
+                         NULL::text,                      -- no weights, all observations treated equally
+                         4,                               -- max depth
+                         1,                               -- min split
+                         1,                               -- number of bins per continuous variable
+                         10,                              -- number of splits
+                         NULL,                            -- pruning parameters
+                         'null_as_category=true'          -- null handling
+                         );
+SELECT cat_levels_in_text, cat_n_levels FROM train_output;
+</pre> <pre class="result">
+            cat_levels_in_text            | cat_n_levels
+------------------------------------------+--------------
+ {US,__NULL__,rainy,__NULL__,NY,__NULL__} | {2,2,2}
+</pre></li>
+<li>Predict for previously not seen data by assuming NULL value as the default <pre class="example">
+DROP TABLE IF EXISTS table_test;
+CREATE TABLE table_test (
+    id integer,
+    country text,
+    city text,
+    weather text,
+    expected_response text
+);
+INSERT INTO table_test VALUES
+(1,'IN','MUM','cloudy','a'),
+(2,'US','HOU','humid','b'),
+(3,'US','NY','sunny','c'),
+(4,'US','NY','rainy','d');
+&#160;
+DROP TABLE IF EXISTS prediction_results;
+SELECT madlib.tree_predict('train_output',
+                           'table_test',
+                           'prediction_results',
+                           'response');
+SELECT s.id, expected_response, estimated_response
+FROM prediction_results p, table_test s
+WHERE s.id = p.id ORDER BY id;
+</pre> <pre class="result">
+ id | expected_response | estimated_response
+----+-------------------+--------------------
+  1 | a                 | a
+  2 | b                 | b
+  3 | c                 | c
+  4 | d                 | d
+(4 rows)
+</pre></li>
+</ol>
+<p><a class="anchor" id="literature"></a></p><dl class="section user"><dt>Literature</dt><dd>[1] Breiman, Leo; Friedman, J. H.; Olshen, R. A.; Stone, C. J. (1984). Classification and regression trees. Monterey, CA: Wadsworth &amp; Brooks/Cole Advanced Books &amp; Software.</dd></dl>
+<p><a class="anchor" id="related"></a></p><dl class="section user"><dt>Related Topics</dt><dd></dd></dl>
+<p>File <a class="el" href="decision__tree_8sql__in.html">decision_tree.sql_in</a> documenting the training function</p>
+<p><a class="el" href="group__grp__random__forest.html">Random Forest</a></p>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Wed Dec 27 2017 19:05:57 for MADlib by
+    <a href="http://www.doxygen.org/index.html">
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.13 </li>
+  </ul>
+</div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/6c103d3e/docs/v1.13/group__grp__dense__linear__solver.html
----------------------------------------------------------------------
diff --git a/docs/v1.13/group__grp__dense__linear__solver.html b/docs/v1.13/group__grp__dense__linear__solver.html
new file mode 100644
index 0000000..5fc3986
--- /dev/null
+++ b/docs/v1.13/group__grp__dense__linear__solver.html
@@ -0,0 +1,262 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.13"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/>
+<title>MADlib: Dense Linear Systems</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+  $(document).ready(initResizable);
+</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { init_search(); });
+</script>
+<script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
+    jax: ["input/TeX","output/HTML-CSS"],
+});
+</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.13</span>
+   </div>
+   <div id="projectbrief">User Documentation for MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.13 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+$(document).ready(function(){initNavTree('group__grp__dense__linear__solver.html','');});
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="headertitle">
+<div class="title">Dense Linear Systems<div class="ingroups"><a class="el" href="group__grp__utility__functions.html">Utility Functions</a> &raquo; <a class="el" href="group__grp__linear__solver.html">Linear Solvers</a></div></div>  </div>
+</div><!--header-->
+<div class="contents">
+<div class="toc"><b>Contents</b> <ul>
+<li class="level1">
+<a href="#dls_usage">Solution Function</a> </li>
+<li class="level1">
+<a href="#dls_opt_params">Optimizer Parameters</a> </li>
+<li class="level1">
+<a href="#dls_examples">Examples</a> </li>
+<li class="level1">
+<a href="#related">Related Topics</a> </li>
+</ul>
+</div><p>The linear systems module implements solution methods for systems of consistent linear equations. Systems of linear equations take the form: </p><p class="formulaDsp">
+\[ Ax = b \]
+</p>
+<p>where \(x \in \mathbb{R}^{n}\), \(A \in \mathbb{R}^{m \times n} \) and \(b \in \mathbb{R}^{m}\). We assume that there are no rows of \(A\) where all elements are zero. The algorithms implemented in this module can handle large dense linear systems. Currently, the algorithms implemented in this module solve the linear system by a direct decomposition. Hence, these methods are known as <em>direct methods</em>.</p>
+<p><a class="anchor" id="dls_usage"></a></p><dl class="section user"><dt>Solution Function</dt><dd><pre class="syntax">
+linear_solver_dense( tbl_source,
+                     tbl_result,
+                     row_id,
+                     LHS,
+                     RHS,
+                     grouping_col,
+                     optimizer,
+                     optimizer_params
+                   )
+</pre> <b>Arguments</b> <dl class="arglist">
+<dt>tbl_source </dt>
+<dd><p class="startdd">TEXT. The name of the table containing the training data. The input data is expected to be of the following form: </p><pre>{TABLE|VIEW} <em>sourceName</em> (
+    ...
+    <em>row_id</em>          FLOAT8,
+    <em>left_hand_side</em>  FLOAT8[],
+    <em>right_hand_side</em> FLOAT8,
+    ...
+)</pre><p>Each row represents a single equation. The <em>right_hand_side</em> column refers to the right hand side of the equations while the <em>left_hand_side</em> column refers to the multipliers on the variables on the left hand side of the same equations.</p>
+<p class="enddd"></p>
+</dd>
+<dt>tbl_result </dt>
+<dd><p class="startdd">TEXT. The name of the table where the output is saved. The output is stored in the table named by the <em>tbl_result</em> argument. It contains the following columns: </p><table class="output">
+<tr>
+<th>solution </th><td>FLOAT8[]. The solution variables in the same order as that provided as input in the 'left_hand_side' column of the <em>tbl_source</em> table.  </td></tr>
+<tr>
+<th>residual_norm </th><td>FLOAT8. The scaled residual norm, defined as \( \frac{|Ax - b|}{|b|} \). This value is an indication of the accuracy of the solution.  </td></tr>
+<tr>
+<th>iters </th><td>INTEGER. Number of iterations required by the algorithm (only applicable for iterative algorithms). The output is NULL for 'direct' methods.   </td></tr>
+</table>
+<p class="enddd"></p>
+</dd>
+<dt>row_id </dt>
+<dd><p class="startdd">TEXT. The name of the column storing the 'row id' of the equations.</p>
+<p>For a system with N equations, the row_id values must form a contiguous range of integers from \( 0 \ldots N-1 \). </p>
+<p class="enddd"></p>
+</dd>
+<dt>LHS </dt>
+<dd><p class="startdd">TEXT. The name of the column storing the 'left hand side' of the equations, stored as an array.</p>
+<p class="enddd"></p>
+</dd>
+<dt>RHS </dt>
+<dd><p class="startdd">TEXT. The name of the column storing the 'right hand side' of the equations.</p>
+<p class="enddd"></p>
+</dd>
+<dt>grouping_col (optional)  </dt>
+<dd>TEXT, default: NULL. Group by column names. <em>Not currently implemented. Any non-NULL value is ignored.</em> </dd>
+<dt>optimizer (optional)  </dt>
+<dd><p class="startdd">TEXT, default: 'direct'. The type of optimizer.</p>
+<p class="enddd"></p>
+</dd>
+<dt>optimizer_params (optional)  </dt>
+<dd>TEXT, default: NULL. Optimizer specific parameters. </dd>
+</dl>
+</dd></dl>
+<p><a class="anchor" id="dls_opt_params"></a></p><dl class="section user"><dt>Optimizer Parameters</dt><dd></dd></dl>
+<p>For each optimizer, there are specific parameters that can be tuned for better performance.</p>
+<dl class="arglist">
+<dt>algorithm (default: householderqr) </dt>
+<dd><p class="startdd">There are several algorithms that can be classified as 'direct' methods of solving linear systems. MADlib dense linear system solvers provide various algorithmic options for users.</p>
+<p>The following table provides a guideline on the choice of algorithm based on conditions on the A matrix, speed of the algorithms and numerical stability. </p><pre class="fragment"> Algorithm            | Conditions on A  | Speed | Accuracy
+ ----------------------------------------------------------
+ householderqr        | None             |  ++   |  +
+ partialpivlu         | Invertible       |  ++   |  +
+ fullpivlu            | None             |  -    |  +++
+ colpivhouseholderqr  | None             |  +    |  ++
+ fullpivhouseholderqr | None             |  -    |  +++
+ llt                  | Pos. Definite    |  +++  |  +
+ ldlt                 | Pos. or Neg Def  |  +++  |  ++
+</pre><p>For speed '+++' is faster than '++', which is faster than '+', which is faster than '-'. For accuracy '+++' is better than '++', which is better than '+'.</p>
+<p class="enddd">More details about the individual algorithms can be found in the <a href="http://eigen.tuxfamily.org/dox-devel/group__TutorialLinearAlgebra.html">Eigen documentation</a>. Eigen is an open source library for linear algebra.  </p>
+</dd>
+</dl>
+<p><a class="anchor" id="dls_examples"></a></p><dl class="section user"><dt>Examples</dt><dd></dd></dl>
+<ol type="1">
+<li>View online help for the linear systems solver function. <pre class="example">
+SELECT madlib.linear_solver_dense();
+</pre></li>
+<li>Create the sample data set. <pre class="example">
+CREATE TABLE linear_systems_test_data( id INTEGER NOT NULL,
+                                       lhs DOUBLE PRECISION[],
+                                       rhs DOUBLE PRECISION
+                                     );
+INSERT INTO linear_systems_test_data(id, lhs, rhs)
+       VALUES
+        (0, ARRAY[1,0,0], 20),
+        (1, ARRAY[0,1,0], 15),
+        (2, ARRAY[0,0,1], 20);
+</pre></li>
+<li>Solve the linear systems with default parameters. <pre class="example">
+SELECT madlib.linear_solver_dense( 'linear_systems_test_data',
+                                   'output_table',
+                                   'id',
+                                   'lhs',
+                                   'rhs'
+                                 );
+</pre></li>
+<li>Obtain the output from the output table. <pre class="example">
+\x on
+SELECT * FROM output_table;
+</pre> Result: <pre class="result">
+--------------------+-------------------------------------
+solution            | {20,15,20}
+residual_norm       | 0
+iters               | NULL
+</pre></li>
+<li>Choose an algorithm different than the default. <pre class="example">
+DROP TABLE IF EXISTS result_table;
+SELECT madlib.linear_solver_dense( 'linear_systems_test_data',
+                                   'result_table',
+                                   'id',
+                                   'lhs',
+                                   'rhs',
+                                   NULL,
+                                   'direct',
+                                   'algorithm=llt'
+                                 );
+</pre></li>
+</ol>
+<p><a class="anchor" id="related"></a></p><dl class="section user"><dt>Related Topics</dt><dd>File <a class="el" href="dense__linear__systems_8sql__in.html" title="SQL functions for linear systems. ">dense_linear_systems.sql_in</a> documenting the SQL functions</dd></dl>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Wed Dec 27 2017 19:05:57 for MADlib by
+    <a href="http://www.doxygen.org/index.html">
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.13 </li>
+  </ul>
+</div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/6c103d3e/docs/v1.13/group__grp__deprecated.html
----------------------------------------------------------------------
diff --git a/docs/v1.13/group__grp__deprecated.html b/docs/v1.13/group__grp__deprecated.html
new file mode 100644
index 0000000..ee8224d
--- /dev/null
+++ b/docs/v1.13/group__grp__deprecated.html
@@ -0,0 +1,141 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.13"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/>
+<title>MADlib: Deprecated Modules</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+  $(document).ready(initResizable);
+</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { init_search(); });
+</script>
+<script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
+    jax: ["input/TeX","output/HTML-CSS"],
+});
+</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.13</span>
+   </div>
+   <div id="projectbrief">User Documentation for MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.13 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+$(document).ready(function(){initNavTree('group__grp__deprecated.html','');});
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="summary">
+<a href="#groups">Modules</a>  </div>
+  <div class="headertitle">
+<div class="title">Deprecated Modules</div>  </div>
+</div><!--header-->
+<div class="contents">
+<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
+<table class="memberdecls">
+<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="groups"></a>
+Modules</h2></td></tr>
+<tr class="memitem:group__grp__indicator"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__indicator.html">Create Indicator Variables</a></td></tr>
+<tr class="memdesc:group__grp__indicator"><td class="mdescLeft">&#160;</td><td class="mdescRight">Provides utility functions helpful for data preparation before modeling. <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:group__grp__mlogreg"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__mlogreg.html">Multinomial Logistic Regression</a></td></tr>
+<tr class="memdesc:group__grp__mlogreg"><td class="mdescLeft">&#160;</td><td class="mdescRight">Also called softmax regression; models the relationship between one or more independent variables and a categorical dependent variable. <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+</table>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Wed Dec 27 2017 19:05:58 for MADlib by
+    <a href="http://www.doxygen.org/index.html">
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.13 </li>
+  </ul>
+</div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/6c103d3e/docs/v1.13/group__grp__deprecated.js
----------------------------------------------------------------------
diff --git a/docs/v1.13/group__grp__deprecated.js b/docs/v1.13/group__grp__deprecated.js
new file mode 100644
index 0000000..05ef03b
--- /dev/null
+++ b/docs/v1.13/group__grp__deprecated.js
@@ -0,0 +1,5 @@
+var group__grp__deprecated =
+[
+    [ "Create Indicator Variables", "group__grp__indicator.html", null ],
+    [ "Multinomial Logistic Regression", "group__grp__mlogreg.html", null ]
+];
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/6c103d3e/docs/v1.13/group__grp__desc__stats.html
----------------------------------------------------------------------
diff --git a/docs/v1.13/group__grp__desc__stats.html b/docs/v1.13/group__grp__desc__stats.html
new file mode 100644
index 0000000..1fe8a9c
--- /dev/null
+++ b/docs/v1.13/group__grp__desc__stats.html
@@ -0,0 +1,145 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.13"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/>
+<title>MADlib: Descriptive Statistics</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+  $(document).ready(initResizable);
+</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { init_search(); });
+</script>
+<script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
+    jax: ["input/TeX","output/HTML-CSS"],
+});
+</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.13</span>
+   </div>
+   <div id="projectbrief">User Documentation for MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.13 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+$(document).ready(function(){initNavTree('group__grp__desc__stats.html','');});
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="summary">
+<a href="#groups">Modules</a>  </div>
+  <div class="headertitle">
+<div class="title">Descriptive Statistics<div class="ingroups"><a class="el" href="group__grp__stats.html">Statistics</a></div></div>  </div>
+</div><!--header-->
+<div class="contents">
+<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
+<p>A collection of methods to compute descriptive statistics of the dataset.</p>
+<table class="memberdecls">
+<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="groups"></a>
+Modules</h2></td></tr>
+<tr class="memitem:group__grp__sketches"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__sketches.html">Cardinality Estimators</a></td></tr>
+<tr class="memdesc:group__grp__sketches"><td class="mdescLeft">&#160;</td><td class="mdescRight">A collection of methods to estimate the number of unique values contained in the data. <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:group__grp__correlation"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__correlation.html">Pearson's Correlation</a></td></tr>
+<tr class="memdesc:group__grp__correlation"><td class="mdescLeft">&#160;</td><td class="mdescRight">Generates a cross-correlation matrix for all pairs of numeric columns in a table. <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:group__grp__summary"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__summary.html">Summary</a></td></tr>
+<tr class="memdesc:group__grp__summary"><td class="mdescLeft">&#160;</td><td class="mdescRight">Calculates general descriptive statistics for any data table. <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+</table>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Wed Dec 27 2017 19:05:57 for MADlib by
+    <a href="http://www.doxygen.org/index.html">
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.13 </li>
+  </ul>
+</div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/6c103d3e/docs/v1.13/group__grp__desc__stats.js
----------------------------------------------------------------------
diff --git a/docs/v1.13/group__grp__desc__stats.js b/docs/v1.13/group__grp__desc__stats.js
new file mode 100644
index 0000000..cbc784a
--- /dev/null
+++ b/docs/v1.13/group__grp__desc__stats.js
@@ -0,0 +1,6 @@
+var group__grp__desc__stats =
+[
+    [ "Cardinality Estimators", "group__grp__sketches.html", "group__grp__sketches" ],
+    [ "Pearson's Correlation", "group__grp__correlation.html", null ],
+    [ "Summary", "group__grp__summary.html", null ]
+];
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/6c103d3e/docs/v1.13/group__grp__early__stage.html
----------------------------------------------------------------------
diff --git a/docs/v1.13/group__grp__early__stage.html b/docs/v1.13/group__grp__early__stage.html
new file mode 100644
index 0000000..715bcca
--- /dev/null
+++ b/docs/v1.13/group__grp__early__stage.html
@@ -0,0 +1,146 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.13"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/>
+<title>MADlib: Early Stage Development</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+  $(document).ready(initResizable);
+</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { init_search(); });
+</script>
+<script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
+    jax: ["input/TeX","output/HTML-CSS"],
+});
+</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.13</span>
+   </div>
+   <div id="projectbrief">User Documentation for MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.13 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+$(document).ready(function(){initNavTree('group__grp__early__stage.html','');});
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="summary">
+<a href="#groups">Modules</a>  </div>
+  <div class="headertitle">
+<div class="title">Early Stage Development</div>  </div>
+</div><!--header-->
+<div class="contents">
+<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
+<table class="memberdecls">
+<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="groups"></a>
+Modules</h2></td></tr>
+<tr class="memitem:group__grp__cg"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__cg.html">Conjugate Gradient</a></td></tr>
+<tr class="memdesc:group__grp__cg"><td class="mdescLeft">&#160;</td><td class="mdescRight">Finds the solution to the linear system \( \boldsymbol Ax = \boldsymbol b \), where \( \boldsymbol A \) is a symmetric, positive-definite matrix and \(x\) and \( \boldsymbol b \) are vectors. <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:group__grp__bayes"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__bayes.html">Naive Bayes Classification</a></td></tr>
+<tr class="memdesc:group__grp__bayes"><td class="mdescLeft">&#160;</td><td class="mdescRight">Constructs a classification model from a dataset where each attribute independently contributes to the probability that a data point belongs to a category. <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:group__grp__sample"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__sample.html">Random Sampling</a></td></tr>
+<tr class="memdesc:group__grp__sample"><td class="mdescLeft">&#160;</td><td class="mdescRight">Provides utility functions for sampling operations. <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:group__grp__nene"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__nene.html">Nearest Neighbors</a></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+</table>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Wed Dec 27 2017 19:05:58 for MADlib by
+    <a href="http://www.doxygen.org/index.html">
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.13 </li>
+  </ul>
+</div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/6c103d3e/docs/v1.13/group__grp__early__stage.js
----------------------------------------------------------------------
diff --git a/docs/v1.13/group__grp__early__stage.js b/docs/v1.13/group__grp__early__stage.js
new file mode 100644
index 0000000..7e4773a
--- /dev/null
+++ b/docs/v1.13/group__grp__early__stage.js
@@ -0,0 +1,7 @@
+var group__grp__early__stage =
+[
+    [ "Conjugate Gradient", "group__grp__cg.html", null ],
+    [ "Naive Bayes Classification", "group__grp__bayes.html", null ],
+    [ "Random Sampling", "group__grp__sample.html", null ],
+    [ "Nearest Neighbors", "group__grp__nene.html", "group__grp__nene" ]
+];
\ No newline at end of file