You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by xt...@apache.org on 2016/04/07 23:47:26 UTC

[23/51] [abbrv] [partial] incubator-madlib-site git commit: Update doc for 1.9 release

http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/blob/c506dd05/docs/latest/group__grp__svm.html
----------------------------------------------------------------------
diff --git a/docs/latest/group__grp__svm.html b/docs/latest/group__grp__svm.html
new file mode 100644
index 0000000..ffbd6fe
--- /dev/null
+++ b/docs/latest/group__grp__svm.html
@@ -0,0 +1,489 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.10"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/>
+<title>MADlib: Support Vector Machines</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+  $(document).ready(initResizable);
+  $(window).load(resizeHeight);
+</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { init_search(); });
+</script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.net');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.net"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ ></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.9</span>
+   </div>
+   <div id="projectbrief">User Documentation for MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.10 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+$(document).ready(function(){initNavTree('group__grp__svm.html','');});
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="headertitle">
+<div class="title">Support Vector Machines<div class="ingroups"><a class="el" href="group__grp__super.html">Supervised Learning</a></div></div>  </div>
+</div><!--header-->
+<div class="contents">
+<div class="toc"><b>Contents</b><ul>
+<li class="level1">
+<a href="#svm_classification">Classification Function</a> </li>
+<li class="level1">
+<a href="#svm_regression">Regression Function</a> </li>
+<li class="level1">
+<a href="#kernel_params">Kernel Parameters</a> </li>
+<li class="level1">
+<a href="#parameters">Other Parameters</a> </li>
+<li class="level1">
+<a href="#predict">Prediction Functions</a> </li>
+<li class="level1">
+<a href="#example">Examples</a> </li>
+<li class="level1">
+<a href="#background">Technical Background</a> </li>
+<li class="level1">
+<a href="#literature">Literature</a> </li>
+<li class="level1">
+<a href="#related">Related Topics</a> </li>
+</ul>
+</div><p>Support Vector Machines (SVMs) are models for regression and classification tasks. SVM models have two particularly desirable features: robustness in the presence of noisy data and applicability to a variety of data configurations. At its core, a <em>linear</em> SVM model is a hyperplane separating two distinct classes of data (in the case of classification problems), in such a way that the distance between the hyperplane and the nearest training data point (called the <em>margin</em>) is maximized. Vectors that lie on this margin are called support vectors. With the support vectors fixed, perturbations of vectors beyond the margin will not affect the model; this contributes to the model’s robustness. By substituting a kernel function for the usual inner product, one can approximate a large variety of decision boundaries in addition to linear hyperplanes. <a class="anchor" id="svm_classification"></a></p><dl class="section user"><dt>Classification Training Function</dt><dd>The SVM classification training function has the following format: <pre class="syntax">
+svm_classification(
+    source_table,
+    model_table,
+    dependent_varname,
+    independent_varname,
+    kernel_func,
+    kernel_params,
+    grouping_col,
+    params,
+    verbose
+    )
+</pre> <b>Arguments</b> <dl class="arglist">
+<dt>source_table </dt>
+<dd><p class="startdd">TEXT. Name of the table containing the training data.</p>
+<p class="enddd"></p>
+</dd>
+<dt>model_table </dt>
+<dd><p class="startdd">TEXT. Name of the output table containing the model. Details of the output tables are provided below. </p>
+<p class="enddd"></p>
+</dd>
+<dt>dependent_varname </dt>
+<dd><p class="startdd">TEXT. Name of the dependent variable column. For classification, this column can contain values of any type, but must assume exactly two distinct values. Otherwise, an error will be thrown. </p>
+<p class="enddd"></p>
+</dd>
+<dt>independent_varname </dt>
+<dd><p class="startdd">TEXT. Expression list to evaluate for the independent variables. An intercept variable is not assumed. It is common to provide an explicit intercept term by including a single constant <code>1</code> term in the independent variable list. Expression should be able to be cast into DOUBLE PRECISION []. </p>
+<p class="enddd"></p>
+</dd>
+<dt>kernel_func (optional) </dt>
+<dd><p class="startdd">TEXT, default: 'linear'. Type of kernel. Currently three kernel types are supported: 'linear', 'gaussian', and 'polynomial'. The text can be any prefix of one of the three strings; e.g., kernel_func='ga' will create a Gaussian kernel. </p>
+<p class="enddd"></p>
+</dd>
+<dt>kernel_params (optional) </dt>
+<dd><p class="startdd">TEXT, default: NULL. Parameters for the non-linear kernel in a comma-separated string of key-value pairs. The actual parameters differ depending on the value of <em>kernel_func</em>. See the description below for details. </p>
+<p class="enddd"></p>
+</dd>
+<dt>grouping_col (optional) </dt>
+<dd><p class="startdd">TEXT, default: NULL. An expression list used to group the input dataset into discrete groups, which results in running one model per group. Similar to the SQL "GROUP BY" clause. When this value is NULL, no grouping is used and a single model is generated. Please note that cross validation is not supported if grouping is used.</p>
+<p class="enddd"></p>
+</dd>
+<dt>params (optional) </dt>
+<dd><p class="startdd">TEXT, default: NULL. Parameters for optimization and regularization in a comma-separated string of key-value pairs. If a list of values is provided, then cross-validation will be performed to select the <em>best</em> value from the list. See the description below for details. </p>
+<p class="enddd"></p>
+</dd>
+<dt>verbose (optional) </dt>
+<dd>BOOLEAN, default: FALSE. Verbose output of the results of training. </dd>
+</dl>
+</dd></dl>
+<p><b>Output tables</b> <br />
+ The model table produced by SVM contains the following columns: </p><table  class="output">
+<tr>
+<th>coef </th><td>FLOAT8. Vector of coefficients.  </td></tr>
+<tr>
+<th>grouping_key </th><td>TEXT. Identifies the group to which the datum belongs.  </td></tr>
+<tr>
+<th>num_rows_processed </th><td>BIGINT. Number of rows processed.  </td></tr>
+<tr>
+<th>num_rows_skipped </th><td>BIGINT. Number of rows skipped due to missing values or failures.  </td></tr>
+<tr>
+<th>num_iterations </th><td>INTEGER. Number of iterations completed by stochastic gradient descent algorithm. The algorithm either converged in this number of iterations or hit the maximum number specified in the optimization parameters.   </td></tr>
+<tr>
+<th>loss </th><td>FLOAT8. Value of the objective function of SVM. See Technical Background section below for more details.  </td></tr>
+<tr>
+<th>norm_of_gradient </th><td>FLOAT8. Value of the L2-norm of the (sub)-gradient of the objective function.  </td></tr>
+<tr>
+<th>__dep_var_mapping </th><td>TEXT[]. Vector of dependent variable labels. The first entry corresponds to -1 and the second to +1. For internal use only.  </td></tr>
+</table>
+<p>An auxiliary table named &lt;model_table&gt;_random is created if the kernel is not linear. It contains data needed to embed test data into a random feature space (see references [2,3]). This data is used internally by svm_predict and is not meaningful on its own to the user, so you can ignore it.</p>
+<p>A summary table named &lt;model_table&gt;_summary is also created, which has the following columns: </p><table  class="output">
+<tr>
+<th>method </th><td>'svm'  </td></tr>
+<tr>
+<th>version_number </th><td>Version of MADlib which was used to generate the model.  </td></tr>
+<tr>
+<th>source_table </th><td>The data source table name.  </td></tr>
+<tr>
+<th>model_table </th><td>The model table name.  </td></tr>
+<tr>
+<th>dependent_varname </th><td>The dependent variable.  </td></tr>
+<tr>
+<th>independent_varname </th><td>The independent variables.  </td></tr>
+<tr>
+<th>kernel_func </th><td>The kernel function.  </td></tr>
+<tr>
+<th>kernel_parameters </th><td>The kernel parameters, as well as random feature map data.  </td></tr>
+<tr>
+<th>grouping_col </th><td>Columns on which to group.  </td></tr>
+<tr>
+<th>optim_params </th><td>A string containing the optimization parameters.  </td></tr>
+<tr>
+<th>reg_params </th><td>A string containing the regularization parameters.  </td></tr>
+<tr>
+<th>num_all_groups </th><td>Number of groups in SVM training.  </td></tr>
+<tr>
+<th>num_failed_groups </th><td>Number of failed groups in SVM training.  </td></tr>
+<tr>
+<th>total_rows_processed </th><td>Total number of rows processed in all groups.  </td></tr>
+<tr>
+<th>total_rows_skipped </th><td>Total number of rows skipped in all groups due to missing values or failures.  </td></tr>
+</table>
+<p><a class="anchor" id="svm_regression"></a></p><dl class="section user"><dt>Regression Training Function</dt><dd>The SVM regression training function has the following format: <pre class="syntax">
+svm_regression(source_table,
+    model_table,
+    dependent_varname,
+    independent_varname,
+    kernel_func,
+    kernel_params,
+    grouping_col,
+    params,
+    verbose
+    )
+</pre> <b>Arguments</b> </dd></dl>
+<p>Specifications for regression are largely the same as for classification. In the model table, there is no dependent variable mapping. The following arguments have specifications which differ from svm_classification:</p>
+<dl class="arglist">
+<dt>dependent_varname </dt>
+<dd><p class="startdd">TEXT. Name of the dependent variable column. For regression, this column can contain only values or expressions that can be cast to DOUBLE PRECISION. Otherwise, an error will be thrown. </p>
+<p class="enddd"></p>
+</dd>
+<dt>params (optional) </dt>
+<dd>TEXT, default: NULL. The parameters <em>epsilon</em> and <em>eps_table</em> are only meaningful for regression. See description below for more details.  </dd>
+</dl>
+<p><a class="anchor" id="kernel_params"></a></p><dl class="section user"><dt>Kernel Parameters</dt><dd>Kernel parameters are supplied in a string containing a comma-delimited list of name-value pairs. All of these named parameters are optional, and their order does not matter. You must use the format "&lt;param_name&gt; = &lt;value&gt;" to specify the value of a parameter, otherwise the parameter is ignored.</dd></dl>
+<p>When the <a class="el" href="svm_8sql__in.html#a8da862a62df18a51dc88eba9c60b5638">svm_classification()</a> <em>kernel_func</em> argument value is 'gaussian', the <em>kernel_params</em> argument is a string containing name-value pairs with the following format. (Line breaks are inserted for readability.) </p><pre class="syntax">
+  'gamma = &lt;value&gt;,
+   n_components = &lt;value&gt;,
+   random_state = &lt;value&gt;'
+</pre><p> <b>Parameters</b> </p><dl class="arglist">
+<dt>gamma </dt>
+<dd><p class="startdd">Default: 1/num_features. The parameter <img class="formulaInl" alt="$\gamma$" src="form_542.png"/> in the Radial Basis Function kernel, i.e., <img class="formulaInl" alt="$\exp(-\gamma||x-y||^2)$" src="form_541.png"/>. Choosing a proper value for <em>gamma</em> is critical to the performance of a kernel machine; e.g., while a large <em>gamma</em> tends to cause overfitting, a small <em>gamma</em> will make the model too constrained to capture the complexity of the data. </p>
+<p class="enddd"></p>
+</dd>
+<dt>n_components </dt>
+<dd><p class="startdd">Default: 2*num_features. The dimensionality of the transformed feature space. A larger value lowers the variance of the estimate of the kernel but requires more memory and takes longer to train.</p>
+<p class="enddd"></p>
+</dd>
+<dt>random_state </dt>
+<dd>Default: 1. Seed used by the random number generator.  </dd>
+</dl>
+<p>When the <a class="el" href="svm_8sql__in.html#a8da862a62df18a51dc88eba9c60b5638">svm_classification()</a> <em>kernel_func</em> argument value is 'polynomial', the <em>kernel_params</em> argument is a string containing name-value pairs with the following format. (Line breaks are inserted for readability.) </p><pre class="syntax">
+  'coef0 = &lt;value&gt;,
+   degree = &lt;value&gt;,
+   n_components = &lt;value&gt;,
+   random_state = &lt;value&gt;'
+</pre><p> <b>Parameters</b> </p><dl class="arglist">
+<dt>coef0 </dt>
+<dd><p class="startdd">Default: 1.0. The independent term <img class="formulaInl" alt="$q$" src="form_517.png"/> in <img class="formulaInl" alt="$ (\langle x,y\rangle + q)^r $" src="form_543.png"/>. Must be larger than or equal to 0. When it is 0, the polynomial kernel is in homogeneous form. </p>
+<p class="enddd"></p>
+</dd>
+<dt>degree </dt>
+<dd><p class="startdd">Default: 3. The parameter <img class="formulaInl" alt="$r$" src="form_544.png"/> in <img class="formulaInl" alt="$ (\langle x,y\rangle + q)^r $" src="form_543.png"/>. </p>
+<p class="enddd"></p>
+</dd>
+<dt>n_components </dt>
+<dd><p class="startdd">Default: 2*num_features. The dimensionality of the transformed feature space. A larger value lowers the variance of the estimate of the kernel but requires more memory and takes longer to train.</p>
+<p class="enddd"></p>
+</dd>
+<dt>random_state </dt>
+<dd>Default: 1. Seed used by the random number generator.  </dd>
+</dl>
+<p><a class="anchor" id="parameters"></a></p><dl class="section user"><dt>Other Parameters</dt><dd>Parameters in this section are supplied in the <em>params</em> argument as a string containing a comma-delimited list of name-value pairs. All of these named parameters are optional, and their order does not matter. You must use the format "&lt;param_name&gt; = &lt;value&gt;" to specify the value of a parameter, otherwise the parameter is ignored.</dd></dl>
+<p>Hyperparameter optimization can be carried out using the built-in cross validation mechanism, which is activated by assigning a value greater than 1 to the parameter <em>n_folds</em> in <em>params</em>. Please note that cross validation is not supported if grouping is used. The values of a parameter to cross validate should be provided in a list. For example, if one wanted to regularize with the L1 norm and use a lambda value from the set {0.3, 0.4, 0.5}, one might input 'lambda={0.3, 0.4, 0.5}, norm=L1, n_folds=10' in <em>params</em>. Note that the use of '{}' and '[]' are both valid here.</p>
+<p>Please note that not all of the parameters below can be cross-validated. For parameters where cross validation is allowed, their default values are presented in list format; e.g., [0.01]. </p><pre class="syntax">
+  'init_stepsize = &lt;value&gt;,
+   decay_factor = &lt;value&gt;,
+   max_iter = &lt;value&gt;,
+   tolerance = &lt;value&gt;,
+   lambda = &lt;value&gt;,
+   norm = &lt;value&gt;,
+   epsilon = &lt;value&gt;,
+   eps_table = &lt;value&gt;,
+   validation_result = &lt;value&gt;,
+   n_folds = &lt;value&gt;'
+</pre><p> <b>Parameters</b> </p><dl class="arglist">
+<dt>init_stepsize </dt>
+<dd><p class="startdd">Default: [0.01]. Also known as the initial learning rate. A small value is usually desirable to ensure convergence, while a large value provides more room for progress during training. Since the best value depends on the condition number of the data, in practice one often searches in an exponential grid using built-in cross validation; e.g., "init_stepsize = [1, 0.1, 0.001]". To reduce training time, it is common to run cross validation on a subsampled dataset, since this usually provides a good estimate of the condition number of the whole dataset. Then the resulting <em>init_stepsize</em> can be run on the whole dataset. </p>
+<p class="enddd"></p>
+</dd>
+<dt>decay_factor </dt>
+<dd><p class="startdd">Default: [0.9]. Controls the learning rate schedule: 0 means a constant rate; -1 means inverse scaling, i.e., stepsize = init_stepsize / iteration; a value &gt; 0 means exponential decay, i.e., stepsize = init_stepsize * decay_factor^iteration. </p>
+<p class="enddd"></p>
+</dd>
+<dt>max_iter </dt>
+<dd><p class="startdd">Default: [100]. The maximum number of iterations allowed. </p>
+<p class="enddd"></p>
+</dd>
+<dt>tolerance </dt>
+<dd><p class="startdd">Default: 1e-10. The criterion to end iterations. The training stops whenever the difference between the training models of two consecutive iterations is smaller than <em>tolerance</em> or the iteration number is larger than <em>max_iter</em>. </p>
+<p class="enddd"></p>
+</dd>
+<dt>lambda </dt>
+<dd><p class="startdd">Default: [0.01]. Regularization parameter. Must be positive, can’t be 0. </p>
+<p class="enddd"></p>
+</dd>
+<dt>norm </dt>
+<dd><p class="startdd">Default: 'L2'. Name of the regularization, either 'L2' or 'L1'. </p>
+<p class="enddd"></p>
+</dd>
+<dt>epsilon </dt>
+<dd><p class="startdd">Default: [0.01]. Determines the <img class="formulaInl" alt="$\epsilon$" src="form_520.png"/> for <img class="formulaInl" alt="$\epsilon$" src="form_520.png"/>-regression. Ignored during classification. When training the model, differences of less than <img class="formulaInl" alt="$\epsilon$" src="form_520.png"/> between estimated labels and actual labels are ignored. A larger <img class="formulaInl" alt="$\epsilon$" src="form_520.png"/> will yield a model with fewer support vectors, but will not generalize as well to future data. Generally, it has been suggested that epsilon should increase with noisier data, and decrease with the number of samples. See [5]. </p>
+<p class="enddd"></p>
+</dd>
+<dt>eps_table </dt>
+<dd><p class="startdd">Default: NULL. Name of the input table that contains values of epsilon for different groups. Ignored when <em>grouping_col</em> is NULL. Define this input table if you want different epsilon values for different groups. The table consists of a column named <em>epsilon</em> which specifies the epsilon values, and one or more columns for <em>grouping_col</em>. Extra groups are ignored, and groups not present in this table will use the epsilon value specified in parameter <em>epsilon</em>. </p>
+<p class="enddd"></p>
+</dd>
+<dt>validation_result </dt>
+<dd><p class="startdd">Default: NULL. Name of the table to store the cross validation results, including the values of parameters and their averaged error values. For now, a simple metric such as 0-1 loss is used for classification, and mean square error is used for regression. The table is only created if the name is not NULL. </p>
+<p class="enddd"></p>
+</dd>
+<dt>n_folds </dt>
+<dd>Default: 0. Number of folds (k). Must be at least 2 to activate cross validation. If a value of k &gt;= 2 is specified, each fold is then used as a validation set once, while the other k - 1 folds form the training set.  </dd>
+</dl>
+<p><a class="anchor" id="predict"></a></p><dl class="section user"><dt>Prediction Function</dt><dd>The prediction function is used to estimate the conditional mean given a new predictor. It has the following syntax: <pre class="syntax">
+svm_predict(model_table,
+            new_data_table,
+            id_col_name,
+            output_table)
+</pre></dd></dl>
+<p><b>Arguments</b> </p><dl class="arglist">
+<dt>model_table </dt>
+<dd><p class="startdd">TEXT. Model table produced by the training function.</p>
+<p class="enddd"></p>
+</dd>
+<dt>new_data_table </dt>
+<dd><p class="startdd">TEXT. Name of the table containing the prediction data. This table is expected to contain the same features that were used during training. The table should also contain id_col_name used for identifying each row.</p>
+<p class="enddd"></p>
+</dd>
+<dt>id_col_name </dt>
+<dd><p class="startdd">TEXT. The name of the id column in the input table.</p>
+<p class="enddd"></p>
+</dd>
+<dt>output_table </dt>
+<dd>TEXT. Name of the table where output predictions are written. If this table name is already in use, then an error is returned. The table contains the id_col_name column giving the 'id' for each prediction and the prediction columns for the dependent variable. </dd>
+</dl>
+<p><a class="anchor" id="example"></a></p><dl class="section user"><dt>Examples</dt><dd><ol type="1">
+<li>Create an input data set. <pre class="example">
+CREATE TABLE houses (id INT, tax INT, bedroom INT, bath FLOAT, price INT,
+            size INT, lot INT);
+COPY houses FROM STDIN WITH DELIMITER '|';
+  1 |  590 |       2 |    1 |  50000 |  770 | 22100
+  2 | 1050 |       3 |    2 |  85000 | 1410 | 12000
+  3 |   20 |       3 |    1 |  22500 | 1060 |  3500
+  4 |  870 |       2 |    2 |  90000 | 1300 | 17500
+  5 | 1320 |       3 |    2 | 133000 | 1500 | 30000
+  6 | 1350 |       2 |    1 |  90500 |  820 | 25700
+  7 | 2790 |       3 |  2.5 | 260000 | 2130 | 25000
+  8 |  680 |       2 |    1 | 142500 | 1170 | 22000
+  9 | 1840 |       3 |    2 | 160000 | 1500 | 19000
+ 10 | 3680 |       4 |    2 | 240000 | 2790 | 20000
+ 11 | 1660 |       3 |    1 |  87000 | 1030 | 17500
+ 12 | 1620 |       3 |    2 | 118600 | 1250 | 20000
+ 13 | 3100 |       3 |    2 | 140000 | 1760 | 38000
+ 14 | 2070 |       2 |    3 | 148000 | 1550 | 14000
+ 15 |  650 |       3 |  1.5 |  65000 | 1450 | 12000
+\.
+</pre></li>
+<li>Train a classification model. First, use a linear model. <pre class="example">
+SELECT madlib.svm_classification('houses',
+                                 'houses_svm',
+                                 'price &lt; 100000',
+                                 'ARRAY[1, tax, bath, size]'
+                           );
+</pre></li>
+<li>Next, generate a nonlinear model using a Gaussian kernel. This time we specify the initial step size and maximum number of iterations to run. As part of the kernel parameters, we choose 10 as the dimension of the space where we train the SVM. A larger number will lead to a more powerful model but runs the risk of overfitting. As a result, the model will be a 10-dimensional vector, instead of a 4-dimensional one as in the case of the linear model, which we will verify when we examine the models. <pre class="example">
+SELECT madlib.svm_classification( 'houses',
+                                  'houses_svm_gaussian',
+                                  'price &lt; 100000',
+                                  'ARRAY[1, tax, bath, size]',
+                                  'gaussian',
+                                  'n_components=10',
+                                  '',
+                                  'init_stepsize=1, max_iter=200'
+                           );
+</pre></li>
+<li>View the result for the linear model. <pre class="example">
+-- Set extended display on for easier reading of output
+\x ON
+SELECT * FROM houses_svm;
+</pre> Result: <pre class="result">
+-[ RECORD 1 ]+---------------------------------------------------------------------------
+coef               | [0.113989576847, -0.00226133300602, -0.0676303607996, 0.00179440841072]
+loss               | 9.21745071385
+norm_of_gradient   | 108.171180769
+num_iterations     | 100
+num_rows_processed | 15
+num_rows_skipped   | 0
+dep_var_mapping    | [False, True]
+</pre></li>
+<li>View the results from kernel SVM. <pre class="example">
+-- Set extended display on for easier reading of output
+\x ON
+SELECT * FROM houses_svm_gaussian;
+</pre> Result: <pre class="result">
+-[ RECORD 1 ]+---------------------------------------------------------------------------
+coef               | [-2.00789985255, 2.02625531256, -1.09903715824, 2.04431020735, 3.14208435644, 0.14838741816, 2.07527256499, 3.0816372960, 0.853428649407, 3.63747384926]
+loss               | 0.255909866744
+norm_of_gradient   | 0.0715415776655
+num_iterations     | 184
+num_rows_processed | 15
+num_rows_skipped   | 0
+dep_var_mapping    | [False, True]
+</pre></li>
+<li>Use the prediction function to evaluate the models. The predicted results are in the <em>prediction</em> column and the actual data is in the <em>target</em> column. For the linear model: <pre class="example">
+SELECT madlib.svm_predict('houses_svm', 'houses', 'id', 'houses_pred');
+SELECT *, price &lt; 100000 AS target FROM houses JOIN houses_pred USING (id) ORDER BY id;
+</pre> For the Gaussian model: <pre class="example">
+SELECT madlib.svm_predict('houses_svm_gaussian', 'houses', 'id', 'houses_pred_gaussian');
+SELECT *, price &lt; 100000 AS target FROM houses JOIN houses_pred_gaussian USING (id) ORDER BY id;
+</pre> Result for the Gaussian model: <pre class="result">
+ id | tax  | bedroom | bath | price  | size |  lot  | prediction | target
+----+------+---------+------+--------+------+-------+------------+--------
+  1 |  590 |       2 |    1 |  50000 |  770 | 22100 | t          | t
+  2 | 1050 |       3 |    2 |  85000 | 1410 | 12000 | t          | t
+  3 |   20 |       3 |    1 |  22500 | 1060 |  3500 | t          | t
+  4 |  870 |       2 |    2 |  90000 | 1300 | 17500 | t          | t
+  5 | 1320 |       3 |    2 | 133000 | 1500 | 30000 | f          | f
+  6 | 1350 |       2 |    1 |  90500 |  820 | 25700 | t          | t
+  7 | 2790 |       3 |  2.5 | 260000 | 2130 | 25000 | f          | f
+  8 |  680 |       2 |    1 | 142500 | 1170 | 22000 | f          | f
+  9 | 1840 |       3 |    2 | 160000 | 1500 | 19000 | f          | f
+ 10 | 3680 |       4 |    2 | 240000 | 2790 | 20000 | f          | f
+ 11 | 1660 |       3 |    1 |  87000 | 1030 | 17500 | t          | t
+ 12 | 1620 |       3 |    2 | 118600 | 1250 | 20000 | f          | f
+ 13 | 3100 |       3 |    2 | 140000 | 1760 | 38000 | f          | f
+ 14 | 2070 |       2 |    3 | 148000 | 1550 | 14000 | f          | f
+ 15 |  650 |       3 |  1.5 |  65000 | 1450 | 12000 | t          | t
+</pre> Note the result may vary somewhat with the platform configuration you are using.</li>
+</ol>
+</dd></dl>
+<p><a class="anchor" id="background"></a></p><dl class="section user"><dt>Technical Background</dt><dd></dd></dl>
+<p>To solve linear SVM, the following objective function is minimized: </p><p class="formulaDsp">
+<img class="formulaDsp" alt="\[ \underset{w,b}{\text{Minimize }} \lambda||w||^2 + \frac{1}{n}\sum_{i=1}^n \ell(y_i,f_{w,b}(x_i)) \]" src="form_521.png"/>
+</p>
+<p>where <img class="formulaInl" alt="$(x_1,y_1),\ldots,(x_n,y_n)$" src="form_522.png"/> are labeled training data and <img class="formulaInl" alt="$\ell(y,f(x))$" src="form_523.png"/> is a loss function. When performing classification, <img class="formulaInl" alt="$\ell(y,f(x)) = \max(0,1-yf(x))$" src="form_524.png"/> is the <em>hinge loss</em>. For regression, the loss function <img class="formulaInl" alt="$\ell(y,f(x)) = \max(0,|y-f(x)|-\epsilon)$" src="form_525.png"/> is used.</p>
+<p>If <img class="formulaInl" alt="$ f_{w,b}(x) = \langle w, x\rangle + b$" src="form_526.png"/> is linear, then the objective function is convex and incremental gradient descent (IGD, or SGD) can be applied to find a global minimum. See Feng, et al. [1] for more details.</p>
+<p>To learn with Gaussian or polynomial kernels, the training data is first mapped via a <em>random feature map</em> in such a way that the usual inner product in the feature space approximates the kernel function in the input space. The linear SVM training function is then run on the resulting data. See the papers [2,3] for more information on random feature maps.</p>
+<p>Also, see the book [4] by Scholkopf and Smola for more details on SVMs in general.</p>
+<p><a class="anchor" id="literature"></a></p><dl class="section user"><dt>Literature</dt><dd></dd></dl>
+<p><a class="anchor" id="svm-lit-1"></a>[1] Xixuan Feng, Arun Kumar, Ben Recht, and Christopher Re: Towards a Unified Architecture for in-RDBMS analytics, in SIGMOD Conference, 2012 <a href="http://www.eecs.berkeley.edu/~brecht/papers/12.FengEtAl.SIGMOD.pdf">http://www.eecs.berkeley.edu/~brecht/papers/12.FengEtAl.SIGMOD.pdf</a></p>
+<p><a class="anchor" id="svm-lit-2"></a>[2] Purushottam Kar and Harish Karnick: Random Feature Maps for Dot Product Kernels, Proceedings of the 15th International Conference on Artificial Intelligence and Statistics, 2012, <a href="http://machinelearning.wustl.edu/mlpapers/paper_files/AISTATS2012_KarK12.pdf">http://machinelearning.wustl.edu/mlpapers/paper_files/AISTATS2012_KarK12.pdf</a></p>
+<p><a class="anchor" id="svm-lit-3"></a>[3] Ali Rahimi and Ben Recht: Random Features for Large-Scale Kernel Machines, Neural Information Processing Systems 2007, <a href="http://www.eecs.berkeley.edu/~brecht/papers/07.rah.rec.nips.pdf">http://www.eecs.berkeley.edu/~brecht/papers/07.rah.rec.nips.pdf</a></p>
+<p><a class="anchor" id="svm-lit-4"></a>[4] Bernhard Scholkopf and Alexander Smola: Learning with Kernels, The MIT Press, Cambridge, MA, 2002.</p>
+<p><a class="anchor" id="svm-lit-5"></a>[5] Vladimir Cherkassky and Yunqian Ma: Practical Selection of SVM Parameters and Noise Estimation for SVM Regression, Neural Networks, 2004 <a href="http://www.ece.umn.edu/users/cherkass/N2002-SI-SVM-13-whole.pdf">http://www.ece.umn.edu/users/cherkass/N2002-SI-SVM-13-whole.pdf</a></p>
+<p><a class="anchor" id="related"></a></p><dl class="section user"><dt>Related Topics</dt><dd></dd></dl>
+<p>File <a class="el" href="svm_8sql__in.html" title="SQL functions for SVM ">svm.sql_in</a> documenting the training function</p>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Thu Apr 7 2016 14:24:10 for MADlib by
+    <a href="http://www.doxygen.org/index.html">
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.10 </li>
+  </ul>
+</div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/blob/c506dd05/docs/latest/group__grp__text__analysis.html
----------------------------------------------------------------------
diff --git a/docs/latest/group__grp__text__analysis.html b/docs/latest/group__grp__text__analysis.html
index b3ac705..99c30e3 100644
--- a/docs/latest/group__grp__text__analysis.html
+++ b/docs/latest/group__grp__text__analysis.html
@@ -24,14 +24,8 @@
 <script type="text/javascript">
   $(document).ready(function() { init_search(); });
 </script>
-<script type="text/x-mathjax-config">
-  MathJax.Hub.Config({
-    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
-    jax: ["input/TeX","output/HTML-CSS"],
-});
-</script><script src="../mathjax/MathJax.js"></script>
 <!-- hack in the navigation tree -->
-<script type="text/javascript" src="navtree_hack.js"></script>
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
 <link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
 <!-- google analytics -->
@@ -40,7 +34,7 @@
   (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
   m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
   })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
-  ga('create', 'UA-45382226-1', 'auto');
+  ga('create', 'UA-45382226-1', 'madlib.net');
   ga('send', 'pageview');
 </script>
 </head>
@@ -50,10 +44,10 @@
 <table cellspacing="0" cellpadding="0">
  <tbody>
  <tr style="height: 56px;">
-  <td id="projectlogo"><a href="http://madlib.incubator.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ ></a></td>
+  <td id="projectlogo"><a href="http://madlib.net"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ ></a></td>
   <td style="padding-left: 0.5em;">
    <div id="projectname">
-   <span id="projectnumber">1.8</span>
+   <span id="projectnumber">1.9</span>
    </div>
    <div id="projectbrief">User Documentation for MADlib</div>
   </td>
@@ -131,7 +125,7 @@ Modules</h2></td></tr>
 <!-- start footer part -->
 <div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
   <ul>
-    <li class="footer">Generated on Mon Jul 27 2015 20:37:45 for MADlib by
+    <li class="footer">Generated on Thu Apr 7 2016 14:24:11 for MADlib by
     <a href="http://www.doxygen.org/index.html">
     <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.10 </li>
   </ul>

http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/blob/c506dd05/docs/latest/group__grp__text__utilities.html
----------------------------------------------------------------------
diff --git a/docs/latest/group__grp__text__utilities.html b/docs/latest/group__grp__text__utilities.html
index d4ad8ca..fdf4599 100644
--- a/docs/latest/group__grp__text__utilities.html
+++ b/docs/latest/group__grp__text__utilities.html
@@ -24,14 +24,8 @@
 <script type="text/javascript">
   $(document).ready(function() { init_search(); });
 </script>
-<script type="text/x-mathjax-config">
-  MathJax.Hub.Config({
-    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
-    jax: ["input/TeX","output/HTML-CSS"],
-});
-</script><script src="../mathjax/MathJax.js"></script>
 <!-- hack in the navigation tree -->
-<script type="text/javascript" src="navtree_hack.js"></script>
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
 <link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
 <!-- google analytics -->
@@ -40,7 +34,7 @@
   (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
   m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
   })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
-  ga('create', 'UA-45382226-1', 'auto');
+  ga('create', 'UA-45382226-1', 'madlib.net');
   ga('send', 'pageview');
 </script>
 </head>
@@ -50,10 +44,10 @@
 <table cellspacing="0" cellpadding="0">
  <tbody>
  <tr style="height: 56px;">
-  <td id="projectlogo"><a href="http://madlib.incubator.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ ></a></td>
+  <td id="projectlogo"><a href="http://madlib.net"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ ></a></td>
   <td style="padding-left: 0.5em;">
    <div id="projectname">
-   <span id="projectnumber">1.8</span>
+   <span id="projectnumber">1.9</span>
    </div>
    <div id="projectbrief">User Documentation for MADlib</div>
   </td>
@@ -170,9 +164,8 @@ INSERT INTO documents VALUES
 (4, 'Look at this cute hamster munching on a piece of broccoli');
 </pre></li>
 <li>Add a new column containing the words (lower-cased) in a text array <pre class="example">
-ALTER TABLE documents DROP COLUMN words;
 ALTER TABLE documents ADD COLUMN words TEXT[];
-UPDATE documents SET words = regexp_split_to_array(lower(doc_contents), E'[\s+\.]');
+UPDATE documents SET words = regexp_split_to_array(lower(doc_contents), E'[\\s+\\.]');
 </pre></li>
 <li>Compute the frequency of each word in each document <pre class="example">
 DROP TABLE IF EXISTS documents_tf;
@@ -308,7 +301,7 @@ SELECT * FROM documents_tf_vocabulary order by wordid;
 <!-- start footer part -->
 <div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
   <ul>
-    <li class="footer">Generated on Mon Jul 27 2015 20:37:45 for MADlib by
+    <li class="footer">Generated on Thu Apr 7 2016 14:24:11 for MADlib by
     <a href="http://www.doxygen.org/index.html">
     <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.10 </li>
   </ul>

http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/blob/c506dd05/docs/latest/group__grp__topic__modelling.html
----------------------------------------------------------------------
diff --git a/docs/latest/group__grp__topic__modelling.html b/docs/latest/group__grp__topic__modelling.html
index 2ceb70c..83c579b 100644
--- a/docs/latest/group__grp__topic__modelling.html
+++ b/docs/latest/group__grp__topic__modelling.html
@@ -24,14 +24,8 @@
 <script type="text/javascript">
   $(document).ready(function() { init_search(); });
 </script>
-<script type="text/x-mathjax-config">
-  MathJax.Hub.Config({
-    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
-    jax: ["input/TeX","output/HTML-CSS"],
-});
-</script><script src="../mathjax/MathJax.js"></script>
 <!-- hack in the navigation tree -->
-<script type="text/javascript" src="navtree_hack.js"></script>
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
 <link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
 <!-- google analytics -->
@@ -40,7 +34,7 @@
   (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
   m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
   })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
-  ga('create', 'UA-45382226-1', 'auto');
+  ga('create', 'UA-45382226-1', 'madlib.net');
   ga('send', 'pageview');
 </script>
 </head>
@@ -50,10 +44,10 @@
 <table cellspacing="0" cellpadding="0">
  <tbody>
  <tr style="height: 56px;">
-  <td id="projectlogo"><a href="http://madlib.incubator.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ ></a></td>
+  <td id="projectlogo"><a href="http://madlib.net"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ ></a></td>
   <td style="padding-left: 0.5em;">
    <div id="projectname">
-   <span id="projectnumber">1.8</span>
+   <span id="projectnumber">1.9</span>
    </div>
    <div id="projectbrief">User Documentation for MADlib</div>
   </td>
@@ -131,7 +125,7 @@ Modules</h2></td></tr>
 <!-- start footer part -->
 <div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
   <ul>
-    <li class="footer">Generated on Mon Jul 27 2015 20:37:45 for MADlib by
+    <li class="footer">Generated on Thu Apr 7 2016 14:24:10 for MADlib by
     <a href="http://www.doxygen.org/index.html">
     <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.10 </li>
   </ul>

http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/blob/c506dd05/docs/latest/group__grp__tree.html
----------------------------------------------------------------------
diff --git a/docs/latest/group__grp__tree.html b/docs/latest/group__grp__tree.html
index 82100cc..6e703f5 100644
--- a/docs/latest/group__grp__tree.html
+++ b/docs/latest/group__grp__tree.html
@@ -24,14 +24,8 @@
 <script type="text/javascript">
   $(document).ready(function() { init_search(); });
 </script>
-<script type="text/x-mathjax-config">
-  MathJax.Hub.Config({
-    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
-    jax: ["input/TeX","output/HTML-CSS"],
-});
-</script><script src="../mathjax/MathJax.js"></script>
 <!-- hack in the navigation tree -->
-<script type="text/javascript" src="navtree_hack.js"></script>
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
 <link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
 <!-- google analytics -->
@@ -40,7 +34,7 @@
   (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
   m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
   })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
-  ga('create', 'UA-45382226-1', 'auto');
+  ga('create', 'UA-45382226-1', 'madlib.net');
   ga('send', 'pageview');
 </script>
 </head>
@@ -50,10 +44,10 @@
 <table cellspacing="0" cellpadding="0">
  <tbody>
  <tr style="height: 56px;">
-  <td id="projectlogo"><a href="http://madlib.incubator.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ ></a></td>
+  <td id="projectlogo"><a href="http://madlib.net"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ ></a></td>
   <td style="padding-left: 0.5em;">
    <div id="projectname">
-   <span id="projectnumber">1.8</span>
+   <span id="projectnumber">1.9</span>
    </div>
    <div id="projectbrief">User Documentation for MADlib</div>
   </td>
@@ -134,7 +128,7 @@ Modules</h2></td></tr>
 <!-- start footer part -->
 <div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
   <ul>
-    <li class="footer">Generated on Mon Jul 27 2015 20:37:45 for MADlib by
+    <li class="footer">Generated on Thu Apr 7 2016 14:24:10 for MADlib by
     <a href="http://www.doxygen.org/index.html">
     <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.10 </li>
   </ul>

http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/blob/c506dd05/docs/latest/group__grp__tsa.html
----------------------------------------------------------------------
diff --git a/docs/latest/group__grp__tsa.html b/docs/latest/group__grp__tsa.html
index d98f19f..1324eb3 100644
--- a/docs/latest/group__grp__tsa.html
+++ b/docs/latest/group__grp__tsa.html
@@ -24,14 +24,8 @@
 <script type="text/javascript">
   $(document).ready(function() { init_search(); });
 </script>
-<script type="text/x-mathjax-config">
-  MathJax.Hub.Config({
-    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
-    jax: ["input/TeX","output/HTML-CSS"],
-});
-</script><script src="../mathjax/MathJax.js"></script>
 <!-- hack in the navigation tree -->
-<script type="text/javascript" src="navtree_hack.js"></script>
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
 <link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
 <!-- google analytics -->
@@ -40,7 +34,7 @@
   (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
   m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
   })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
-  ga('create', 'UA-45382226-1', 'auto');
+  ga('create', 'UA-45382226-1', 'madlib.net');
   ga('send', 'pageview');
 </script>
 </head>
@@ -50,10 +44,10 @@
 <table cellspacing="0" cellpadding="0">
  <tbody>
  <tr style="height: 56px;">
-  <td id="projectlogo"><a href="http://madlib.incubator.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ ></a></td>
+  <td id="projectlogo"><a href="http://madlib.net"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ ></a></td>
   <td style="padding-left: 0.5em;">
    <div id="projectname">
-   <span id="projectnumber">1.8</span>
+   <span id="projectnumber">1.9</span>
    </div>
    <div id="projectbrief">User Documentation for MADlib</div>
   </td>
@@ -131,7 +125,7 @@ Modules</h2></td></tr>
 <!-- start footer part -->
 <div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
   <ul>
-    <li class="footer">Generated on Mon Jul 27 2015 20:37:45 for MADlib by
+    <li class="footer">Generated on Thu Apr 7 2016 14:24:10 for MADlib by
     <a href="http://www.doxygen.org/index.html">
     <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.10 </li>
   </ul>

http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/blob/c506dd05/docs/latest/group__grp__unsupervised.html
----------------------------------------------------------------------
diff --git a/docs/latest/group__grp__unsupervised.html b/docs/latest/group__grp__unsupervised.html
index 7fd4927..58d51c0 100644
--- a/docs/latest/group__grp__unsupervised.html
+++ b/docs/latest/group__grp__unsupervised.html
@@ -24,14 +24,8 @@
 <script type="text/javascript">
   $(document).ready(function() { init_search(); });
 </script>
-<script type="text/x-mathjax-config">
-  MathJax.Hub.Config({
-    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
-    jax: ["input/TeX","output/HTML-CSS"],
-});
-</script><script src="../mathjax/MathJax.js"></script>
 <!-- hack in the navigation tree -->
-<script type="text/javascript" src="navtree_hack.js"></script>
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
 <link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
 <!-- google analytics -->
@@ -40,7 +34,7 @@
   (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
   m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
   })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
-  ga('create', 'UA-45382226-1', 'auto');
+  ga('create', 'UA-45382226-1', 'madlib.net');
   ga('send', 'pageview');
 </script>
 </head>
@@ -50,10 +44,10 @@
 <table cellspacing="0" cellpadding="0">
  <tbody>
  <tr style="height: 56px;">
-  <td id="projectlogo"><a href="http://madlib.incubator.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ ></a></td>
+  <td id="projectlogo"><a href="http://madlib.net"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ ></a></td>
   <td style="padding-left: 0.5em;">
    <div id="projectname">
-   <span id="projectnumber">1.8</span>
+   <span id="projectnumber">1.9</span>
    </div>
    <div id="projectbrief">User Documentation for MADlib</div>
   </td>
@@ -134,7 +128,7 @@ Modules</h2></td></tr>
 <!-- start footer part -->
 <div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
   <ul>
-    <li class="footer">Generated on Mon Jul 27 2015 20:37:45 for MADlib by
+    <li class="footer">Generated on Thu Apr 7 2016 14:24:10 for MADlib by
     <a href="http://www.doxygen.org/index.html">
     <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.10 </li>
   </ul>

http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/blob/c506dd05/docs/latest/group__grp__utilities.html
----------------------------------------------------------------------
diff --git a/docs/latest/group__grp__utilities.html b/docs/latest/group__grp__utilities.html
index d7089c9..129c4bf 100644
--- a/docs/latest/group__grp__utilities.html
+++ b/docs/latest/group__grp__utilities.html
@@ -24,14 +24,8 @@
 <script type="text/javascript">
   $(document).ready(function() { init_search(); });
 </script>
-<script type="text/x-mathjax-config">
-  MathJax.Hub.Config({
-    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
-    jax: ["input/TeX","output/HTML-CSS"],
-});
-</script><script src="../mathjax/MathJax.js"></script>
 <!-- hack in the navigation tree -->
-<script type="text/javascript" src="navtree_hack.js"></script>
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
 <link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
 <!-- google analytics -->
@@ -40,7 +34,7 @@
   (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
   m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
   })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
-  ga('create', 'UA-45382226-1', 'auto');
+  ga('create', 'UA-45382226-1', 'madlib.net');
   ga('send', 'pageview');
 </script>
 </head>
@@ -50,10 +44,10 @@
 <table cellspacing="0" cellpadding="0">
  <tbody>
  <tr style="height: 56px;">
-  <td id="projectlogo"><a href="http://madlib.incubator.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ ></a></td>
+  <td id="projectlogo"><a href="http://madlib.net"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ ></a></td>
   <td style="padding-left: 0.5em;">
    <div id="projectname">
-   <span id="projectnumber">1.8</span>
+   <span id="projectnumber">1.9</span>
    </div>
    <div id="projectbrief">User Documentation for MADlib</div>
   </td>
@@ -121,8 +115,7 @@ $(document).ready(function(){initNavTree('group__grp__utilities.html','');});
 <li>
 <a href="#related">Related Topics</a> </li>
 </ul>
-</div><dl class="section warning"><dt>Warning</dt><dd><em> This MADlib method is still in early stage development. There may be some issues that will be addressed in a future version. Interface and implementation is subject to change. </em></dd></dl>
-<p>The utility module consists of useful utility functions to assist data scientists in using the product. Several of these functions can be used while implementing new algorithms.</p>
+</div><p>The utility module consists of useful utility functions to assist data scientists in using the product. Several of these functions can be used while implementing new algorithms.</p>
 <p><a class="anchor" id="utilities"></a></p><dl class="section user"><dt>Utility Functions</dt><dd></dd></dl>
 <table  class="output">
 <tr>
@@ -166,7 +159,7 @@ $(document).ready(function(){initNavTree('group__grp__utilities.html','');});
 <!-- start footer part -->
 <div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
   <ul>
-    <li class="footer">Generated on Mon Jul 27 2015 20:37:45 for MADlib by
+    <li class="footer">Generated on Thu Apr 7 2016 14:24:11 for MADlib by
     <a href="http://www.doxygen.org/index.html">
     <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.10 </li>
   </ul>

http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/blob/c506dd05/docs/latest/group__grp__utility__functions.html
----------------------------------------------------------------------
diff --git a/docs/latest/group__grp__utility__functions.html b/docs/latest/group__grp__utility__functions.html
index 9dad559..b060290 100644
--- a/docs/latest/group__grp__utility__functions.html
+++ b/docs/latest/group__grp__utility__functions.html
@@ -24,14 +24,8 @@
 <script type="text/javascript">
   $(document).ready(function() { init_search(); });
 </script>
-<script type="text/x-mathjax-config">
-  MathJax.Hub.Config({
-    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
-    jax: ["input/TeX","output/HTML-CSS"],
-});
-</script><script src="../mathjax/MathJax.js"></script>
 <!-- hack in the navigation tree -->
-<script type="text/javascript" src="navtree_hack.js"></script>
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
 <link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
 <!-- google analytics -->
@@ -40,7 +34,7 @@
   (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
   m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
   })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
-  ga('create', 'UA-45382226-1', 'auto');
+  ga('create', 'UA-45382226-1', 'madlib.net');
   ga('send', 'pageview');
 </script>
 </head>
@@ -50,10 +44,10 @@
 <table cellspacing="0" cellpadding="0">
  <tbody>
  <tr style="height: 56px;">
-  <td id="projectlogo"><a href="http://madlib.incubator.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ ></a></td>
+  <td id="projectlogo"><a href="http://madlib.net"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ ></a></td>
   <td style="padding-left: 0.5em;">
    <div id="projectname">
-   <span id="projectnumber">1.8</span>
+   <span id="projectnumber">1.9</span>
    </div>
    <div id="projectbrief">User Documentation for MADlib</div>
   </td>
@@ -124,13 +118,11 @@ Modules</h2></td></tr>
 <tr class="memitem:group__grp__utilities"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__utilities.html">Developer Database Functions</a></td></tr>
 <tr class="memdesc:group__grp__utilities"><td class="mdescLeft">&#160;</td><td class="mdescRight">Provides a collection of user-defined functions for performing common tasks in the database. <br /></td></tr>
 <tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:group__grp__pca"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__pca.html">Dimensionality Reduction</a></td></tr>
-<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:group__grp__data__prep"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__data__prep.html">Encoding Categorical Variables</a></td></tr>
-<tr class="memdesc:group__grp__data__prep"><td class="mdescLeft">&#160;</td><td class="mdescRight">Provides utility functions helpful for data preparation before modeling. <br /></td></tr>
-<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:group__grp__linear__solver"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__linear__solver.html">Linear Solvers</a></td></tr>
 <tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:group__grp__path"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__path.html">Path Functions</a></td></tr>
+<tr class="memdesc:group__grp__path"><td class="mdescLeft">&#160;</td><td class="mdescRight">Path Functions. <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:group__grp__pmml"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__pmml.html">PMML Export</a></td></tr>
 <tr class="memdesc:group__grp__pmml"><td class="mdescLeft">&#160;</td><td class="mdescRight">Implements the PMML XML standard to describe and exchange models produced by data mining and machine learning algorithms. <br /></td></tr>
 <tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
@@ -142,7 +134,7 @@ Modules</h2></td></tr>
 <!-- start footer part -->
 <div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
   <ul>
-    <li class="footer">Generated on Mon Jul 27 2015 20:37:45 for MADlib by
+    <li class="footer">Generated on Thu Apr 7 2016 14:24:11 for MADlib by
     <a href="http://www.doxygen.org/index.html">
     <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.10 </li>
   </ul>

http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/blob/c506dd05/docs/latest/group__grp__utility__functions.js
----------------------------------------------------------------------
diff --git a/docs/latest/group__grp__utility__functions.js b/docs/latest/group__grp__utility__functions.js
index 0800d05..3c3c53b 100644
--- a/docs/latest/group__grp__utility__functions.js
+++ b/docs/latest/group__grp__utility__functions.js
@@ -1,9 +1,8 @@
 var group__grp__utility__functions =
 [
     [ "Developer Database Functions", "group__grp__utilities.html", null ],
-    [ "Dimensionality Reduction", "group__grp__pca.html", "group__grp__pca" ],
-    [ "Encoding Categorical Variables", "group__grp__data__prep.html", null ],
     [ "Linear Solvers", "group__grp__linear__solver.html", "group__grp__linear__solver" ],
+    [ "Path Functions", "group__grp__path.html", null ],
     [ "PMML Export", "group__grp__pmml.html", null ],
     [ "Text Analysis", "group__grp__text__analysis.html", "group__grp__text__analysis" ]
 ];
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/blob/c506dd05/docs/latest/group__grp__validation.html
----------------------------------------------------------------------
diff --git a/docs/latest/group__grp__validation.html b/docs/latest/group__grp__validation.html
index 4b89ee0..03146d5 100644
--- a/docs/latest/group__grp__validation.html
+++ b/docs/latest/group__grp__validation.html
@@ -24,14 +24,8 @@
 <script type="text/javascript">
   $(document).ready(function() { init_search(); });
 </script>
-<script type="text/x-mathjax-config">
-  MathJax.Hub.Config({
-    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
-    jax: ["input/TeX","output/HTML-CSS"],
-});
-</script><script src="../mathjax/MathJax.js"></script>
 <!-- hack in the navigation tree -->
-<script type="text/javascript" src="navtree_hack.js"></script>
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
 <link href="doxygen.css" rel="stylesheet" type="text/css" />
 <link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
 <!-- google analytics -->
@@ -40,7 +34,7 @@
   (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
   m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
   })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
-  ga('create', 'UA-45382226-1', 'auto');
+  ga('create', 'UA-45382226-1', 'madlib.net');
   ga('send', 'pageview');
 </script>
 </head>
@@ -50,10 +44,10 @@
 <table cellspacing="0" cellpadding="0">
  <tbody>
  <tr style="height: 56px;">
-  <td id="projectlogo"><a href="http://madlib.incubator.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ ></a></td>
+  <td id="projectlogo"><a href="http://madlib.net"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ ></a></td>
   <td style="padding-left: 0.5em;">
    <div id="projectname">
-   <span id="projectnumber">1.8</span>
+   <span id="projectnumber">1.9</span>
    </div>
    <div id="projectbrief">User Documentation for MADlib</div>
   </td>
@@ -258,7 +252,7 @@ SELECT madlib.cross_validation_general
 <!-- start footer part -->
 <div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
   <ul>
-    <li class="footer">Generated on Mon Jul 27 2015 20:37:45 for MADlib by
+    <li class="footer">Generated on Thu Apr 7 2016 14:24:10 for MADlib by
     <a href="http://www.doxygen.org/index.html">
     <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.10 </li>
   </ul>