You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ok...@apache.org on 2017/12/28 22:52:07 UTC
[33/51] [abbrv] [partial] madlib-site git commit: Additional updates for 1.13 release

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/6c103d3e/docs/v1.13/group__grp__arraysmatrix.html
----------------------------------------------------------------------
diff --git a/docs/v1.13/group__grp__arraysmatrix.html b/docs/v1.13/group__grp__arraysmatrix.html
new file mode 100644
index 0000000..12e9466
--- /dev/null
+++ b/docs/v1.13/group__grp__arraysmatrix.html
@@ -0,0 +1,175 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.13"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/>
+<title>MADlib: Arrays and Matrices</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+  $(document).ready(initResizable);
+</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { init_search(); });
+</script>
+<script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
+    jax: ["input/TeX","output/HTML-CSS"],
+});
+</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.13</span>
+   </div>
+   <div id="projectbrief">User Documentation for MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.13 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+$(document).ready(function(){initNavTree('group__grp__arraysmatrix.html','');});
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="summary">
+<a href="#groups">Modules</a>  </div>
+  <div class="headertitle">
+<div class="title">Arrays and Matrices<div class="ingroups"><a class="el" href="group__grp__datatrans.html">Data Types and Transformations</a></div></div>  </div>
+</div><!--header-->
+<div class="contents">
+<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
+<p>These modules provide basic mathematical operations to be run on arrays and matrices.</p>
+<p>For a distributed system, a matrix cannot simply be represented as a 2D array of numbers in memory. <b>We provide two forms of distributed representation of a matrix</b>:</p>
+<ul>
+<li>Dense: The matrix is represented as a distributed collection of 1-D arrays. An example 3x10 matrix would be the below table: <pre>
+ row_id |         row_vec
+--------+-------------------------
+   1    | {9,6,5,8,5,6,6,3,10,8}
+   2    | {8,2,2,6,6,10,2,1,9,9}
+   3    | {3,9,9,9,8,6,3,9,5,6}
+</pre></li>
+<li>Sparse: The matrix is represented using the row and column indices for each non-zero entry of the matrix. Example: <pre>
+ row_id | col_id | value
+--------+--------+-------
+      1 |      1 |     9
+      1 |      5 |     6
+      1 |      6 |     6
+      2 |      1 |     8
+      3 |      1 |     3
+      3 |      2 |     9
+      4 |      7 |     0
+(7 rows)
+</pre> &#160; All matrix operations work with either form of representation.</li>
+</ul>
+<p>In many cases, a matrix function can be <b>decomposed to vector operations applied independently on each row of a matrix (or corresponding rows of two matrices)</b>. We have also provided access to these internal vector operations (<a class="el" href="group__grp__array.html">Array Operations</a>) for greater flexibility. Matrix operations like <em>matrix_add</em> use the corresponding vector operation (<em>array_add</em>) and also include additional validation and formatting. Other functions like <em>matrix_mult</em> are complex and use a combination of such vector operations and other SQL operations.</p>
+<p><b>It's important to note</b> that these array functions are only available for the dense format representation of the matrix. In general, the scope of a single array function invocation is limited to only an array (1-dimensional or 2-dimensional) that fits in memory. When such a function is executed on a table of arrays, the function is called multiple times - once for each array (or pair of arrays). In contrast, the scope of a single matrix function invocation is the complete matrix stored as a distributed table. </p>
+<table class="memberdecls">
+<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="groups"></a>
+Modules</h2></td></tr>
+<tr class="memitem:group__grp__array"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__array.html">Array Operations</a></td></tr>
+<tr class="memdesc:group__grp__array"><td class="mdescLeft">&#160;</td><td class="mdescRight">Provides fast array operations supporting other MADlib modules. <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:group__grp__matrix"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__matrix.html">Matrix Operations</a></td></tr>
+<tr class="memdesc:group__grp__matrix"><td class="mdescLeft">&#160;</td><td class="mdescRight">Provides fast matrix operations supporting other MADlib modules. <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:group__grp__matrix__factorization"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__matrix__factorization.html">Matrix Factorization</a></td></tr>
+<tr class="memdesc:group__grp__matrix__factorization"><td class="mdescLeft">&#160;</td><td class="mdescRight">Matrix Factorization methods including Singular Value Decomposition and Low-rank Matrix Factorization. <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:group__grp__linalg"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__linalg.html">Norms and Distance functions</a></td></tr>
+<tr class="memdesc:group__grp__linalg"><td class="mdescLeft">&#160;</td><td class="mdescRight">Provides utility functions for basic linear algebra operations. <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:group__grp__svec"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__svec.html">Sparse Vectors</a></td></tr>
+<tr class="memdesc:group__grp__svec"><td class="mdescLeft">&#160;</td><td class="mdescRight">Implements a sparse vector data type that provides compressed storage of vectors that may have many duplicate elements. <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+</table>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Wed Dec 27 2017 19:05:57 for MADlib by
+    <a href="http://www.doxygen.org/index.html">
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.13 </li>
+  </ul>
+</div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/6c103d3e/docs/v1.13/group__grp__arraysmatrix.js
----------------------------------------------------------------------
diff --git a/docs/v1.13/group__grp__arraysmatrix.js b/docs/v1.13/group__grp__arraysmatrix.js
new file mode 100644
index 0000000..5e12094
--- /dev/null
+++ b/docs/v1.13/group__grp__arraysmatrix.js
@@ -0,0 +1,8 @@
+var group__grp__arraysmatrix =
+[
+    [ "Array Operations", "group__grp__array.html", null ],
+    [ "Matrix Operations", "group__grp__matrix.html", null ],
+    [ "Matrix Factorization", "group__grp__matrix__factorization.html", "group__grp__matrix__factorization" ],
+    [ "Norms and Distance functions", "group__grp__linalg.html", null ],
+    [ "Sparse Vectors", "group__grp__svec.html", null ]
+];
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/6c103d3e/docs/v1.13/group__grp__assoc__rules.html
----------------------------------------------------------------------
diff --git a/docs/v1.13/group__grp__assoc__rules.html b/docs/v1.13/group__grp__assoc__rules.html
new file mode 100644
index 0000000..4339126
--- /dev/null
+++ b/docs/v1.13/group__grp__assoc__rules.html
@@ -0,0 +1,368 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.13"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/>
+<title>MADlib: Apriori Algorithm</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+  $(document).ready(initResizable);
+</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { init_search(); });
+</script>
+<script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
+    jax: ["input/TeX","output/HTML-CSS"],
+});
+</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.13</span>
+   </div>
+   <div id="projectbrief">User Documentation for MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.13 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+$(document).ready(function(){initNavTree('group__grp__assoc__rules.html','');});
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="headertitle">
+<div class="title">Apriori Algorithm<div class="ingroups"><a class="el" href="group__grp__unsupervised.html">Unsupervised Learning</a> &raquo; <a class="el" href="group__grp__association__rules.html">Association Rules</a></div></div>  </div>
+</div><!--header-->
+<div class="contents">
+<div class="toc"><b>Contents</b> <ul>
+<li>
+<a href="#rules">Rules</a> </li>
+<li>
+<a href="#algorithm">Apriori Algorithm</a> </li>
+<li>
+<a href="#syntax">Function Syntax</a> </li>
+<li>
+<a href="#examples">Examples</a> </li>
+<li>
+<a href="#notes">Notes</a> </li>
+<li>
+<a href="#literature">Literature</a> </li>
+<li>
+<a href="#related">Related Topics</a> </li>
+</ul>
+</div><p>This module implements the association rules data mining technique on a transactional data set. Given the names of a table and the columns, minimum support and confidence values, this function generates all single and multidimensional association rules that meet the minimum thresholds.</p>
+<p>Association rule mining is a widely used technique for discovering relationships between variables in a large data set (e.g., items in a store that are commonly purchased together). The classic market basket analysis example using association rules is the "beer and diapers" rule. According to data mining urban legend, a study of customer purchase behavior in a supermarket found that men often purchased beer and diapers together. After making this discovery, the managers strategically placed beer and diapers closer together on the shelves and saw a dramatic increase in sales. In addition to market basket analysis, association rules are also used in bioinformatics, web analytics, and several other fields.</p>
+<p>This type of data mining algorithm uses transactional data. Every transaction event has a unique identification, and each transaction consists of a set of items (or itemset). Purchases are considered binary (either it was purchased or not), and this implementation does not take into consideration the quantity of each item. For the MADlib association rules function, it is assumed that the data is stored in two columns with one item and transaction id per row. Transactions with multiple items will span multiple rows with one row per item.</p>
+<pre>
+    trans_id | product
+    ---------+---------
+           1 | 1
+           1 | 2
+           1 | 3
+           1 | 4
+           2 | 3
+           2 | 4
+           2 | 5
+           3 | 1
+           3 | 4
+           3 | 6
+    ...
+</pre><p><a class="anchor" id="rules"></a></p><dl class="section user"><dt>Rules</dt><dd></dd></dl>
+<p>Association rules take the form "If X, then Y", where X and Y are non-empty itemsets. X and Y are called the antecedent and consequent, or the left-hand-side and right-hand-side, of the rule respectively. Using our previous example, the association rule may state "If {diapers}, then {beer}" with .2 support and .85 confidence.</p>
+<p>The following metrics are defined for any given itemset "X".</p><ul>
+<li>Count: The number of transactions that contain X</li>
+<li>Support: The ratio of transactions that contain X to all transactions, T <p class="formulaDsp">
+\[ S (X) = \frac{Total X}{Total transactions} \]
+</p>
+</li>
+</ul>
+<p>Given any association rule "If X, then Y", the association rules function will also calculate the following metrics:</p><ul>
+<li>Count: The number of transactions that contain X,Y</li>
+<li>Support: The ratio of transactions that contain X,Y to all transactions, T <p class="formulaDsp">
+\[ S (X \Rightarrow Y) = \frac{Total(X \cup Y)}{Total transactions} \]
+</p>
+</li>
+<li>Confidence: The ratio of transactions that contain \( X,Y \) to transactions that contain \( X \). One could view this metric as the conditional probability of \( Y \) , given \( X \) . \( P(Y|X) \) <p class="formulaDsp">
+\[ C (X \Rightarrow Y) = \frac{S(X \cup Y )}{S(X)} \]
+</p>
+</li>
+<li>Lift: The ratio of observed support of \( X,Y \) to the expected support of \( X,Y \) , assuming \( X \) and \( Y \) are independent. <p class="formulaDsp">
+\[ L (X \Rightarrow Y) = \frac{S(X \cup Y )}{S(X) \cdot S(Y)} \]
+</p>
+</li>
+<li><p class="startli">Conviction: The ratio of expected support of \( X \) occurring without \( Y \) assuming \( X \) and \( \neg Y \) are independent, to the observed support of \( X \) occurring without \( Y \). If conviction is greater than 1, then this metric shows that incorrect predictions ( \( X \Rightarrow Y \) ) occur less often than if these two actions were independent. This metric can be viewed as the ratio that the association rule would be incorrect if the actions were independent (e.g., a conviction of 1.5 indicates that if the variables were independent, this rule would be incorrect 50% more often).</p>
+<p class="formulaDsp">
+\[ Conv (X \Rightarrow Y) = \frac{1 - S(Y)}{1 - C(X \Rightarrow Y)} \]
+</p>
+</li>
+</ul>
+<p><a class="anchor" id="algorithm"></a></p><dl class="section user"><dt>Apriori Algorithm</dt><dd></dd></dl>
+<p>Although there are many algorithms that generate association rules, the classic algorithm is called Apriori [1], which we have implemented in this module. It is a breadth-first search, as opposed to depth-first searches like Eclat. Frequent itemsets of order \( n \) are generated from sets of order \( n - 1 \). Using the downward closure property, all sets must have frequent subsets. There are two steps in this algorithm: generating frequent itemsets, and using these itemsets to construct the association rules. A simplified version of the algorithm is as follows, and assumes a minimum level of support and confidence is provided:</p>
+<p><em>Initial</em> <em>step</em> </p><ol type="1">
+<li>Generate all itemsets of order 1.</li>
+<li>Eliminate itemsets that have support less than minimum support.</li>
+</ol>
+<p><em>Main</em> <em>algorithm</em> </p><ol type="1">
+<li>For \( n \ge 2 \), generate itemsets of order \( n \) by combining the itemsets of order \( n - 1 \). This is done by doing the union of two itemsets that have identical items except one.</li>
+<li>Eliminate itemsets that have (n-1) order subsets with insufficient support.</li>
+<li>Eliminate itemsets with insufficient support.</li>
+<li>Repeat until itemsets cannot be generated, or maximum itemset size is exceeded.</li>
+</ol>
+<p><em>Association</em> <em>rule</em> <em>generation</em> </p>
+<p>Given a frequent itemset \( A \) generated from the Apriori algorithm, and all subsets \( B \) , we generate rules such that \( B \Rightarrow (A - B) \) meets minimum confidence requirements.</p>
+<p><a class="anchor" id="syntax"></a></p><dl class="section user"><dt>Function Syntax</dt><dd>Association rules has the following syntax: <pre class="syntax">
+assoc_rules( support,
+             confidence,
+             tid_col,
+             item_col,
+             input_table,
+             output_schema,
+             verbose,
+             max_itemset_size
+           );</pre> This generates all association rules that satisfy the specified minimum <em>support</em> and <em>confidence</em>.</dd></dl>
+<p><b>Arguments</b> </p><dl class="arglist">
+<dt>support </dt>
+<dd><p class="startdd">Minimum level of support needed for each itemset to be included in result.</p>
+<p class="enddd"></p>
+</dd>
+<dt>confidence </dt>
+<dd><p class="startdd">Minimum level of confidence needed for each rule to be included in result.</p>
+<p class="enddd"></p>
+</dd>
+<dt>tid_col </dt>
+<dd><p class="startdd">Name of the column storing the transaction ids.</p>
+<p class="enddd"></p>
+</dd>
+<dt>item_col </dt>
+<dd><p class="startdd">Name of the column storing the products.</p>
+<p class="enddd"></p>
+</dd>
+<dt>input_table </dt>
+<dd><p class="startdd">Name of the table containing the input data.</p>
+<p>The input data is expected to be of the following form: </p><pre>{TABLE|VIEW} <em>input_table</em> (
+    <em>trans_id</em> INTEGER,
+    <em>product</em> TEXT
+)</pre><p>The algorithm maps the product names to consecutive integer ids starting at 1. If they are already structured this way, then the ids will not change. </p>
+<p class="enddd"></p>
+</dd>
+<dt>output_schema </dt>
+<dd><p class="startdd">The name of the schema where the final results will be stored. The schema must be created before calling the function. Alternatively, use <code>NULL</code> to output to the current schema.</p>
+<p>The results containing the rules, support, count, confidence, lift, and conviction are stored in the table <code>assoc_rules</code> in the schema specified by <code>output_schema</code>.</p>
+<p>The table has the following columns. </p><table class="output">
+<tr>
+<th>ruleid </th><td>integer  </td></tr>
+<tr>
+<th>pre </th><td>text  </td></tr>
+<tr>
+<th>post </th><td>text  </td></tr>
+<tr>
+<th>count </th><td>integer  </td></tr>
+<tr>
+<th>support </th><td>double  </td></tr>
+<tr>
+<th>confidence </th><td>double  </td></tr>
+<tr>
+<th>lift </th><td>double  </td></tr>
+<tr>
+<th>conviction </th><td>double  </td></tr>
+</table>
+<p>On Greenplum Database or Apache HAWQ, the table is distributed by the <code>ruleid</code> column.</p>
+<p>The <code>pre</code> and <code>post</code> columns are the itemsets of left and right hand sides of the association rule respectively. The <code>support</code>, <code>confidence</code>, <code>lift</code>, and <code>conviction</code> columns are calculated as described earlier. </p>
+<p class="enddd"></p>
+</dd>
+<dt>verbose </dt>
+<dd><p class="startdd">BOOLEAN, default: FALSE. Determines if details are printed for each iteration as the algorithm progresses.</p>
+<p class="enddd"></p>
+</dd>
+<dt>max_itemset_size </dt>
+<dd>INTEGER, default: generate itemsets of all sizes. Determines the maximum size of frequent itemsets that are used for generating association rules. Must be 2 or more. This parameter can be used to reduce run time for data sets where itemset size is large.  </dd>
+</dl>
+<p><a class="anchor" id="examples"></a></p><dl class="section user"><dt>Examples</dt><dd></dd></dl>
+<p>Let's look at some sample transactional data and generate association rules.</p>
+<ol type="1">
+<li>Create an input dataset: <pre class="example">
+DROP TABLE IF EXISTS test_data;
+CREATE TABLE test_data (
+    trans_id INT,
+    product TEXT
+);
+INSERT INTO test_data VALUES (1, 'beer');
+INSERT INTO test_data VALUES (1, 'diapers');
+INSERT INTO test_data VALUES (1, 'chips');
+INSERT INTO test_data VALUES (2, 'beer');
+INSERT INTO test_data VALUES (2, 'diapers');
+INSERT INTO test_data VALUES (3, 'beer');
+INSERT INTO test_data VALUES (3, 'diapers');
+INSERT INTO test_data VALUES (4, 'beer');
+INSERT INTO test_data VALUES (4, 'chips');
+INSERT INTO test_data VALUES (5, 'beer');
+INSERT INTO test_data VALUES (6, 'beer');
+INSERT INTO test_data VALUES (6, 'diapers');
+INSERT INTO test_data VALUES (6, 'chips');
+INSERT INTO test_data VALUES (7, 'beer');
+INSERT INTO test_data VALUES (7, 'diapers');
+</pre></li>
+<li>Let \( min(support) = .25 \) and \( min(confidence) = .5 \), and the output schema is set to <code>NULL</code> indicating output to the current schema. In this example we set verbose to TRUE so that we have some insight into progress of the function. We can now generate association rules as follows: <pre class="example">
+SELECT * FROM madlib.assoc_rules( .25,            -- Support
+                                  .5,             -- Confidence
+                                  'trans_id',     -- Transaction id col
+                                  'product',      -- Product col
+                                  'test_data',    -- Input data
+                                  NULL,           -- Output schema
+                                  TRUE            -- Verbose output
+                                );
+</pre> Result (iteration details not shown): <pre class="result">
+ output_schema | output_table | total_rules |   total_time    
+---------------+--------------+-------------+-----------------
+ public        | assoc_rules  |           7 | 00:00:00.569254
+(1 row)
+</pre> The association rules are stored in the assoc_rules table: <pre class="example">
+SELECT * FROM assoc_rules
+ORDER BY support DESC, confidence DESC;
+</pre> Result: <pre class="result">
+ ruleid |       pre       |      post      | count |      support      |    confidence     |       lift        |    conviction     
+--------+-----------------+----------------+-------+-------------------+-------------------+-------------------+-------------------
+      2 | {diapers}       | {beer}         |     5 | 0.714285714285714 |                 1 |                 1 |                 0
+      6 | {beer}          | {diapers}      |     5 | 0.714285714285714 | 0.714285714285714 |                 1 |                 1
+      5 | {chips}         | {beer}         |     3 | 0.428571428571429 |                 1 |                 1 |                 0
+      4 | {chips,diapers} | {beer}         |     2 | 0.285714285714286 |                 1 |                 1 |                 0
+      1 | {chips}         | {diapers,beer} |     2 | 0.285714285714286 | 0.666666666666667 | 0.933333333333333 | 0.857142857142857
+      7 | {chips}         | {diapers}      |     2 | 0.285714285714286 | 0.666666666666667 | 0.933333333333333 | 0.857142857142857
+      3 | {beer,chips}    | {diapers}      |     2 | 0.285714285714286 | 0.666666666666667 | 0.933333333333333 | 0.857142857142857
+(7 rows)
+</pre></li>
+<li>Limit association rules generated from itemsets of size at most 2: <pre class="example">
+SELECT * FROM madlib.assoc_rules( .25,            -- Support
+                                  .5,             -- Confidence
+                                  'trans_id',     -- Transaction id col
+                                  'product',      -- Product col
+                                  'test_data',    -- Input data
+                                  NULL,           -- Output schema
+                                  TRUE,           -- Verbose output
+                                  2               -- Max itemset size
+                                );
+</pre> Result (iteration details not shown): <pre class="result">
+ output_schema | output_table | total_rules |   total_time    
+---------------+--------------+-------------+-----------------
+ public        | assoc_rules  |           4 | 00:00:00.565176
+(1 row)
+</pre> The association rules are again stored in the assoc_rules table: <pre class="example">
+SELECT * FROM assoc_rules
+ORDER BY support DESC, confidence DESC;
+</pre> Result: <pre class="result">
+ ruleid |    pre    |   post    | count |      support      |    confidence     |       lift        |    conviction     
+--------+-----------+-----------+-------+-------------------+-------------------+-------------------+-------------------
+      1 | {diapers} | {beer}    |     5 | 0.714285714285714 |                 1 |                 1 |                 0
+      2 | {beer}    | {diapers} |     5 | 0.714285714285714 | 0.714285714285714 |                 1 |                 1
+      3 | {chips}   | {beer}    |     3 | 0.428571428571429 |                 1 |                 1 |                 0
+      4 | {chips}   | {diapers} |     2 | 0.285714285714286 | 0.666666666666667 | 0.933333333333333 | 0.857142857142857
+(4 rows)
+</pre></li>
+<li>Post-processing can now be done on the output table in the case that you want to filter the results. For example, if you want any single item on the left hand side and a particular item on the right hand side: <pre class="example">
+SELECT * FROM assoc_rules WHERE array_upper(pre,1) = 1 AND post = array['beer'];
+</pre> Result: <pre class="result">
+ ruleid |    pre    |  post  | count |      support      | confidence | lift | conviction 
+--------+-----------+--------+-------+-------------------+------------+------+------------
+      1 | {diapers} | {beer} |     5 | 0.714285714285714 |          1 |    1 |          0
+      3 | {chips}   | {beer} |     3 | 0.428571428571429 |          1 |    1 |          0
+(2 rows)
+</pre></li>
+</ol>
+<p><a class="anchor" id="notes"></a></p><dl class="section user"><dt>Notes</dt><dd></dd></dl>
+<p>The association rules function always creates a table named <code>assoc_rules</code>. Make a copy of this table before running the function again if you would like to keep multiple association rule tables.</p>
+<p><a class="anchor" id="literature"></a></p><dl class="section user"><dt>Literature</dt><dd></dd></dl>
+<p>[1] <a href="https://en.wikipedia.org/wiki/Apriori_algorithm">https://en.wikipedia.org/wiki/Apriori_algorithm</a></p>
+<p><a class="anchor" id="related"></a></p><dl class="section user"><dt>Related Topics</dt><dd></dd></dl>
+<p>File <a class="el" href="assoc__rules_8sql__in.html" title="The assoc_rules function computes association rules for a given set of data. The data is assumed to h...">assoc_rules.sql_in</a> documenting the SQL function. </p>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Wed Dec 27 2017 19:05:57 for MADlib by
+    <a href="http://www.doxygen.org/index.html">
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.13 </li>
+  </ul>
+</div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/6c103d3e/docs/v1.13/group__grp__association__rules.html
----------------------------------------------------------------------
diff --git a/docs/v1.13/group__grp__association__rules.html b/docs/v1.13/group__grp__association__rules.html
new file mode 100644
index 0000000..ea514e0
--- /dev/null
+++ b/docs/v1.13/group__grp__association__rules.html
@@ -0,0 +1,139 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.13"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/>
+<title>MADlib: Association Rules</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+  $(document).ready(initResizable);
+</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { init_search(); });
+</script>
+<script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
+    jax: ["input/TeX","output/HTML-CSS"],
+});
+</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ ></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.13</span>
+   </div>
+   <div id="projectbrief">User Documentation for MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.13 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+$(document).ready(function(){initNavTree('group__grp__association__rules.html','');});
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="summary">
+<a href="#groups">Modules</a>  </div>
+  <div class="headertitle">
+<div class="title">Association Rules<div class="ingroups"><a class="el" href="group__grp__unsupervised.html">Unsupervised Learning</a></div></div>  </div>
+</div><!--header-->
+<div class="contents">
+<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
+<p>A collection of methods used to uncover interesting patterns in transactional datasets. </p>
+<table class="memberdecls">
+<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="groups"></a>
+Modules</h2></td></tr>
+<tr class="memitem:group__grp__assoc__rules"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group__grp__assoc__rules.html">Apriori Algorithm</a></td></tr>
+<tr class="memdesc:group__grp__assoc__rules"><td class="mdescLeft">&#160;</td><td class="mdescRight">Computes association rules for a given set of data. <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+</table>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Wed Dec 27 2017 19:05:57 for MADlib by
+    <a href="http://www.doxygen.org/index.html">
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.13 </li>
+  </ul>
+</div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/6c103d3e/docs/v1.13/group__grp__association__rules.js
----------------------------------------------------------------------
diff --git a/docs/v1.13/group__grp__association__rules.js b/docs/v1.13/group__grp__association__rules.js
new file mode 100644
index 0000000..e10c849
--- /dev/null
+++ b/docs/v1.13/group__grp__association__rules.js
@@ -0,0 +1,4 @@
+var group__grp__association__rules =
+[
+    [ "Apriori Algorithm", "group__grp__assoc__rules.html", null ]
+];
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/6c103d3e/docs/v1.13/group__grp__bayes.html
----------------------------------------------------------------------
diff --git a/docs/v1.13/group__grp__bayes.html b/docs/v1.13/group__grp__bayes.html
new file mode 100644
index 0000000..49966a3
--- /dev/null
+++ b/docs/v1.13/group__grp__bayes.html
@@ -0,0 +1,488 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.13"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/>
+<title>MADlib: Naive Bayes Classification</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+  $(document).ready(initResizable);
+</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { init_search(); });
+</script>
+<script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
+    jax: ["input/TeX","output/HTML-CSS"],
+});
+</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ ></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.13</span>
+   </div>
+   <div id="projectbrief">User Documentation for MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.13 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+$(document).ready(function(){initNavTree('group__grp__bayes.html','');});
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="headertitle">
+<div class="title">Naive Bayes Classification<div class="ingroups"><a class="el" href="group__grp__early__stage.html">Early Stage Development</a></div></div>  </div>
+</div><!--header-->
+<div class="contents">
+<div class="toc"><b>Contents</b> <ul>
+<li>
+<a href="#train">Training Function(s)</a> </li>
+<li>
+<a href="#classify">Classify Function(s)</a> </li>
+<li>
+<a href="#probabilities">Probabilities Function(s)</a> </li>
+<li>
+<a href="#adhoc">Ad Hoc Computation</a> </li>
+<li>
+<a href="#notes">Implementation Notes</a> </li>
+<li>
+<a href="#examples">Examples</a> </li>
+<li>
+<a href="#background">Technical Background</a> </li>
+<li>
+<a href="#related">Related Topics</a> </li>
+</ul>
+</div><dl class="section warning"><dt>Warning</dt><dd><em> This MADlib method is still in early stage development. There may be some issues that will be addressed in a future version. Interface and implementation are subject to change. </em></dd></dl>
+<p>Naive Bayes refers to a stochastic model where all independent variables \( a_1, \dots, a_n \) (often referred to as attributes in this context) independently contribute to the probability that a data point belongs to a certain class \( c \).</p>
+<p>Naive Bayes classification estimates feature probabilities and class priors using maximum likelihood or Laplacian smoothing. For numeric attributes, Gaussian smoothing can be used to estimate the feature probabilities. These parameters are then used to classify new data.</p>
+<p><a class="anchor" id="train"></a></p><dl class="section user"><dt>Training Function(s)</dt><dd></dd></dl>
+<p>For data with only categorical attributes, precompute feature probabilities and class priors using the following function:</p>
+<pre class="syntax">
+create_nb_prepared_data_tables ( trainingSource,
+                                 trainingClassColumn,
+                                 trainingAttrColumn,
+                                 numAttrs,
+                                 featureProbsName,
+                                 classPriorsName
+                               )
+</pre><p>For data containing both categorical and numeric attributes, use the following form to precompute the Gaussian parameters (mean and variance) for numeric attributes alongside the feature probabilities for categorical attributes and class priors.</p>
+<pre class="syntax">
+create_nb_prepared_data_tables ( trainingSource,
+                                 trainingClassColumn,
+                                 trainingAttrColumn,
+                                 numericAttrsColumnIndices,
+                                 numAttrs,
+                                 featureProbsName,
+                                 numericAttrParamsName,
+                                 classPriorsName
+                               )
+</pre><p>The <em>trainingSource</em> is expected to be of the following form: </p><pre>{TABLE|VIEW} <em>trainingSource</em> (
+    ...
+    <em>trainingClassColumn</em> INTEGER,
+    <em>trainingAttrColumn</em> INTEGER[] OR NUMERIC[] OR FLOAT8[],
+    ...
+)</pre><p><em>numericAttrsColumnIndices</em> should be of type TEXT, specified as an array of indices (starting from 1) in the <em>trainingAttrColumn</em> attributes-array that correspond to numeric attributes.</p>
+<p>The two output tables are:</p><ul>
+<li><em>featureProbsName</em> &ndash; stores feature probabilities</li>
+<li><em>classPriorsName</em> &ndash; stores the class priors</li>
+</ul>
+<p>In addition to the above, if the function specifying numeric attributes is used, an additional table <em>numericAttrParamsName</em> is created which stores the Gaussian parameters for the numeric attributes.</p>
+<p><a class="anchor" id="classify"></a></p><dl class="section user"><dt>Classify Function(s)</dt><dd></dd></dl>
+<p>Perform Naive Bayes classification: </p><pre class="syntax">
+create_nb_classify_view ( featureProbsName,
+                          classPriorsName,
+                          classifySource,
+                          classifyKeyColumn,
+                          classifyAttrColumn,
+                          numAttrs,
+                          destName
+                        )
+</pre><p>For data with numeric attributes, use the following version:</p>
+<pre class="syntax">
+create_nb_classify_view ( featureProbsName,
+                          classPriorsName,
+                          classifySource,
+                          classifyKeyColumn,
+                          classifyAttrColumn,
+                          numAttrs,
+                          numericAttrParamsName,
+                          destName
+                        )
+</pre><p>The <b>data to classify</b> is expected to be of the following form: </p><pre>{TABLE|VIEW} <em>classifySource</em> (
+    ...
+    <em>classifyKeyColumn</em> ANYTYPE,
+    <em>classifyAttrColumn</em> INTEGER[],
+    ...
+)</pre><p>This function creates the view <code><em>destName</em></code> mapping <em>classifyKeyColumn</em> to the Naive Bayes classification. </p><pre class="result">
+key | nb_classification
+&#160;---+------------------
+...
+</pre><p><a class="anchor" id="probabilities"></a></p><dl class="section user"><dt>Probabilities Function(s)</dt><dd></dd></dl>
+<p>Compute Naive Bayes probabilities. </p><pre class="syntax">
+create_nb_probs_view( featureProbsName,
+                      classPriorsName,
+                      classifySource,
+                      classifyKeyColumn,
+                      classifyAttrColumn,
+                      numAttrs,
+                      destName
+                    )
+</pre><p>For data with numeric attributes, use the following version:</p>
+<pre class="syntax">
+create_nb_probs_view( featureProbsName,
+                      classPriorsName,
+                      classifySource,
+                      classifyKeyColumn,
+                      classifyAttrColumn,
+                      numAttrs,
+                      numericAttrParamsName,
+                      destName
+                    )
+</pre><p>This creates the view <code><em>destName</em></code> mapping <em>classifyKeyColumn</em> and every single class to the Naive Bayes probability: </p><pre class="result">
+key | class | nb_prob
+&#160;---+-------+--------
+...
+</pre><p><a class="anchor" id="adhoc"></a></p><dl class="section user"><dt>Ad Hoc Computation Function</dt><dd></dd></dl>
+<p>The functions <a class="el" href="bayes_8sql__in.html#a798402280fc6db710957ae3ab58767e0" title="Create a view with columns (key, nb_classification) ">create_nb_classify_view()</a> and <a class="el" href="bayes_8sql__in.html#a163afffd0c845d325f060f74bcf02243" title="Create view with columns (key, class, nb_prob) ">create_nb_probs_view()</a> can also be used in an ad hoc fashion, without the precomputation step. In this case, replace the function arguments</p>
+<pre>'<em>featureProbsName</em>', '<em>classPriorsName</em>'</pre><p> with </p><pre>'<em>trainingSource</em>', '<em>trainingClassColumn</em>', '<em>trainingAttrColumn</em>'</pre><p> for data without any numeric attributes and with </p><pre>'<em>trainingSource</em>', '<em>trainingClassColumn</em>', '<em>trainingAttrColumn</em>', '<em>numericAttrsColumnIndices</em>'</pre><p> for data containing numeric attributes as well.</p>
+<p><a class="anchor" id="notes"></a></p><dl class="section user"><dt>Implementation Notes</dt><dd><ul>
+<li>The probabilities computed on the platforms of PostgreSQL and Greenplum database have a small difference due to the nature of floating point computation. Usually this is not important. However, if a data point has <p class="formulaDsp">
+\[ P(C=c_i \mid A) \approx P(C=c_j \mid A) \]
+</p>
+ for two classes, this data point might be classified into different classes on PostgreSQL and Greenplum. This leads to the differences in classifications on PostgreSQL and Greenplum for some data sets, but this should not affect the quality of the results.</li>
+<li>When two classes have equal and highest probability among all classes, the classification result is an array of these two classes, but the order of the two classes is random.</li>
+<li>The current implementation of Naive Bayes classification is suitable for discontinuous (categorical) attributes as well as continuous (numeric) attributes.<br />
+For continuous data, a typical assumption, usually used for small datasets, is that the continuous values associated with each class are distributed according to a Gaussian distribution, and the probabilities \( P(A_i = a \mid C=c) \) are estimated using the Gaussian Distribution formula: <p class="formulaDsp">
+\[ P(A_i=a \mid C=c) = \frac{1}{\sqrt{2\pi\sigma^{2}_c}}exp\left(-\frac{(a-\mu_c)^{2}}{2\sigma^{2}_c}\right) \]
+</p>
+ where \(\mu_c\) and \(\sigma^{2}_c\) are the population mean and variance of the attribute for the class \(c\).<br />
+Another common technique for handling continuous values, which is better for large data sets, is to use binning to discretize the values, and convert the continuous data into categorical bins. This approach is currently not implemented.</li>
+<li>One can provide floating point data to the Naive Bayes classification function. If the corresponding attribute index is not specified in <em>numericAttrsColumnIndices</em>, floating point numbers will be used as symbolic substitutions for categorical data. In this case, the classification would work best if there are sufficient data points for each floating point attribute. However, if floating point numbers are used as continuous data without the attribute being marked as of type numeric in <em>numericAttrsColumnIndices</em>, no warning is raised and the result may not be as expected.</li>
+</ul>
+</dd></dl>
+<p><a class="anchor" id="examples"></a></p><dl class="section user"><dt>Examples</dt><dd></dd></dl>
+<p>The following is an extremely simplified example of the above option #1 which can be verified by hand.</p>
+<ol type="1">
+<li>The training and the classification data. <pre class="example">
+SELECT * FROM training;
+</pre> Result: <pre class="result">
+ id | class | attributes
+&#160;---+-------+------------
+  1 |     1 | {1,2,3}
+  2 |     1 | {1,2,1}
+  3 |     1 | {1,4,3}
+  4 |     2 | {1,2,2}
+  5 |     2 | {0,2,2}
+  6 |     2 | {0,1,3}
+(6 rows)
+</pre> <pre class="example">
+SELECT * FROM toclassify;
+</pre> Result: <pre class="result">
+ id | attributes
+&#160;---+------------
+  1 | {0,2,1}
+  2 | {1,2,3}
+(2 rows)
+</pre></li>
+<li>Precompute feature probabilities and class priors. <pre class="example">
+SELECT madlib.create_nb_prepared_data_tables( 'training',
+                                              'class',
+                                              'attributes',
+                                              3,
+                                              'nb_feature_probs',
+                                              'nb_class_priors'
+                                            );
+</pre></li>
+<li>Optionally check the contents of the precomputed tables. <pre class="example">
+SELECT * FROM nb_class_priors;
+</pre> Result: <pre class="result">
+ class | class_cnt | all_cnt
+&#160;------+-----------+---------
+     1 |         3 |       6
+     2 |         3 |       6
+(2 rows)
+</pre> <pre class="example">
+SELECT * FROM nb_feature_probs;
+</pre> Result: <pre class="result">
+ class | attr | value | cnt | attr_cnt
+&#160;------+------+-------+-----+----------
+     1 |    1 |     0 |   0 |        2
+     1 |    1 |     1 |   3 |        2
+     1 |    2 |     1 |   0 |        3
+     1 |    2 |     2 |   2 |        3
+...
+</pre></li>
+<li>Create the view with Naive Bayes classification and check the results. <pre class="example">
+SELECT madlib.create_nb_classify_view( 'nb_feature_probs',
+                                       'nb_class_priors',
+                                       'toclassify',
+                                       'id',
+                                       'attributes',
+                                       3,
+                                       'nb_classify_view_fast'
+                                     );
+&#160;
+SELECT * FROM nb_classify_view_fast;
+</pre> Result: <pre class="result">
+ key | nb_classification
+&#160;----+-------------------
+   1 | {2}
+   2 | {1}
+(2 rows)
+</pre></li>
+<li>Look at the probabilities for each class (note that we use "Laplacian smoothing"). <pre class="example">
+SELECT madlib.create_nb_probs_view( 'nb_feature_probs',
+                                    'nb_class_priors',
+                                    'toclassify',
+                                    'id',
+                                    'attributes',
+                                    3,
+                                    'nb_probs_view_fast'
+                                  );
+&#160;
+SELECT * FROM nb_probs_view_fast;
+</pre> Result: <pre class="result">
+ key | class | nb_prob
+&#160;----+-------+---------
+   1 |     1 |     0.4
+   1 |     2 |     0.6
+   2 |     1 |    0.75
+   2 |     2 |    0.25
+(4 rows)
+</pre></li>
+</ol>
+<p>The following is an example of using a dataset with both numeric and categorical attributes</p>
+<ol type="1">
+<li>The training and the classification data. Attributes {height(numeric),weight(numeric),shoe size(categorical)}, Class{sex(1=male,2=female)} <pre class="example">
+SELECT * FROM gaussian_data;
+</pre> Result: <pre class="result">
+ id | sex |  attributes   
+&#160;----+-----+---------------
+  1 |   1 | {6,180,12}
+  2 |   1 | {5.92,190,12}
+  3 |   1 | {5.58,170,11}
+  4 |   1 | {5.92,165,11}
+  5 |   2 | {5,100,6}
+  6 |   2 | {5.5,150,6}
+  7 |   2 | {5.42,130,7}
+  8 |   2 | {5.75,150,8}
+(8 rows)
+</pre> <pre class="example">
+SELECT * FROM gaussian_test;
+</pre> Result: <pre class="result">
+ id | sex |  attributes  
+----+-----+--------------
+  9 |   1 | {5.8,180,11}
+ 10 |   2 | {5,160,6}
+(2 rows)
+</pre></li>
+<li>Precompute feature probabilities and class priors. <pre class="example">
+SELECT madlib.create_nb_prepared_data_tables( 'gaussian_data',
+                                              'sex',
+                                              'attributes',
+                                              'ARRAY[1,2]',
+                                              3,
+                                              'categ_feature_probs',
+                                              'numeric_attr_params',
+                                              'class_priors'
+                                            );
+</pre></li>
+<li>Optionally check the contents of the precomputed tables. <pre class="example">
+SELECT * FROM class_priors;
+</pre> Result: <pre class="result">
+class | class_cnt | all_cnt 
+&#160;-------+-----------+---------
+     1 |         4 |       8
+     2 |         4 |       8
+(2 rows)
+</pre> <pre class="example">
+SELECT * FROM categ_feature_probs;
+</pre> Result: <pre class="result">
+ class | attr | value | cnt | attr_cnt 
+-------+------+-------+-----+----------
+     2 |    3 |     6 |   2 |        5
+     1 |    3 |    12 |   2 |        5
+     2 |    3 |     7 |   1 |        5
+     1 |    3 |    11 |   2 |        5
+     2 |    3 |     8 |   1 |        5
+     2 |    3 |    12 |   0 |        5
+     1 |    3 |     6 |   0 |        5
+     2 |    3 |    11 |   0 |        5
+     1 |    3 |     8 |   0 |        5
+     1 |    3 |     7 |   0 |        5
+(10 rows)
+</pre> <pre class="example">
+SELECT * FROM numeric_attr_params;
+</pre> Result: <pre class="result">
+class | attr |      attr_mean       |        attr_var        
+-------+------+----------------------+------------------------
+     1 |    1 |   5.8550000000000000 | 0.03503333333333333333
+     1 |    2 | 176.2500000000000000 |   122.9166666666666667
+     2 |    1 |   5.4175000000000000 | 0.09722500000000000000
+     2 |    2 | 132.5000000000000000 |   558.3333333333333333
+(4 rows)
+</pre></li>
+<li>Create the view with Naive Bayes classification and check the results. <pre class="example">
+SELECT madlib.create_nb_classify_view( 'categ_feature_probs',
+                                       'class_priors',
+                                       'gaussian_test',
+                                       'id',
+                                       'attributes',
+                                       3,
+                                       'numeric_attr_params',
+                                       'classify_view'
+                                     );
+&#160;
+SELECT * FROM classify_view;
+</pre> Result: <pre class="result">
+ key | nb_classification
+&#160;----+-------------------
+   9 | {1}
+  10 | {2}
+(2 rows)
+</pre></li>
+<li>Look at the probabilities for each class <pre class="example">
+SELECT madlib.create_nb_probs_view( 'categ_feature_probs',
+                                       'class_priors',
+                                       'gaussian_test',
+                                       'id',
+                                       'attributes',
+                                       3,
+                                       'numeric_attr_params',
+                                       'probs_view'
+                                  );
+&#160;
+SELECT * FROM probs_view;
+</pre> Result: <pre class="result">
+ key | class |       nb_prob        
+-----+-------+----------------------
+   9 |     1 |    0.993556745948775
+   9 |     2 |  0.00644325405122553
+  10 |     1 | 5.74057538627122e-05
+  10 |     2 |    0.999942594246137
+(4 rows)
+</pre></li>
+</ol>
+<p><a class="anchor" id="background"></a></p><dl class="section user"><dt>Technical Background</dt><dd></dd></dl>
+<p>In detail, <b>Bayes'</b> theorem states that </p><p class="formulaDsp">
+\[ \Pr(C = c \mid A_1 = a_1, \dots, A_n = a_n) = \frac{\Pr(C = c) \cdot \Pr(A_1 = a_1, \dots, A_n = a_n \mid C = c)} {\Pr(A_1 = a_1, \dots, A_n = a_n)} \,, \]
+</p>
+<p> and the <b>naive</b> assumption is that </p><p class="formulaDsp">
+\[ \Pr(A_1 = a_1, \dots, A_n = a_n \mid C = c) = \prod_{i=1}^n \Pr(A_i = a_i \mid C = c) \,. \]
+</p>
+<p> Naive Bayes classification estimates feature probabilities and class priors using maximum likelihood or Laplacian smoothing. These parameters are then used to classify new data.</p>
+<p>A Naive Bayes classifier computes the following formula: </p><p class="formulaDsp">
+\[ \text{classify}(a_1, ..., a_n) = \arg\max_c \left\{ \Pr(C = c) \cdot \prod_{i=1}^n \Pr(A_i = a_i \mid C = c) \right\} \]
+</p>
+<p> where \( c \) ranges over all classes in the training data and probabilities are estimated with relative frequencies from the training set. There are different ways to estimate the feature probabilities \( P(A_i = a \mid C = c) \). The maximum likelihood estimate takes the relative frequencies. That is: </p><p class="formulaDsp">
+\[ P(A_i = a \mid C = c) = \frac{\#(c,i,a)}{\#c} \]
+</p>
+<p> where</p><ul>
+<li>\( \#(c,i,a) \) denotes the # of training samples where attribute \( i \) is \( a \) and class is \( c \)</li>
+<li>\( \#c \) denotes the # of training samples where class is \( c \).</li>
+</ul>
+<p>Since the maximum likelihood sometimes results in estimates of "0", you might want to use a "smoothed" estimate. To do this, you add a number of "virtual" samples and make the assumption that these samples are evenly distributed among the values assumed by attribute \( i \) (that is, the set of all values observed for attribute \( i \) for any class):</p>
+<p class="formulaDsp">
+\[ P(A_i = a \mid C = c) = \frac{\#(c,i,a) + s}{\#c + s \cdot \#i} \]
+</p>
+<p> where</p><ul>
+<li>\( \#i \) denotes the # of distinct values for attribute \( i \) (for all classes)</li>
+<li>\( s \geq 0 \) denotes the smoothing factor.</li>
+</ul>
+<p>The case \( s = 1 \) is known as "Laplace smoothing". The case \( s = 0 \) trivially reduces to maximum-likelihood estimates.</p>
+<p><a class="anchor" id="literature"></a></p><dl class="section user"><dt>Literature</dt><dd></dd></dl>
+<p>[1] Tom Mitchell: Machine Learning, McGraw Hill, 1997. Book chapter <em>Generative and Discriminative Classifiers: Naive Bayes and Logistic Regression</em> available at: <a href="http://www.cs.cmu.edu/~tom/NewChapters.html">http://www.cs.cmu.edu/~tom/NewChapters.html</a></p>
+<p>[2] Wikipedia, Naive Bayes classifier, <a href="http://en.wikipedia.org/wiki/Naive_Bayes_classifier">http://en.wikipedia.org/wiki/Naive_Bayes_classifier</a></p>
+<p><a class="anchor" id="related"></a></p><dl class="section user"><dt>Related Topics</dt><dd>File <a class="el" href="bayes_8sql__in.html" title="SQL functions for naive Bayes. ">bayes.sql_in</a> documenting the SQL functions.</dd></dl>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Wed Dec 27 2017 19:05:58 for MADlib by
+    <a href="http://www.doxygen.org/index.html">
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.13 </li>
+  </ul>
+</div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/6c103d3e/docs/v1.13/group__grp__bfs.html
----------------------------------------------------------------------
diff --git a/docs/v1.13/group__grp__bfs.html b/docs/v1.13/group__grp__bfs.html
new file mode 100644
index 0000000..618a6fb
--- /dev/null
+++ b/docs/v1.13/group__grp__bfs.html
@@ -0,0 +1,414 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.13"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data mining,deep learning,ensemble methods,data science,market basket analysis,affinity analysis,pca,lda,regression,elastic net,huber white,proportional hazards,k-means,latent dirichlet allocation,bayes,support vector machines,svm"/>
+<title>MADlib: Breadth-First Search</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+  $(document).ready(initResizable);
+</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { init_search(); });
+</script>
+<script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
+    jax: ["input/TeX","output/HTML-CSS"],
+});
+</script><script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js"></script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.apache.org"><img alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.13</span>
+   </div>
+   <div id="projectbrief">User Documentation for MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.13 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+$(document).ready(function(){initNavTree('group__grp__bfs.html','');});
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="headertitle">
+<div class="title">Breadth-First Search<div class="ingroups"><a class="el" href="group__grp__graph.html">Graph</a></div></div>  </div>
+</div><!--header-->
+<div class="contents">
+<div class="toc"><b>Contents</b> <ul>
+<li>
+<a href="#bfs">Breadth-First Search</a> </li>
+<li>
+<a href="#notes">Notes</a> </li>
+<li>
+<a href="#examples">Examples</a> </li>
+<li>
+<a href="#literature">Literature</a> </li>
+</ul>
+</div><p>Given a graph and a source vertex, the breadth-first search (BFS) algorithm finds all nodes reachable from the source vertex by searching / traversing the graph in a breadth-first manner.</p>
+<p><a class="anchor" id="bfs"></a></p><dl class="section user"><dt>BFS</dt><dd><pre class="syntax">
+graph_bfs( vertex_table,
+           vertex_id,
+           edge_table,
+           edge_args,
+           source_vertex,
+           out_table,
+           max_distance,
+           directed,
+           grouping_cols
+          )
+</pre></dd></dl>
+<p><b>Arguments</b> </p><dl class="arglist">
+<dt>vertex_table </dt>
+<dd><p class="startdd">TEXT. Name of the table containing the vertex data for the graph. Must contain the column specified in the 'vertex_id' parameter below.</p>
+<p class="enddd"></p>
+</dd>
+<dt>vertex_id </dt>
+<dd><p class="startdd">TEXT, default = 'id'. Name of the column in 'vertex_table' containing vertex ids. The vertex ids are of type INTEGER with no duplicates. They do not need to be contiguous.</p>
+<p class="enddd"></p>
+</dd>
+<dt>edge_table </dt>
+<dd><p class="startdd">TEXT. Name of the table containing the edge data. The edge table must contain columns for source vertex and destination vertex. Column naming convention is described below in the 'edge_args' parameter. In addition to vertex columns, if grouping is used then the columns specified in the 'grouping_cols' parameter must be present. </p>
+<p class="enddd"></p>
+</dd>
+<dt>edge_args </dt>
+<dd><p class="startdd">TEXT. A comma-delimited string containing multiple named arguments of the form "name=value". The following parameters are supported for this string argument:</p><ul>
+<li>src (INTEGER): Name of the column containing the source vertex ids in the edge table. Default column name is 'src'. (This is not to be confused with the 'source_vertex' argument passed to the BFS function.)</li>
+<li>dest (INTEGER): Name of the column containing the destination vertex ids in the edge table. Default column name is 'dest'.</li>
+</ul>
+<p class="enddd"></p>
+</dd>
+<dt>source_vertex </dt>
+<dd><p class="startdd">INTEGER. The source vertex id for the algorithm to start. This vertex id must exist in the 'vertex_id' column of 'vertex_table'.</p>
+<p class="enddd"></p>
+</dd>
+<dt>out_table </dt>
+<dd><p class="startdd">TEXT. Name of the table to store the result of BFS. It contains a row for every vertex that is reachable from the source_vertex. In the presence of grouping columns, only those edges are used for which there are no NULL values in any grouping column. The output table will have the following columns (in addition to the grouping columns):</p><ul>
+<li>vertex_id : The id for any node reachable from source_vertex in addition to the source_vertex. Will use the input parameter 'vertex_id' for column naming.</li>
+<li>dist : The distance in number of edges (or hops) from the source_vertex to where this vertex is located.</li>
+<li>parent : The parent of this vertex in BFS traversal of the graph from source_vertex. Will use 'parent' for column naming. For the case where vertex_id = source_vertex, the value for parent is NULL.</li>
+</ul>
+<p>A summary table named &lt;out_table&gt;_summary is also created. This is an internal table that keeps a record of the input parameters. </p>
+<p class="enddd"></p>
+</dd>
+<dt>max_distance (optional) </dt>
+<dd><p class="startdd">INT, default = NULL. Maximum distance to traverse from the source vertex. When this value is NULL, the traversal continues until it reaches a leaf node. E.g., if set to 1, only adjacent vertices are returned; if set to 7, vertices up to a maximum distance of 7 vertices away are returned.</p>
+<p class="enddd"></p>
+</dd>
+<dt>directed (optional) </dt>
+<dd><p class="startdd">BOOLEAN, default = FALSE. If TRUE the graph will be treated as directed, else it will be treated as an undirected graph.</p>
+<p class="enddd"></p>
+</dd>
+<dt>grouping_cols (optional) </dt>
+<dd>TEXT, default = NULL. A comma-separated list of columns used to group the input into discrete subgraphs. These columns must exist in the edge table. When this value is NULL, no grouping is used and a single BFS result is generated. <dl class="section note"><dt>Note</dt><dd>Expressions are not currently supported for 'grouping_cols'.</dd></dl>
+</dd>
+</dl>
+<p><a class="anchor" id="notes"></a></p><dl class="section user"><dt>Notes</dt><dd></dd></dl>
+<p>The graph_bfs function is a SQL implementation of the well-known breadth-first search algorithm [1] modified appropriately for a relational database. It will find any node in the graph reachable from the source_vertex only once. If a node is reachable by many different paths from the source_vertex (i.e. has more than one parent), then only one of those parents is present in the output table. The BFS result will, in general, be different for different choices of source_vertex.</p>
+<p><a class="anchor" id="examples"></a></p><dl class="section user"><dt>Examples</dt><dd></dd></dl>
+<ol type="1">
+<li>Create vertex and edge tables to represent the graph: <pre class="syntax">
+DROP TABLE IF EXISTS vertex, edge;
+CREATE TABLE vertex(
+        id INTEGER
+        );
+CREATE TABLE edge(
+        src INTEGER,
+        dest INTEGER
+        );
+INSERT INTO vertex VALUES
+(0),
+(1),
+(2),
+(3),
+(4),
+(5),
+(6),
+(7),
+(8),
+(9),
+(10),
+(11)
+;
+INSERT INTO edge VALUES
+(0, 5),
+(1, 0),
+(1, 3),
+(2, 6),
+(3, 4),
+(3, 5),
+(4, 2),
+(8, 9),
+(9, 10),
+(9, 11),
+(10, 8);
+</pre></li>
+<li>Traverse undirected graph from vertex 3: <pre class="syntax">
+DROP TABLE IF EXISTS out, out_summary;
+SELECT madlib.graph_bfs(
+                         'vertex',      -- Vertex table
+                         NULL,          -- Vertex id column (NULL means use default naming)
+                         'edge',        -- Edge table
+                         NULL,          -- Edge arguments (NULL means use default naming)
+                         3,             -- Source vertex for BFS
+                         'out');        -- Output table of nodes reachable from source_vertex
+                         -- Default values used for the other arguments
+SELECT * FROM out ORDER BY dist,id;
+</pre> <pre class="result">
+ id | dist | parent 
+----+------+--------
+  3 |    0 |       
+  1 |    1 |      3
+  4 |    1 |      3
+  5 |    1 |      3
+  0 |    2 |      1
+  2 |    2 |      4
+  6 |    3 |      2
+(7 rows)
+</pre> <pre class="syntax">
+SELECT * FROM out_summary;
+</pre> <pre class="result">
+ vertex_table | vertex_id | edge_table | edge_args | source_vertex | out_table | max_distance | directed | grouping_cols 
+--------------+-----------+------------+-----------+---------------+-----------+--------------+----------+---------------
+ vertex       | NULL      | edge       | NULL      |             3 | out       |              |          | NULL
+(1 row)
+</pre></li>
+<li>In this example, we use max_distance to limit the search distance. <pre class="syntax">
+DROP TABLE IF EXISTS out_max, out_max_summary;
+SELECT madlib.graph_bfs(
+                         'vertex',      -- Vertex table
+                         NULL,          -- Vertex id column (NULL means use default naming)
+                         'edge',        -- Edge table
+                         NULL,          -- Edge arguments (NULL means use default naming)
+                         3,             -- Source vertex for BFS
+                         'out_max',     -- Output table of nodes reachable from source_vertex
+                         2);            -- Maximum distance to traverse from source_vertex        
+                         -- Default values used for the other arguments
+SELECT * FROM out_max ORDER BY dist,id;
+</pre> <pre class="result">
+ id | dist | parent 
+----+------+--------
+  3 |    0 |       
+  1 |    1 |      3
+  4 |    1 |      3
+  5 |    1 |      3
+  0 |    2 |      1
+  2 |    2 |      4
+(6 rows)
+</pre></li>
+<li>Now let's do an example using different column names in the tables (i.e., not the defaults). Create the vertex and edge tables: <pre class="syntax">
+DROP TABLE IF EXISTS vertex_alt, edge_alt;
+CREATE TABLE vertex_alt AS SELECT id AS v_id FROM vertex;
+CREATE TABLE edge_alt AS SELECT src AS n1, dest AS n2 FROM edge;
+</pre></li>
+<li>Run BFS from vertex 8: <pre class="syntax">
+DROP TABLE IF EXISTS out_alt, out_alt_summary;
+SELECT madlib.graph_bfs(
+                         'vertex_alt',                  -- Vertex table
+                         'v_id',                        -- Vertex id column (NULL means use default naming)
+                         'edge_alt',                    -- Edge table
+                         'src=n1, dest=n2',             -- Edge arguments (NULL means use default naming)
+                         8,                             -- Source vertex for BFS
+                         'out_alt');                    -- Output table of nodes reachable from source_vertex
+SELECT * FROM out_alt ORDER BY v_id;
+</pre> <pre class="result">
+ v_id | dist | parent 
+------+------+--------
+    8 |    0 |       
+    9 |    1 |      8
+   10 |    1 |      8
+   11 |    2 |      9
+(4 rows)
+</pre></li>
+<li>Now we show an example where the graph is treated as a directed graph. <pre class="syntax">
+DROP TABLE IF EXISTS out_alt_dir, out_alt_dir_summary;
+SELECT madlib.graph_bfs(
+                         'vertex_alt',                  -- Vertex table
+                         'v_id',                        -- Vertex id column (NULL means use default naming)
+                         'edge_alt',                    -- Edge table
+                         'src=n1, dest=n2',             -- Edge arguments (NULL means use default naming)
+                         8,                             -- Source vertex for BFS
+                         'out_alt_dir',                 -- Output table of nodes reachable from source_vertex
+                         NULL,                          -- Maximum distance to traverse from source_vertex
+                         TRUE);                         -- Flag for specifying directed graph
+SELECT * FROM out_alt_dir ORDER BY v_id;
+</pre> <pre class="result">
+ v_id | dist | parent 
+------+------+--------
+    8 |    0 |       
+    9 |    1 |      8
+   10 |    2 |      9
+   11 |    2 |      9
+(4 rows)
+</pre> Notice that, with the graph being treated as directed, the parent of v_id=10 is now vertex 9 and not 8 as in the undirected case.</li>
+<li>Create a graph with 2 groups: <pre class="syntax">
+DROP TABLE IF EXISTS edge_gr;
+CREATE TABLE edge_gr(
+                  g1 INTEGER,
+                  g2 TEXT,
+                  src INTEGER,
+                  dest INTEGER
+                );
+INSERT INTO edge_gr VALUES
+(100, 'a', 0, 5),
+(100, 'a', 1, 0),
+(100, 'a', 1, 3),
+(100, 'a', 2, 6),
+(100, 'a', 3, 4),
+(100, 'a', 3, 5),
+(100, 'a', 4, 2),
+(100, 'a', 8, 9),
+(100, 'a', 9, 10),
+(100, 'a', 9, 11),
+(100, 'a', 10, 8),
+(202, 'c', 8, 9),
+(202, 'c', 9, 10),
+(202, 'c', 9, 11),
+(202, 'c', 10, 8)
+;
+</pre></li>
+<li>Run BFS for all groups from a given source_vertex. <pre class="syntax">
+DROP TABLE IF EXISTS out_gr, out_gr_summary;
+SELECT madlib.graph_bfs(
+                         'vertex',      -- Vertex table
+                         NULL,          -- Vertex id column (NULL means use default naming)
+                         'edge_gr',     -- Edge table
+                         NULL,          -- Edge arguments (NULL means use default naming)
+                         8,             -- Source vertex for BFS
+                         'out_gr',      -- Output table of nodes reachable from source_vertex
+                         NULL,          -- Maximum distance to traverse from source_vertex
+                         NULL,          -- Flag for specifying directed graph
+                         'g1,g2'        -- Grouping columns
+);
+SELECT * FROM out_gr ORDER BY g1,g2,dist,id;
+</pre> <pre class="result">
+ g1  | g2 | id | dist | parent 
+-----+----+----+------+--------
+ 100 | a  |  8 |    0 |       
+ 100 | a  |  9 |    1 |      8
+ 100 | a  | 10 |    1 |      8
+ 100 | a  | 11 |    2 |      9
+ 202 | c  |  8 |    0 |       
+ 202 | c  |  9 |    1 |      8
+ 202 | c  | 10 |    1 |      8
+ 202 | c  | 11 |    2 |      9
+(8 rows)
+</pre> If source_vertex is not present in a group, then that group will not appear in the output table. <pre class="syntax">
+DROP TABLE IF EXISTS out_gr, out_gr_summary;
+SELECT madlib.graph_bfs(
+                         'vertex',      -- Vertex table
+                         NULL,          -- Vertex id column (NULL means use default naming)
+                         'edge_gr',     -- Edge table
+                         NULL,          -- Edge arguments (NULL means use default naming)
+                         3,             -- Source vertex for BFS
+                         'out_gr',      -- Output table of nodes reachable from source_vertex
+                         NULL,          -- Maximum distance to traverse from source_vertex
+                         NULL,          -- Flag for specifying directed graph
+                         'g1,g2'        -- Grouping columns
+);
+SELECT * FROM out_gr ORDER BY g1,g2,dist,id;
+</pre> <pre class="result">
+ g1  | g2 | id | dist | parent 
+-----+----+----+------+--------
+ 100 | a  |  3 |    0 |       
+ 100 | a  |  1 |    1 |      3
+ 100 | a  |  4 |    1 |      3
+ 100 | a  |  5 |    1 |      3
+ 100 | a  |  0 |    2 |      1
+ 100 | a  |  2 |    2 |      4
+ 100 | a  |  6 |    3 |      2
+(7 rows)
+</pre></li>
+</ol>
+<p><a class="anchor" id="literature"></a></p><dl class="section user"><dt>Literature</dt><dd></dd></dl>
+<p>[1] Breadth-first Search algorithm. <a href="https://en.wikipedia.org/wiki/Breadth-first_search">https://en.wikipedia.org/wiki/Breadth-first_search</a> </p>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Wed Dec 27 2017 19:05:57 for MADlib by
+    <a href="http://www.doxygen.org/index.html">
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.13 </li>
+  </ul>
+</div>
+</body>
+</html>