You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datafu.apache.org by mh...@apache.org on 2016/08/10 22:07:31 UTC

svn commit: r1755883 [7/49] - in /incubator/datafu/site: ./ blog/ blog/2012/01/10/ blog/2013/01/24/ blog/2013/09/04/ blog/2013/10/03/ blog/2014/04/27/ blog/2015/11/17/ blog/2016/ blog/2016/08/ blog/2016/08/10/ community/ docs/ docs/datafu/ docs/datafu/...

Added: incubator/datafu/site/docs/datafu/1.3.1/datafu/pig/hash/lsh/CosineDistanceHash.html
URL: http://svn.apache.org/viewvc/incubator/datafu/site/docs/datafu/1.3.1/datafu/pig/hash/lsh/CosineDistanceHash.html?rev=1755883&view=auto
==============================================================================
--- incubator/datafu/site/docs/datafu/1.3.1/datafu/pig/hash/lsh/CosineDistanceHash.html (added)
+++ incubator/datafu/site/docs/datafu/1.3.1/datafu/pig/hash/lsh/CosineDistanceHash.html Wed Aug 10 22:07:27 2016
@@ -0,0 +1,467 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!-- NewPage -->
+<html lang="en">
+<head>
+<!-- Generated by javadoc (version 1.7.0_79) on Wed Aug 10 15:01:03 PDT 2016 -->
+<title>CosineDistanceHash (datafu-pig 1.3.1 API)</title>
+<meta name="date" content="2016-08-10">
+<link rel="stylesheet" type="text/css" href="../../../../stylesheet.css" title="Style">
+</head>
+<body>
+<script type="text/javascript"><!--
+    if (location.href.indexOf('is-external=true') == -1) {
+        parent.document.title="CosineDistanceHash (datafu-pig 1.3.1 API)";
+    }
+//-->
+</script>
+<noscript>
+<div>JavaScript is disabled on your browser.</div>
+</noscript>
+<!-- ========= START OF TOP NAVBAR ======= -->
+<div class="topNav"><a name="navbar_top">
+<!--   -->
+</a><a href="#skip-navbar_top" title="Skip navigation links"></a><a name="navbar_top_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../../overview-summary.html">Overview</a></li>
+<li><a href="package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../../index-all.html">Index</a></li>
+<li><a href="../../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li>Prev Class</li>
+<li><a href="../../../../datafu/pig/hash/lsh/L1PStableHash.html" title="class in datafu.pig.hash.lsh"><span class="strong">Next Class</span></a></li>
+</ul>
+<ul class="navList">
+<li><a href="../../../../index.html?datafu/pig/hash/lsh/CosineDistanceHash.html" target="_top">Frames</a></li>
+<li><a href="CosineDistanceHash.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_top">
+<li><a href="../../../../allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_top");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li><a href="#nested_classes_inherited_from_class_org.apache.pig.EvalFunc">Nested</a>&nbsp;|&nbsp;</li>
+<li><a href="#fields_inherited_from_class_datafu.pig.hash.lsh.LSHFunc">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor_summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li><a href="#constructor_detail">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_detail">Method</a></li>
+</ul>
+</div>
+<a name="skip-navbar_top">
+<!--   -->
+</a></div>
+<!-- ========= END OF TOP NAVBAR ========= -->
+<!-- ======== START OF CLASS DATA ======== -->
+<div class="header">
+<div class="subTitle">datafu.pig.hash.lsh</div>
+<h2 title="Class CosineDistanceHash" class="title">Class CosineDistanceHash</h2>
+</div>
+<div class="contentContainer">
+<ul class="inheritance">
+<li>java.lang.Object</li>
+<li>
+<ul class="inheritance">
+<li>org.apache.pig.EvalFunc&lt;org.apache.pig.data.DataBag&gt;</li>
+<li>
+<ul class="inheritance">
+<li><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">datafu.pig.hash.lsh.LSHFunc</a></li>
+<li>
+<ul class="inheritance">
+<li>datafu.pig.hash.lsh.CosineDistanceHash</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+<div class="description">
+<ul class="blockList">
+<li class="blockList">
+<hr>
+<br>
+<pre>public class <span class="strong">CosineDistanceHash</span>
+extends <a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></pre>
+<div class="block">From Wikipedia's article on <a href="http://en.wikipedia.org/wiki/Locality-sensitive_hashing" target="_blank">Locality Sensitive Hashing</a>:
+ <pre>
+ Locality-sensitive hashing (LSH) is a method of performing probabilistic dimension reduction of high-dimensional data. 
+ The basic idea is to hash the input items so that similar items are mapped to the same buckets with high probability 
+ (the number of buckets being much smaller than the universe of possible input items).
+ </pre>
+ 
+ In particular, this implementation implements a locality sensitive hashing scheme which maps high-dimensional vectors which are
+ close together (with high probability) according to <a href="http://en.wikipedia.org/wiki/Cosine_similarity" target="_blank">Cosine Similarity</a>
+ into the same buckets.  Each LSH maps a vector onto one side or the other of a random hyperplane, thereby producing a single
+ bit as the hash value.  Multiple, independent, hashes can be run on the same input and aggregated together to form a more
+ broad domain than a single bit.
+ 
+ For more information, see Charikar, Moses S. (2002). "Similarity Estimation Techniques from Rounding Algorithms". Proceedings of the 34th Annual ACM Symposium on Theory of Computing 2002.</div>
+</li>
+</ul>
+</div>
+<div class="summary">
+<ul class="blockList">
+<li class="blockList">
+<!-- ======== NESTED CLASS SUMMARY ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="nested_class_summary">
+<!--   -->
+</a>
+<h3>Nested Class Summary</h3>
+<ul class="blockList">
+<li class="blockList"><a name="nested_classes_inherited_from_class_org.apache.pig.EvalFunc">
+<!--   -->
+</a>
+<h3>Nested classes/interfaces inherited from class&nbsp;org.apache.pig.EvalFunc</h3>
+<code>org.apache.pig.EvalFunc.SchemaType</code></li>
+</ul>
+</li>
+</ul>
+<!-- =========== FIELD SUMMARY =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="field_summary">
+<!--   -->
+</a>
+<h3>Field Summary</h3>
+<ul class="blockList">
+<li class="blockList"><a name="fields_inherited_from_class_datafu.pig.hash.lsh.LSHFunc">
+<!--   -->
+</a>
+<h3>Fields inherited from class&nbsp;datafu.pig.hash.lsh.<a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></h3>
+<code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#lsh">lsh</a></code></li>
+</ul>
+<ul class="blockList">
+<li class="blockList"><a name="fields_inherited_from_class_org.apache.pig.EvalFunc">
+<!--   -->
+</a>
+<h3>Fields inherited from class&nbsp;org.apache.pig.EvalFunc</h3>
+<code>log, pigLogger, reporter, returnType</code></li>
+</ul>
+</li>
+</ul>
+<!-- ======== CONSTRUCTOR SUMMARY ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor_summary">
+<!--   -->
+</a>
+<h3>Constructor Summary</h3>
+<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation">
+<caption><span>Constructors</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colOne" scope="col">Constructor and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colOne"><code><strong><a href="../../../../datafu/pig/hash/lsh/CosineDistanceHash.html#CosineDistanceHash(java.lang.String,%20java.lang.String,%20java.lang.String)">CosineDistanceHash</a></strong>(java.lang.String&nbsp;sDim,
+                  java.lang.String&nbsp;sRepeat,
+                  java.lang.String&nbsp;sNumHashes)</code>&nbsp;</td>
+</tr>
+<tr class="rowColor">
+<td class="colOne"><code><strong><a href="../../../../datafu/pig/hash/lsh/CosineDistanceHash.html#CosineDistanceHash(java.lang.String,%20java.lang.String,%20java.lang.String,%20java.lang.String)">CosineDistanceHash</a></strong>(java.lang.String&nbsp;sDim,
+                  java.lang.String&nbsp;sRepeat,
+                  java.lang.String&nbsp;sNumHashes,
+                  java.lang.String&nbsp;sSeed)</code>
+<div class="block">Locality sensitive hash that maps vectors onto 0,1 in such a way that colliding
+ vectors are "near" one another according to cosine similarity with high probability.</div>
+</td>
+</tr>
+</table>
+</li>
+</ul>
+<!-- ========== METHOD SUMMARY =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method_summary">
+<!--   -->
+</a>
+<h3>Method Summary</h3>
+<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
+<caption><span>Methods</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colFirst" scope="col">Modifier and Type</th>
+<th class="colLast" scope="col">Method and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>protected <a href="../../../../datafu/pig/hash/lsh/interfaces/LSHCreator.html" title="class in datafu.pig.hash.lsh.interfaces">LSHCreator</a></code></td>
+<td class="colLast"><code><strong><a href="../../../../datafu/pig/hash/lsh/CosineDistanceHash.html#createLSHCreator()">createLSHCreator</a></strong>()</code>&nbsp;</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>protected int</code></td>
+<td class="colLast"><code><strong><a href="../../../../datafu/pig/hash/lsh/CosineDistanceHash.html#getDimension()">getDimension</a></strong>()</code>&nbsp;</td>
+</tr>
+</table>
+<ul class="blockList">
+<li class="blockList"><a name="methods_inherited_from_class_datafu.pig.hash.lsh.LSHFunc">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;datafu.pig.hash.lsh.<a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></h3>
+<code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#exec(org.apache.pig.data.Tuple)">exec</a>, <a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#getSeed()">getSeed</a>, <a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#outputSchema(org.apache.pig.impl.logicalLayer.schema.Schema)">outputSchema</a></code></li>
+</ul>
+<ul class="blockList">
+<li class="blockList"><a name="methods_inherited_from_class_org.apache.pig.EvalFunc">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;org.apache.pig.EvalFunc</h3>
+<code>allowCompileTimeCalculation, finish, getArgToFuncMapping, getCacheFiles, getInputSchema, getLogger, getPigLogger, getReporter, getReturnType, getSchemaName, getSchemaType, getShipFiles, isAsynchronous, progress, setInputSchema, setPigLogger, setReporter, setUDFContextSignature, warn</code></li>
+</ul>
+<ul class="blockList">
+<li class="blockList"><a name="methods_inherited_from_class_java.lang.Object">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;java.lang.Object</h3>
+<code>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+<div class="details">
+<ul class="blockList">
+<li class="blockList">
+<!-- ========= CONSTRUCTOR DETAIL ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor_detail">
+<!--   -->
+</a>
+<h3>Constructor Detail</h3>
+<a name="CosineDistanceHash(java.lang.String, java.lang.String, java.lang.String, java.lang.String)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>CosineDistanceHash</h4>
+<pre>public&nbsp;CosineDistanceHash(java.lang.String&nbsp;sDim,
+                  java.lang.String&nbsp;sRepeat,
+                  java.lang.String&nbsp;sNumHashes,
+                  java.lang.String&nbsp;sSeed)</pre>
+<div class="block">Locality sensitive hash that maps vectors onto 0,1 in such a way that colliding
+ vectors are "near" one another according to cosine similarity with high probability.  
+ 
+ <p>
+ Generally, multiple LSH are combined via repetition to increase the range of the hash function to the full set of longs.
+ The number of functions which you want to internally repeat is specified by the sRepeat parameter.
+ 
+ The size of the hash family corresponds to the number of independent hashes you want to apply to the data.
+ In a k-near neighbors style of searching, this corresponds to the number of neighbors you want to find
+ (i.e. the number of vectors within a distance according to cosine similarity).
+ 
+ <p>
+ Consider the following example where we input some 3-dimensional points and a set of 3-dimensional queries
+ and find the nearest neighbors of the query points:
+ <pre>
+ -- Create a CosineDistanceHash of 
+ --   3 dimensional data
+ --   1500 internal hashes (being combined into one hash)
+ --   family of 5 hashes
+ --   with a seed of 0
+ 
+ -- This creates a bag of tuples:
+ --   lsh_id:Integer the family ID (in this case, 0-4)
+ --   hash:Long the hash 
+ 
+ define LSH datafu.pig.hash.lsh.CosineDistanceHash('3', '1500', '5', '0');
+ define METRIC datafu.pig.hash.lsh.metric.Cosine();
+
+ PTS = LOAD 'input' AS (dim1:double, dim2:double, dim3:double);
+ 
+ --hash the input points
+ PTS_HASHED = foreach PTS generate TOTUPLE(dim1, dim2, dim3) as pt
+                    , FLATTEN(LSH(TOTUPLE(dim1, dim2, dim3)));
+ 
+ -- the hash family ID and the hash should group the input points into partitions
+ PARTITIONS = group PTS_HASHED by (lsh_id, hash);
+ 
+ -- take in the query points and hash them
+ QUERIES = LOAD 'queries' as (dim1:double, dim2:double, dim3:double);
+ QUERIES_HASHED = foreach QUERIES generate TOTUPLE(dim1, dim2, dim3) as query_pt
+                        , FLATTEN(LSH(TOTUPLE(dim1, dim2, dim3)))
+                        ;
+ 
+ -- join the hashed query points with the (presumably larger) list of input data split by partitions
+ QUERIES_W_PARTS = join QUERIES_HASHED by (lsh_id, hash), PARTITIONS by (group.$0, group.$1);
+ 
+ -- Now, use the appropriate METRIC UDF (in this case Cosine distance) to find the first point within
+ -- a parameterized threshold (in this case, .001).  It takes:
+ --   query_pt:Tuple the query point
+ --   threshold:Double the threshold, so that if the distance between the query point and a point
+ --                    in the partition is less than this threshold, it returns the point (and stops searching)
+ --   partition:Bag The bag of tuples in the partition.
+ 
+ -- Apply the metric to each query point and the bag of tuples from its matching partition
+ NEAR_NEIGHBORS = foreach QUERIES_W_PARTS generate query_pt as query_pt
+                                                 , METRIC(query_pt, .001, PTS_HASHED) as neighbor
+                                                 ;
+ describe NEAR_NEIGHBORS;
+ -- {query_pt: (dim1: double,dim2: double,dim3: double)
+ -- ,neighbor: (pt: (dim1: double,dim2: double,dim3: double)
+ --            ,lsh::lsh_id: int
+ --            ,lsh::hash: long
+ --            )
+ -- }
+ 
+ -- project out the query and the matching point
+ NEIGHBORS_PROJ = foreach NEAR_NEIGHBORS {
+  generate query_pt as query_pt, neighbor.pt as matching_pts;
+ };
+ 
+ -- Filter out the hashes which resulted in no matches
+ NOT_NULL = filter NEIGHBORS_PROJ by SIZE(matching_pts) &gt; 0;
+ 
+ -- group by the query
+ NEIGHBORS_GRP = group NOT_NULL by query_pt;
+ describe NEIGHBORS_GRP;
+ 
+ -- Generate the query, the number of matches and the bag of matching points
+ NEIGHBOR_CNT = foreach NEIGHBORS_GRP{
+    MATCHING_PTS = foreach NOT_NULL generate FLATTEN(matching_pts);
+    DIST_MATCHING_PTS = DISTINCT MATCHING_PTS;
+    generate group as query_pt, COUNT(NOT_NULL), DIST_MATCHING_PTS;
+ };
+ describe NEIGHBOR_CNT;
+ -- NEIGHBOR_CNT: {query_pt: (dim1: double,dim2: double,dim3: double)
+ --               ,long
+ --               ,DIST_MATCHING_PTS: { (matching_pts::dim1: double,matching_pts::dim2: double,matching_pts::dim3: double)
+ --                              }
+ --               }
+ STORE NEIGHBOR_CNT INTO 'neighbors';
+ </pre></div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>sDim</code> - Dimension of the vectors</dd><dd><code>sRepeat</code> - Number of internal repetitions</dd><dd><code>sNumHashes</code> - Size of the hash family (if you're looking for k near neighbors, this is the k)</dd><dd><code>sSeed</code> - Seed to use when constructing LSH family</dd></dl>
+</li>
+</ul>
+<a name="CosineDistanceHash(java.lang.String, java.lang.String, java.lang.String)">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>CosineDistanceHash</h4>
+<pre>public&nbsp;CosineDistanceHash(java.lang.String&nbsp;sDim,
+                  java.lang.String&nbsp;sRepeat,
+                  java.lang.String&nbsp;sNumHashes)</pre>
+</li>
+</ul>
+</li>
+</ul>
+<!-- ============ METHOD DETAIL ========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method_detail">
+<!--   -->
+</a>
+<h3>Method Detail</h3>
+<a name="createLSHCreator()">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>createLSHCreator</h4>
+<pre>protected&nbsp;<a href="../../../../datafu/pig/hash/lsh/interfaces/LSHCreator.html" title="class in datafu.pig.hash.lsh.interfaces">LSHCreator</a>&nbsp;createLSHCreator()</pre>
+<dl>
+<dt><strong>Specified by:</strong></dt>
+<dd><code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#createLSHCreator()">createLSHCreator</a></code>&nbsp;in class&nbsp;<code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></code></dd>
+</dl>
+</li>
+</ul>
+<a name="getDimension()">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>getDimension</h4>
+<pre>protected&nbsp;int&nbsp;getDimension()</pre>
+<dl>
+<dt><strong>Specified by:</strong></dt>
+<dd><code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#getDimension()">getDimension</a></code>&nbsp;in class&nbsp;<code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></code></dd>
+</dl>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+</div>
+<!-- ========= END OF CLASS DATA ========= -->
+<!-- ======= START OF BOTTOM NAVBAR ====== -->
+<div class="bottomNav"><a name="navbar_bottom">
+<!--   -->
+</a><a href="#skip-navbar_bottom" title="Skip navigation links"></a><a name="navbar_bottom_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../../overview-summary.html">Overview</a></li>
+<li><a href="package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../../index-all.html">Index</a></li>
+<li><a href="../../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li>Prev Class</li>
+<li><a href="../../../../datafu/pig/hash/lsh/L1PStableHash.html" title="class in datafu.pig.hash.lsh"><span class="strong">Next Class</span></a></li>
+</ul>
+<ul class="navList">
+<li><a href="../../../../index.html?datafu/pig/hash/lsh/CosineDistanceHash.html" target="_top">Frames</a></li>
+<li><a href="CosineDistanceHash.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_bottom">
+<li><a href="../../../../allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_bottom");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li><a href="#nested_classes_inherited_from_class_org.apache.pig.EvalFunc">Nested</a>&nbsp;|&nbsp;</li>
+<li><a href="#fields_inherited_from_class_datafu.pig.hash.lsh.LSHFunc">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor_summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li><a href="#constructor_detail">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_detail">Method</a></li>
+</ul>
+</div>
+<a name="skip-navbar_bottom">
+<!--   -->
+</a></div>
+<!-- ======== END OF BOTTOM NAVBAR ======= -->
+</body>
+</html>

Added: incubator/datafu/site/docs/datafu/1.3.1/datafu/pig/hash/lsh/L1PStableHash.html
URL: http://svn.apache.org/viewvc/incubator/datafu/site/docs/datafu/1.3.1/datafu/pig/hash/lsh/L1PStableHash.html?rev=1755883&view=auto
==============================================================================
--- incubator/datafu/site/docs/datafu/1.3.1/datafu/pig/hash/lsh/L1PStableHash.html (added)
+++ incubator/datafu/site/docs/datafu/1.3.1/datafu/pig/hash/lsh/L1PStableHash.html Wed Aug 10 22:07:27 2016
@@ -0,0 +1,483 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!-- NewPage -->
+<html lang="en">
+<head>
+<!-- Generated by javadoc (version 1.7.0_79) on Wed Aug 10 15:01:03 PDT 2016 -->
+<title>L1PStableHash (datafu-pig 1.3.1 API)</title>
+<meta name="date" content="2016-08-10">
+<link rel="stylesheet" type="text/css" href="../../../../stylesheet.css" title="Style">
+</head>
+<body>
+<script type="text/javascript"><!--
+    if (location.href.indexOf('is-external=true') == -1) {
+        parent.document.title="L1PStableHash (datafu-pig 1.3.1 API)";
+    }
+//-->
+</script>
+<noscript>
+<div>JavaScript is disabled on your browser.</div>
+</noscript>
+<!-- ========= START OF TOP NAVBAR ======= -->
+<div class="topNav"><a name="navbar_top">
+<!--   -->
+</a><a href="#skip-navbar_top" title="Skip navigation links"></a><a name="navbar_top_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../../overview-summary.html">Overview</a></li>
+<li><a href="package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../../index-all.html">Index</a></li>
+<li><a href="../../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li><a href="../../../../datafu/pig/hash/lsh/CosineDistanceHash.html" title="class in datafu.pig.hash.lsh"><span class="strong">Prev Class</span></a></li>
+<li><a href="../../../../datafu/pig/hash/lsh/L2PStableHash.html" title="class in datafu.pig.hash.lsh"><span class="strong">Next Class</span></a></li>
+</ul>
+<ul class="navList">
+<li><a href="../../../../index.html?datafu/pig/hash/lsh/L1PStableHash.html" target="_top">Frames</a></li>
+<li><a href="L1PStableHash.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_top">
+<li><a href="../../../../allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_top");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li><a href="#nested_classes_inherited_from_class_org.apache.pig.EvalFunc">Nested</a>&nbsp;|&nbsp;</li>
+<li><a href="#fields_inherited_from_class_datafu.pig.hash.lsh.LSHFunc">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor_summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li><a href="#constructor_detail">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_detail">Method</a></li>
+</ul>
+</div>
+<a name="skip-navbar_top">
+<!--   -->
+</a></div>
+<!-- ========= END OF TOP NAVBAR ========= -->
+<!-- ======== START OF CLASS DATA ======== -->
+<div class="header">
+<div class="subTitle">datafu.pig.hash.lsh</div>
+<h2 title="Class L1PStableHash" class="title">Class L1PStableHash</h2>
+</div>
+<div class="contentContainer">
+<ul class="inheritance">
+<li>java.lang.Object</li>
+<li>
+<ul class="inheritance">
+<li>org.apache.pig.EvalFunc&lt;org.apache.pig.data.DataBag&gt;</li>
+<li>
+<ul class="inheritance">
+<li><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">datafu.pig.hash.lsh.LSHFunc</a></li>
+<li>
+<ul class="inheritance">
+<li>datafu.pig.hash.lsh.L1PStableHash</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+<div class="description">
+<ul class="blockList">
+<li class="blockList">
+<hr>
+<br>
+<pre>public class <span class="strong">L1PStableHash</span>
+extends <a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></pre>
+<div class="block">From Wikipedia's article on <a href="http://en.wikipedia.org/wiki/Locality-sensitive_hashing" target="_blank">Locality Sensitive Hashing</a>:
+ <pre>
+ Locality-sensitive hashing (LSH) is a method of performing probabilistic dimension reduction of high-dimensional data. 
+ The basic idea is to hash the input items so that similar items are mapped to the same buckets with high probability 
+ (the number of buckets being much smaller than the universe of possible input items).
+ </pre>
+ 
+ In particular, this implementation implements a locality sensitive hashing scheme which maps high-dimensional vectors which are
+ close together (with high probability) according to the <a href="http://en.wikipedia.org/wiki/Lp_space" target="_blank">L1</a>
+ distance metric into the same buckets.  This implementation uses a 1-stable distribution (a Cauchy distribution) in order
+ to accomplish this.
+ 
+ For more information, see Datar, M.; Immorlica, N.; Indyk, P.; Mirrokni, V.S. (2004). "Locality-Sensitive Hashing Scheme Based on p-Stable Distributions". Proceedings of the Symposium on Computational Geometry.</div>
+</li>
+</ul>
+</div>
+<div class="summary">
+<ul class="blockList">
+<li class="blockList">
+<!-- ======== NESTED CLASS SUMMARY ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="nested_class_summary">
+<!--   -->
+</a>
+<h3>Nested Class Summary</h3>
+<ul class="blockList">
+<li class="blockList"><a name="nested_classes_inherited_from_class_org.apache.pig.EvalFunc">
+<!--   -->
+</a>
+<h3>Nested classes/interfaces inherited from class&nbsp;org.apache.pig.EvalFunc</h3>
+<code>org.apache.pig.EvalFunc.SchemaType</code></li>
+</ul>
+</li>
+</ul>
+<!-- =========== FIELD SUMMARY =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="field_summary">
+<!--   -->
+</a>
+<h3>Field Summary</h3>
+<ul class="blockList">
+<li class="blockList"><a name="fields_inherited_from_class_datafu.pig.hash.lsh.LSHFunc">
+<!--   -->
+</a>
+<h3>Fields inherited from class&nbsp;datafu.pig.hash.lsh.<a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></h3>
+<code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#lsh">lsh</a>, <a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#seed">seed</a></code></li>
+</ul>
+<ul class="blockList">
+<li class="blockList"><a name="fields_inherited_from_class_org.apache.pig.EvalFunc">
+<!--   -->
+</a>
+<h3>Fields inherited from class&nbsp;org.apache.pig.EvalFunc</h3>
+<code>log, pigLogger, reporter, returnType</code></li>
+</ul>
+</li>
+</ul>
+<!-- ======== CONSTRUCTOR SUMMARY ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor_summary">
+<!--   -->
+</a>
+<h3>Constructor Summary</h3>
+<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation">
+<caption><span>Constructors</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colOne" scope="col">Constructor and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colOne"><code><strong><a href="../../../../datafu/pig/hash/lsh/L1PStableHash.html#L1PStableHash(java.lang.String,%20java.lang.String,%20java.lang.String,%20java.lang.String)">L1PStableHash</a></strong>(java.lang.String&nbsp;sDim,
+             java.lang.String&nbsp;sW,
+             java.lang.String&nbsp;sRepeat,
+             java.lang.String&nbsp;sNumHashes)</code>&nbsp;</td>
+</tr>
+<tr class="rowColor">
+<td class="colOne"><code><strong><a href="../../../../datafu/pig/hash/lsh/L1PStableHash.html#L1PStableHash(java.lang.String,%20java.lang.String,%20java.lang.String,%20java.lang.String,%20java.lang.String)">L1PStableHash</a></strong>(java.lang.String&nbsp;sDim,
+             java.lang.String&nbsp;sW,
+             java.lang.String&nbsp;sRepeat,
+             java.lang.String&nbsp;sNumHashes,
+             java.lang.String&nbsp;sSeed)</code>
+<div class="block">Locality sensitive hash that maps vectors onto a long in such a way that colliding
+ vectors are "near" one another according to the L1 distance metric with high probability.</div>
+</td>
+</tr>
+</table>
+</li>
+</ul>
+<!-- ========== METHOD SUMMARY =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method_summary">
+<!--   -->
+</a>
+<h3>Method Summary</h3>
+<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
+<caption><span>Methods</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colFirst" scope="col">Modifier and Type</th>
+<th class="colLast" scope="col">Method and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>protected <a href="../../../../datafu/pig/hash/lsh/interfaces/LSHCreator.html" title="class in datafu.pig.hash.lsh.interfaces">LSHCreator</a></code></td>
+<td class="colLast"><code><strong><a href="../../../../datafu/pig/hash/lsh/L1PStableHash.html#createLSHCreator()">createLSHCreator</a></strong>()</code>&nbsp;</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>protected int</code></td>
+<td class="colLast"><code><strong><a href="../../../../datafu/pig/hash/lsh/L1PStableHash.html#getDimension()">getDimension</a></strong>()</code>&nbsp;</td>
+</tr>
+</table>
+<ul class="blockList">
+<li class="blockList"><a name="methods_inherited_from_class_datafu.pig.hash.lsh.LSHFunc">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;datafu.pig.hash.lsh.<a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></h3>
+<code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#exec(org.apache.pig.data.Tuple)">exec</a>, <a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#getSeed()">getSeed</a>, <a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#outputSchema(org.apache.pig.impl.logicalLayer.schema.Schema)">outputSchema</a></code></li>
+</ul>
+<ul class="blockList">
+<li class="blockList"><a name="methods_inherited_from_class_org.apache.pig.EvalFunc">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;org.apache.pig.EvalFunc</h3>
+<code>allowCompileTimeCalculation, finish, getArgToFuncMapping, getCacheFiles, getInputSchema, getLogger, getPigLogger, getReporter, getReturnType, getSchemaName, getSchemaType, getShipFiles, isAsynchronous, progress, setInputSchema, setPigLogger, setReporter, setUDFContextSignature, warn</code></li>
+</ul>
+<ul class="blockList">
+<li class="blockList"><a name="methods_inherited_from_class_java.lang.Object">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;java.lang.Object</h3>
+<code>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+<div class="details">
+<ul class="blockList">
+<li class="blockList">
+<!-- ========= CONSTRUCTOR DETAIL ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor_detail">
+<!--   -->
+</a>
+<h3>Constructor Detail</h3>
+<a name="L1PStableHash(java.lang.String, java.lang.String, java.lang.String, java.lang.String, java.lang.String)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>L1PStableHash</h4>
+<pre>public&nbsp;L1PStableHash(java.lang.String&nbsp;sDim,
+             java.lang.String&nbsp;sW,
+             java.lang.String&nbsp;sRepeat,
+             java.lang.String&nbsp;sNumHashes,
+             java.lang.String&nbsp;sSeed)</pre>
+<div class="block">Locality sensitive hash that maps vectors onto a long in such a way that colliding
+ vectors are "near" one another according to L1 (city block) distance with high probability.  
+ 
+ <p>
+ Generally, multiple LSH are combined via repetition to increase the range of the hash function to the full set of longs.
+ The number of functions which you want to internally repeat is specified by the sRepeat parameter.
+ 
+ The size of the hash family corresponds to the number of independent hashes you want to apply to the data.
+ In a k-near neighbors style of searching, this corresponds to the number of neighbors you want to find
+ (i.e. the number of vectors within a distance according to the L1 metric).
+ 
+ This UDF, like all p-stable LSH functions, is parameterized with a quantization parameter (w or r in the literature,
+ depending on where you look).  Consider the following excerpt from Datar, M.; Immorlica, N.; Indyk, P.; Mirrokni, V.S. (2004). "Locality-Sensitive Hashing Scheme Based on p-Stable Distributions". Proceedings of the Symposium on Computational Geometry.
+ 
+ <pre>
+ Decreasing the width of the projection (w) decreases the probability of collision for any two points. 
+ Thus, it has the same effect as increasing k . As a result, we would like to set w as small as possible
+ and in this way decrease the number of projections we need to make. 
+ </pre>
+ 
+ In the literature, the quantization parameter (or width of the projection) is found empirically given a sample of
+ the data and the likely threshold for the metric.  Tuning this parameter is very important for the performance
+ of this algorithm.
+ 
+ <p>
+ Consider the following example where we input some 3-dimensional points and a set of 3-dimensional queries
+ and find the nearest neighbors of the query points:
+ <pre>
+ -- Create a L1PStableHash of 
+ --   3 dimensional data
+ --   projection width of 150
+ --   1 internal hash 
+ --   family of 5 hashes
+ --   with a seed of 0
+ 
+ -- This creates a bag of tuples:
+ --   lsh_id:Integer the family ID (in this case, 0-4)
+ --   hash:Long the hash 
+ 
+ define LSH datafu.pig.hash.lsh.L1PStableHash('3', '150', '1', '5', '0');
+ define METRIC datafu.pig.hash.lsh.metric.L1();
+
+ PTS = LOAD 'input' AS (dim1:double, dim2:double, dim3:double);
+ 
+ --hash the input points
+ PTS_HASHED = foreach PTS generate TOTUPLE(dim1, dim2, dim3) as pt
+                    , FLATTEN(LSH(TOTUPLE(dim1, dim2, dim3)));
+ 
+ -- the hash family ID and the hash should group the input points into partitions
+ PARTITIONS = group PTS_HASHED by (lsh_id, hash);
+ 
+ -- take in the query points and hash them
+ QUERIES = LOAD 'queries' as (dim1:double, dim2:double, dim3:double);
+ QUERIES_HASHED = foreach QUERIES generate TOTUPLE(dim1, dim2, dim3) as query_pt
+                        , FLATTEN(LSH(TOTUPLE(dim1, dim2, dim3)))
+                        ;
+ 
+ -- join the hashed query points with the (presumably larger) list of input data split by partitions
+ QUERIES_W_PARTS = join QUERIES_HASHED by (lsh_id, hash), PARTITIONS by (group.$0, group.$1);
+ 
+ -- Now, use the appropriate METRIC UDF (in this case L1 (aka city block) distance) to find the first point within
+ -- a parameterized threshold (in this case, 1000).  It takes:
+ --   query_pt:Tuple the query point
+ --   threshold:Double the threshold, so that if the distance between the query point and a point
+ --                    in the partition is less than this threshold, it returns the point (and stops searching)
+ --   partition:Bag The bag of tuples in the partition.
+ 
+  
+ NEAR_NEIGHBORS = foreach QUERIES_W_PARTS generate query_pt as query_pt
+                                                 , METRIC(query_pt, 1000, PTS_HASHED) as neighbor
+                                                 ;
+ describe NEAR_NEIGHBORS;
+ -- {query_pt: (dim1: double,dim2: double,dim3: double)
+ -- ,neighbor: (pt: (dim1: double,dim2: double,dim3: double)
+ --            ,lsh::lsh_id: int
+ --            ,lsh::hash: long
+ --            )
+ -- }
+ 
+ -- project out the query and the matching point
+ NEIGHBORS_PROJ = foreach NEAR_NEIGHBORS {
+  generate query_pt as query_pt, neighbor.pt as matching_pts;
+ };
+ 
+ -- Filter out the hashes which resulted in no matches
+ NOT_NULL = filter NEIGHBORS_PROJ by SIZE(matching_pts) &gt; 0;
+ 
+ -- group by the query
+ NEIGHBORS_GRP = group NOT_NULL by query_pt;
+ describe NEIGHBORS_GRP;
+ 
+ -- Generate the query, the number of matches and the bag of matching points
+ NEIGHBOR_CNT = foreach NEIGHBORS_GRP{
+    MATCHING_PTS = foreach NOT_NULL generate FLATTEN(matching_pts);
+    DIST_MATCHING_PTS = DISTINCT MATCHING_PTS;
+    generate group as query_pt, COUNT(NOT_NULL), DIST_MATCHING_PTS;
+ };
+ describe NEIGHBOR_CNT;
+ -- NEIGHBOR_CNT: {query_pt: (dim1: double,dim2: double,dim3: double)
+ --               ,long
+ --               ,DIST_MATCHING_PTS: { (matching_pts::dim1: double,matching_pts::dim2: double,matching_pts::dim3: double)
+ --                              }
+ --               }
+ STORE NEIGHBOR_CNT INTO 'neighbors';
+ </pre></div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>sDim</code> - Dimension of the vectors</dd><dd><code>sW</code> - A double representing the quantization parameter (also known as the projection width)</dd><dd><code>sRepeat</code> - Number of internal repetitions (generally this should be 1 as the p-stable hashes have a larger range than one bit)</dd><dd><code>sNumHashes</code> - Size of the hash family (if you're looking for k near neighbors, this is the k)</dd><dd><code>sSeed</code> - Seed to use when constructing LSH family</dd></dl>
+</li>
+</ul>
+<a name="L1PStableHash(java.lang.String, java.lang.String, java.lang.String, java.lang.String)">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>L1PStableHash</h4>
+<pre>public&nbsp;L1PStableHash(java.lang.String&nbsp;sDim,
+             java.lang.String&nbsp;sW,
+             java.lang.String&nbsp;sRepeat,
+             java.lang.String&nbsp;sNumHashes)</pre>
+</li>
+</ul>
+</li>
+</ul>
+<!-- ============ METHOD DETAIL ========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method_detail">
+<!--   -->
+</a>
+<h3>Method Detail</h3>
+<a name="createLSHCreator()">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>createLSHCreator</h4>
+<pre>protected&nbsp;<a href="../../../../datafu/pig/hash/lsh/interfaces/LSHCreator.html" title="class in datafu.pig.hash.lsh.interfaces">LSHCreator</a>&nbsp;createLSHCreator()</pre>
+<dl>
+<dt><strong>Specified by:</strong></dt>
+<dd><code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#createLSHCreator()">createLSHCreator</a></code>&nbsp;in class&nbsp;<code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></code></dd>
+</dl>
+</li>
+</ul>
+<a name="getDimension()">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>getDimension</h4>
+<pre>protected&nbsp;int&nbsp;getDimension()</pre>
+<dl>
+<dt><strong>Specified by:</strong></dt>
+<dd><code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#getDimension()">getDimension</a></code>&nbsp;in class&nbsp;<code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></code></dd>
+</dl>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+</div>
+<!-- ========= END OF CLASS DATA ========= -->
+<!-- ======= START OF BOTTOM NAVBAR ====== -->
+<div class="bottomNav"><a name="navbar_bottom">
+<!--   -->
+</a><a href="#skip-navbar_bottom" title="Skip navigation links"></a><a name="navbar_bottom_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../../overview-summary.html">Overview</a></li>
+<li><a href="package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../../index-all.html">Index</a></li>
+<li><a href="../../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li><a href="../../../../datafu/pig/hash/lsh/CosineDistanceHash.html" title="class in datafu.pig.hash.lsh"><span class="strong">Prev Class</span></a></li>
+<li><a href="../../../../datafu/pig/hash/lsh/L2PStableHash.html" title="class in datafu.pig.hash.lsh"><span class="strong">Next Class</span></a></li>
+</ul>
+<ul class="navList">
+<li><a href="../../../../index.html?datafu/pig/hash/lsh/L1PStableHash.html" target="_top">Frames</a></li>
+<li><a href="L1PStableHash.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_bottom">
+<li><a href="../../../../allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_bottom");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li><a href="#nested_classes_inherited_from_class_org.apache.pig.EvalFunc">Nested</a>&nbsp;|&nbsp;</li>
+<li><a href="#fields_inherited_from_class_datafu.pig.hash.lsh.LSHFunc">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor_summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li><a href="#constructor_detail">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_detail">Method</a></li>
+</ul>
+</div>
+<a name="skip-navbar_bottom">
+<!--   -->
+</a></div>
+<!-- ======== END OF BOTTOM NAVBAR ======= -->
+</body>
+</html>

Added: incubator/datafu/site/docs/datafu/1.3.1/datafu/pig/hash/lsh/L2PStableHash.html
URL: http://svn.apache.org/viewvc/incubator/datafu/site/docs/datafu/1.3.1/datafu/pig/hash/lsh/L2PStableHash.html?rev=1755883&view=auto
==============================================================================
--- incubator/datafu/site/docs/datafu/1.3.1/datafu/pig/hash/lsh/L2PStableHash.html (added)
+++ incubator/datafu/site/docs/datafu/1.3.1/datafu/pig/hash/lsh/L2PStableHash.html Wed Aug 10 22:07:27 2016
@@ -0,0 +1,485 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!-- NewPage -->
+<html lang="en">
+<head>
+<!-- Generated by javadoc (version 1.7.0_79) on Wed Aug 10 15:01:03 PDT 2016 -->
+<title>L2PStableHash (datafu-pig 1.3.1 API)</title>
+<meta name="date" content="2016-08-10">
+<link rel="stylesheet" type="text/css" href="../../../../stylesheet.css" title="Style">
+</head>
+<body>
+<script type="text/javascript"><!--
+    if (location.href.indexOf('is-external=true') == -1) {
+        parent.document.title="L2PStableHash (datafu-pig 1.3.1 API)";
+    }
+//-->
+</script>
+<noscript>
+<div>JavaScript is disabled on your browser.</div>
+</noscript>
+<!-- ========= START OF TOP NAVBAR ======= -->
+<div class="topNav"><a name="navbar_top">
+<!--   -->
+</a><a href="#skip-navbar_top" title="Skip navigation links"></a><a name="navbar_top_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../../overview-summary.html">Overview</a></li>
+<li><a href="package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../../index-all.html">Index</a></li>
+<li><a href="../../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li><a href="../../../../datafu/pig/hash/lsh/L1PStableHash.html" title="class in datafu.pig.hash.lsh"><span class="strong">Prev Class</span></a></li>
+<li><a href="../../../../datafu/pig/hash/lsh/LSHFamily.html" title="class in datafu.pig.hash.lsh"><span class="strong">Next Class</span></a></li>
+</ul>
+<ul class="navList">
+<li><a href="../../../../index.html?datafu/pig/hash/lsh/L2PStableHash.html" target="_top">Frames</a></li>
+<li><a href="L2PStableHash.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_top">
+<li><a href="../../../../allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_top");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li><a href="#nested_classes_inherited_from_class_org.apache.pig.EvalFunc">Nested</a>&nbsp;|&nbsp;</li>
+<li><a href="#fields_inherited_from_class_datafu.pig.hash.lsh.LSHFunc">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor_summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li><a href="#constructor_detail">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_detail">Method</a></li>
+</ul>
+</div>
+<a name="skip-navbar_top">
+<!--   -->
+</a></div>
+<!-- ========= END OF TOP NAVBAR ========= -->
+<!-- ======== START OF CLASS DATA ======== -->
+<div class="header">
+<div class="subTitle">datafu.pig.hash.lsh</div>
+<h2 title="Class L2PStableHash" class="title">Class L2PStableHash</h2>
+</div>
+<div class="contentContainer">
+<ul class="inheritance">
+<li>java.lang.Object</li>
+<li>
+<ul class="inheritance">
+<li>org.apache.pig.EvalFunc&lt;org.apache.pig.data.DataBag&gt;</li>
+<li>
+<ul class="inheritance">
+<li><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">datafu.pig.hash.lsh.LSHFunc</a></li>
+<li>
+<ul class="inheritance">
+<li>datafu.pig.hash.lsh.L2PStableHash</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+<div class="description">
+<ul class="blockList">
+<li class="blockList">
+<hr>
+<br>
+<pre>public class <span class="strong">L2PStableHash</span>
+extends <a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></pre>
+<div class="block">From wikipedia's article on <a href="http://en.wikipedia.org/wiki/Locality-sensitive_hashing" target="_blank">Locality Sensitive Hashing</a>:
+ <pre>
+ Locality-sensitive hashing (LSH) is a method of performing probabilistic dimension reduction of high-dimensional data. 
+ The basic idea is to hash the input items so that similar items are mapped to the same buckets with high probability 
+ (the number of buckets being much smaller than the universe of possible input items).
+ </pre>
+ 
+ In particular, this implementation implements a locality sensitive hashing scheme which maps high-dimensional vectors which are
+ close together (with high probability) according to the <a href="http://en.wikipedia.org/wiki/Lp_space" target="_blank">L2</a>
+ distance metric into the same buckets.  This implementation uses a 2-stable distribution (a Gaussian distribution) in order
+ to accomplish this.
+ 
+ For more information, see Datar, M.; Immorlica, N.; Indyk, P.; Mirrokni, V.S. (2004). "Locality-Sensitive Hashing Scheme Based on p-Stable Distributions". Proceedings of the Symposium on Computational Geometry.</div>
+</li>
+</ul>
+</div>
+<div class="summary">
+<ul class="blockList">
+<li class="blockList">
+<!-- ======== NESTED CLASS SUMMARY ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="nested_class_summary">
+<!--   -->
+</a>
+<h3>Nested Class Summary</h3>
+<ul class="blockList">
+<li class="blockList"><a name="nested_classes_inherited_from_class_org.apache.pig.EvalFunc">
+<!--   -->
+</a>
+<h3>Nested classes/interfaces inherited from class&nbsp;org.apache.pig.EvalFunc</h3>
+<code>org.apache.pig.EvalFunc.SchemaType</code></li>
+</ul>
+</li>
+</ul>
+<!-- =========== FIELD SUMMARY =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="field_summary">
+<!--   -->
+</a>
+<h3>Field Summary</h3>
+<ul class="blockList">
+<li class="blockList"><a name="fields_inherited_from_class_datafu.pig.hash.lsh.LSHFunc">
+<!--   -->
+</a>
+<h3>Fields inherited from class&nbsp;datafu.pig.hash.lsh.<a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></h3>
+<code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#lsh">lsh</a></code></li>
+</ul>
+<ul class="blockList">
+<li class="blockList"><a name="fields_inherited_from_class_org.apache.pig.EvalFunc">
+<!--   -->
+</a>
+<h3>Fields inherited from class&nbsp;org.apache.pig.EvalFunc</h3>
+<code>log, pigLogger, reporter, returnType</code></li>
+</ul>
+</li>
+</ul>
+<!-- ======== CONSTRUCTOR SUMMARY ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor_summary">
+<!--   -->
+</a>
+<h3>Constructor Summary</h3>
+<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation">
+<caption><span>Constructors</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colOne" scope="col">Constructor and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colOne"><code><strong><a href="../../../../datafu/pig/hash/lsh/L2PStableHash.html#L2PStableHash(java.lang.String,%20java.lang.String,%20java.lang.String,%20java.lang.String)">L2PStableHash</a></strong>(java.lang.String&nbsp;sDim,
+             java.lang.String&nbsp;sW,
+             java.lang.String&nbsp;sRepeat,
+             java.lang.String&nbsp;sNumHashes)</code>&nbsp;</td>
+</tr>
+<tr class="rowColor">
+<td class="colOne"><code><strong><a href="../../../../datafu/pig/hash/lsh/L2PStableHash.html#L2PStableHash(java.lang.String,%20java.lang.String,%20java.lang.String,%20java.lang.String,%20java.lang.String)">L2PStableHash</a></strong>(java.lang.String&nbsp;sDim,
+             java.lang.String&nbsp;sW,
+             java.lang.String&nbsp;sRepeat,
+             java.lang.String&nbsp;sNumHashes,
+             java.lang.String&nbsp;sSeed)</code>
+<div class="block">Locality sensitive hash that maps vectors onto a long in such a way that colliding
+ vectors are "near" one another according to L2 (Euclidean) distance with high probability.</div>
+</td>
+</tr>
+</table>
+</li>
+</ul>
+<!-- ========== METHOD SUMMARY =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method_summary">
+<!--   -->
+</a>
+<h3>Method Summary</h3>
+<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
+<caption><span>Methods</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colFirst" scope="col">Modifier and Type</th>
+<th class="colLast" scope="col">Method and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>protected <a href="../../../../datafu/pig/hash/lsh/interfaces/LSHCreator.html" title="class in datafu.pig.hash.lsh.interfaces">LSHCreator</a></code></td>
+<td class="colLast"><code><strong><a href="../../../../datafu/pig/hash/lsh/L2PStableHash.html#createLSHCreator()">createLSHCreator</a></strong>()</code>&nbsp;</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>protected int</code></td>
+<td class="colLast"><code><strong><a href="../../../../datafu/pig/hash/lsh/L2PStableHash.html#getDimension()">getDimension</a></strong>()</code>&nbsp;</td>
+</tr>
+</table>
+<ul class="blockList">
+<li class="blockList"><a name="methods_inherited_from_class_datafu.pig.hash.lsh.LSHFunc">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;datafu.pig.hash.lsh.<a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></h3>
+<code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#exec(org.apache.pig.data.Tuple)">exec</a>, <a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#getSeed()">getSeed</a>, <a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#outputSchema(org.apache.pig.impl.logicalLayer.schema.Schema)">outputSchema</a></code></li>
+</ul>
+<ul class="blockList">
+<li class="blockList"><a name="methods_inherited_from_class_org.apache.pig.EvalFunc">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;org.apache.pig.EvalFunc</h3>
+<code>allowCompileTimeCalculation, finish, getArgToFuncMapping, getCacheFiles, getInputSchema, getLogger, getPigLogger, getReporter, getReturnType, getSchemaName, getSchemaType, getShipFiles, isAsynchronous, progress, setInputSchema, setPigLogger, setReporter, setUDFContextSignature, warn</code></li>
+</ul>
+<ul class="blockList">
+<li class="blockList"><a name="methods_inherited_from_class_java.lang.Object">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;java.lang.Object</h3>
+<code>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+<div class="details">
+<ul class="blockList">
+<li class="blockList">
+<!-- ========= CONSTRUCTOR DETAIL ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor_detail">
+<!--   -->
+</a>
+<h3>Constructor Detail</h3>
+<a name="L2PStableHash(java.lang.String, java.lang.String, java.lang.String, java.lang.String, java.lang.String)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>L2PStableHash</h4>
+<pre>public&nbsp;L2PStableHash(java.lang.String&nbsp;sDim,
+             java.lang.String&nbsp;sW,
+             java.lang.String&nbsp;sRepeat,
+             java.lang.String&nbsp;sNumHashes,
+             java.lang.String&nbsp;sSeed)</pre>
+<div class="block">Locality sensitive hash that maps vectors onto a long in such a way that colliding
+ vectors are "near" one another according to L2 (Euclidean) distance with high probability.  
+ 
+ <p>
+ Generally, multiple LSH are combined via repetition to increase the range of the hash function to the full set of longs.
+ The number of functions which you want to internally repeat is specified by the sRepeat parameter.
+ 
+ The size of the hash family corresponds to the number of independent hashes you want to apply to the data.
+ In a k-near neighbors style of searching, this corresponds to the number of neighbors you want to find
+ (i.e. the number of vectors within a distance according to the L2 metric).
+ 
+ This UDF, like all p-stable LSH functions, is parameterized with a quantization parameter (w or r in the literature,
+ depending on where you look).  Consider the following excerpt from Datar, M.; Immorlica, N.; Indyk, P.; Mirrokni, V.S. (2004). "Locality-Sensitive Hashing Scheme Based on p-Stable Distributions". Proceedings of the Symposium on Computational Geometry.
+ 
+ <pre>
+ Decreasing the width of the projection (w) decreases the probability of collision for any two points. 
+ Thus, it has the same effect as increasing k . As a result, we would like to set w as small as possible
+ and in this way decrease the number of projections we need to make. 
+ </pre>
+ 
+ In the literature, the quantization parameter (or width of the projection) is found empirically given a sample of
+ the data and the likely threshold for the metric.  Tuning this parameter is very important for the performance
+ of this algorithm.
+ 
+ <p>
+ Consider the following example where we input some 3-dimensional points and a set of 3-dimensional queries
+ and find the nearest neighbors of the query points:
+ <pre>
+ -- Create a L2PStableHash of 
+ --   3 dimensional data
+ --   projection width of 200
+ --   1 internal hash 
+ --   family of 5 hashes
+ --   with a seed of 0
+ 
+ -- This creates a bag of tuples:
+ --   lsh_id:Integer the family ID (in this case, 0-4)
+ --   hash:Long the hash 
+ 
+ define LSH datafu.pig.hash.lsh.L2PStableHash('3', '200', '1', '5', '0');
+ define METRIC datafu.pig.hash.lsh.metric.L2();
+
+ PTS = LOAD 'input' AS (dim1:double, dim2:double, dim3:double);
+ 
+ --hash the input points
+ PTS_HASHED = foreach PTS generate TOTUPLE(dim1, dim2, dim3) as pt
+                    , FLATTEN(LSH(TOTUPLE(dim1, dim2, dim3)));
+ 
+ -- the hash family ID and the hash should group the input points into partitions
+ PARTITIONS = group PTS_HASHED by (lsh_id, hash);
+ 
+ -- take in the query points and hash them
+ QUERIES = LOAD 'queries' as (dim1:double, dim2:double, dim3:double);
+ QUERIES_HASHED = foreach QUERIES generate TOTUPLE(dim1, dim2, dim3) as query_pt
+                        , FLATTEN(LSH(TOTUPLE(dim1, dim2, dim3)))
+                        ;
+ 
+ -- join the hashed query points with the (presumably larger) list of input data split by partitions
+ QUERIES_W_PARTS = join QUERIES_HASHED by (lsh_id, hash), PARTITIONS by (group.$0, group.$1);
+ 
+ -- Now, use the appropriate METRIC UDF (in this case L2 (aka Euclidean) distance) to find the first point within
+ -- a parameterized threshold (in this case, 1000).  It takes:
+ --   query_pt:Tuple the query point
+ --   threshold:Double the threshold, so that if the distance between the query point and a point
+ --                    in the partition is less than this threshold, it returns the point (and stops searching)
+ --   partition:Bag The bag of tuples in the partition.
+ 
+ 
+ NEAR_NEIGHBORS = foreach QUERIES_W_PARTS generate query_pt as query_pt
+                                                 , METRIC(query_pt, 1000, PTS_HASHED) as neighbor
+                                                 ;
+ describe NEAR_NEIGHBORS;
+ -- {query_pt: (dim1: double,dim2: double,dim3: double)
+ -- ,neighbor: (pt: (dim1: double,dim2: double,dim3: double)
+ --            ,lsh::lsh_id: int
+ --            ,lsh::hash: long
+ --            )
+ -- }
+ 
+ -- project out the query and the matching point
+ NEIGHBORS_PROJ = foreach NEAR_NEIGHBORS {
+  generate query_pt as query_pt, neighbor.pt as matching_pts;
+ };
+ 
+ -- Filter out the hashes which resulted in no matches
+ NOT_NULL = filter NEIGHBORS_PROJ by SIZE(matching_pts) &gt; 0;
+ 
+ -- group by the query
+ NEIGHBORS_GRP = group NOT_NULL by query_pt;
+ describe NEIGHBORS_GRP;
+ 
+ -- Generate the query, the number of matches and the bag of matching points
+ NEIGHBOR_CNT = foreach NEIGHBORS_GRP{
+    MATCHING_PTS = foreach NOT_NULL generate FLATTEN(matching_pts);
+    DIST_MATCHING_PTS = DISTINCT MATCHING_PTS;
+    generate group as query_pt, COUNT(NOT_NULL), DIST_MATCHING_PTS;
+ };
+ describe NEIGHBOR_CNT;
+ -- NEIGHBOR_CNT: {query_pt: (dim1: double,dim2: double,dim3: double)
+ --               ,long
+ --               ,DIST_MATCHING_PTS: { (matching_pts::dim1: double,matching_pts::dim2: double,matching_pts::dim3: double)
+ --                              }
+ --               }
+ STORE NEIGHBOR_CNT INTO 'neighbors';
+ </pre></div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>sDim</code> - Dimension of the vectors</dd><dd><code>sW</code> - A double representing the quantization parameter (also known as the projection width)</dd><dd><code>sRepeat</code> - Number of internal repetitions (generally this should be 1 as the p-stable hashes have a larger range than one bit)</dd><dd><code>sNumHashes</code> - Size of the hash family (if you're looking for k near neighbors, this is the k)</dd><dd><code>sSeed</code> - Seed to use when constructing LSH family</dd></dl>
+</li>
+</ul>
+<a name="L2PStableHash(java.lang.String, java.lang.String, java.lang.String, java.lang.String)">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>L2PStableHash</h4>
+<pre>public&nbsp;L2PStableHash(java.lang.String&nbsp;sDim,
+             java.lang.String&nbsp;sW,
+             java.lang.String&nbsp;sRepeat,
+             java.lang.String&nbsp;sNumHashes)</pre>
+</li>
+</ul>
+</li>
+</ul>
+<!-- ============ METHOD DETAIL ========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method_detail">
+<!--   -->
+</a>
+<h3>Method Detail</h3>
+<a name="createLSHCreator()">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>createLSHCreator</h4>
+<pre>protected&nbsp;<a href="../../../../datafu/pig/hash/lsh/interfaces/LSHCreator.html" title="class in datafu.pig.hash.lsh.interfaces">LSHCreator</a>&nbsp;createLSHCreator()</pre>
+<dl>
+<dt><strong>Specified by:</strong></dt>
+<dd><code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#createLSHCreator()">createLSHCreator</a></code>&nbsp;in class&nbsp;<code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></code></dd>
+</dl>
+</li>
+</ul>
+<a name="getDimension()">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>getDimension</h4>
+<pre>protected&nbsp;int&nbsp;getDimension()</pre>
+<dl>
+<dt><strong>Specified by:</strong></dt>
+<dd><code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#getDimension()">getDimension</a></code>&nbsp;in class&nbsp;<code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></code></dd>
+</dl>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+</div>
+<!-- ========= END OF CLASS DATA ========= -->
+<!-- ======= START OF BOTTOM NAVBAR ====== -->
+<div class="bottomNav"><a name="navbar_bottom">
+<!--   -->
+</a><a href="#skip-navbar_bottom" title="Skip navigation links"></a><a name="navbar_bottom_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../../overview-summary.html">Overview</a></li>
+<li><a href="package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../../index-all.html">Index</a></li>
+<li><a href="../../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li><a href="../../../../datafu/pig/hash/lsh/L1PStableHash.html" title="class in datafu.pig.hash.lsh"><span class="strong">Prev Class</span></a></li>
+<li><a href="../../../../datafu/pig/hash/lsh/LSHFamily.html" title="class in datafu.pig.hash.lsh"><span class="strong">Next Class</span></a></li>
+</ul>
+<ul class="navList">
+<li><a href="../../../../index.html?datafu/pig/hash/lsh/L2PStableHash.html" target="_top">Frames</a></li>
+<li><a href="L2PStableHash.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_bottom">
+<li><a href="../../../../allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_bottom");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li><a href="#nested_classes_inherited_from_class_org.apache.pig.EvalFunc">Nested</a>&nbsp;|&nbsp;</li>
+<li><a href="#fields_inherited_from_class_datafu.pig.hash.lsh.LSHFunc">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor_summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li><a href="#constructor_detail">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_detail">Method</a></li>
+</ul>
+</div>
+<a name="skip-navbar_bottom">
+<!--   -->
+</a></div>
+<!-- ======== END OF BOTTOM NAVBAR ======= -->
+</body>
+</html>

Added: incubator/datafu/site/docs/datafu/1.3.1/datafu/pig/hash/lsh/LSHFamily.html
URL: http://svn.apache.org/viewvc/incubator/datafu/site/docs/datafu/1.3.1/datafu/pig/hash/lsh/LSHFamily.html?rev=1755883&view=auto
==============================================================================
--- incubator/datafu/site/docs/datafu/1.3.1/datafu/pig/hash/lsh/LSHFamily.html (added)
+++ incubator/datafu/site/docs/datafu/1.3.1/datafu/pig/hash/lsh/LSHFamily.html Wed Aug 10 22:07:27 2016
@@ -0,0 +1,264 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!-- NewPage -->
+<html lang="en">
+<head>
+<!-- Generated by javadoc (version 1.7.0_79) on Wed Aug 10 15:01:03 PDT 2016 -->
+<title>LSHFamily (datafu-pig 1.3.1 API)</title>
+<meta name="date" content="2016-08-10">
+<link rel="stylesheet" type="text/css" href="../../../../stylesheet.css" title="Style">
+</head>
+<body>
+<script type="text/javascript"><!--
+    if (location.href.indexOf('is-external=true') == -1) {
+        parent.document.title="LSHFamily (datafu-pig 1.3.1 API)";
+    }
+//-->
+</script>
+<noscript>
+<div>JavaScript is disabled on your browser.</div>
+</noscript>
+<!-- ========= START OF TOP NAVBAR ======= -->
+<div class="topNav"><a name="navbar_top">
+<!--   -->
+</a><a href="#skip-navbar_top" title="Skip navigation links"></a><a name="navbar_top_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../../overview-summary.html">Overview</a></li>
+<li><a href="package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../../index-all.html">Index</a></li>
+<li><a href="../../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li><a href="../../../../datafu/pig/hash/lsh/L2PStableHash.html" title="class in datafu.pig.hash.lsh"><span class="strong">Prev Class</span></a></li>
+<li><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh"><span class="strong">Next Class</span></a></li>
+</ul>
+<ul class="navList">
+<li><a href="../../../../index.html?datafu/pig/hash/lsh/LSHFamily.html" target="_top">Frames</a></li>
+<li><a href="LSHFamily.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_top">
+<li><a href="../../../../allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_top");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li>Nested&nbsp;|&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li><a href="#constructor_summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li><a href="#constructor_detail">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_detail">Method</a></li>
+</ul>
+</div>
+<a name="skip-navbar_top">
+<!--   -->
+</a></div>
+<!-- ========= END OF TOP NAVBAR ========= -->
+<!-- ======== START OF CLASS DATA ======== -->
+<div class="header">
+<div class="subTitle">datafu.pig.hash.lsh</div>
+<h2 title="Class LSHFamily" class="title">Class LSHFamily</h2>
+</div>
+<div class="contentContainer">
+<ul class="inheritance">
+<li>java.lang.Object</li>
+<li>
+<ul class="inheritance">
+<li>datafu.pig.hash.lsh.LSHFamily</li>
+</ul>
+</li>
+</ul>
+<div class="description">
+<ul class="blockList">
+<li class="blockList">
+<hr>
+<br>
+<pre>public class <span class="strong">LSHFamily</span>
+extends java.lang.Object</pre>
+<div class="block">A family of k locality sensitive hashes.  For a given point, k hashes will be computed.</div>
+</li>
+</ul>
+</div>
+<div class="summary">
+<ul class="blockList">
+<li class="blockList">
+<!-- ======== CONSTRUCTOR SUMMARY ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor_summary">
+<!--   -->
+</a>
+<h3>Constructor Summary</h3>
+<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation">
+<caption><span>Constructors</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colOne" scope="col">Constructor and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colOne"><code><strong><a href="../../../../datafu/pig/hash/lsh/LSHFamily.html#LSHFamily(java.util.List)">LSHFamily</a></strong>(java.util.List&lt;<a href="../../../../datafu/pig/hash/lsh/interfaces/LSH.html" title="class in datafu.pig.hash.lsh.interfaces">LSH</a>&gt;&nbsp;hashes)</code>
+<div class="block">Construct a family of hashes</div>
+</td>
+</tr>
+</table>
+</li>
+</ul>
+<!-- ========== METHOD SUMMARY =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method_summary">
+<!--   -->
+</a>
+<h3>Method Summary</h3>
+<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
+<caption><span>Methods</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colFirst" scope="col">Modifier and Type</th>
+<th class="colLast" scope="col">Method and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>java.lang.Iterable&lt;java.lang.Long&gt;</code></td>
+<td class="colLast"><code><strong><a href="../../../../datafu/pig/hash/lsh/LSHFamily.html#apply(org.apache.commons.math.linear.RealVector)">apply</a></strong>(org.apache.commons.math.linear.RealVector&nbsp;vector)</code>
+<div class="block">Compute the family of k-hashes for a vector.</div>
+</td>
+</tr>
+</table>
+<ul class="blockList">
+<li class="blockList"><a name="methods_inherited_from_class_java.lang.Object">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;java.lang.Object</h3>
+<code>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+<div class="details">
+<ul class="blockList">
+<li class="blockList">
+<!-- ========= CONSTRUCTOR DETAIL ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor_detail">
+<!--   -->
+</a>
+<h3>Constructor Detail</h3>
+<a name="LSHFamily(java.util.List)">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>LSHFamily</h4>
+<pre>public&nbsp;LSHFamily(java.util.List&lt;<a href="../../../../datafu/pig/hash/lsh/interfaces/LSH.html" title="class in datafu.pig.hash.lsh.interfaces">LSH</a>&gt;&nbsp;hashes)</pre>
+<div class="block">Construct a family of hashes</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>hashes</code> - Hashes which will be applied in turn to a given point</dd></dl>
+</li>
+</ul>
+</li>
+</ul>
+<!-- ============ METHOD DETAIL ========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method_detail">
+<!--   -->
+</a>
+<h3>Method Detail</h3>
+<a name="apply(org.apache.commons.math.linear.RealVector)">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>apply</h4>
+<pre>public&nbsp;java.lang.Iterable&lt;java.lang.Long&gt;&nbsp;apply(org.apache.commons.math.linear.RealVector&nbsp;vector)</pre>
+<div class="block">Compute the family of k-hashes for a vector.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>vector</code> - the vector</dd>
+<dt><span class="strong">Returns:</span></dt><dd>An iterable of hashes</dd></dl>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+</div>
+<!-- ========= END OF CLASS DATA ========= -->
+<!-- ======= START OF BOTTOM NAVBAR ====== -->
+<div class="bottomNav"><a name="navbar_bottom">
+<!--   -->
+</a><a href="#skip-navbar_bottom" title="Skip navigation links"></a><a name="navbar_bottom_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../../overview-summary.html">Overview</a></li>
+<li><a href="package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../../index-all.html">Index</a></li>
+<li><a href="../../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li><a href="../../../../datafu/pig/hash/lsh/L2PStableHash.html" title="class in datafu.pig.hash.lsh"><span class="strong">Prev Class</span></a></li>
+<li><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh"><span class="strong">Next Class</span></a></li>
+</ul>
+<ul class="navList">
+<li><a href="../../../../index.html?datafu/pig/hash/lsh/LSHFamily.html" target="_top">Frames</a></li>
+<li><a href="LSHFamily.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_bottom">
+<li><a href="../../../../allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_bottom");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li>Nested&nbsp;|&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li><a href="#constructor_summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li><a href="#constructor_detail">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_detail">Method</a></li>
+</ul>
+</div>
+<a name="skip-navbar_bottom">
+<!--   -->
+</a></div>
+<!-- ======== END OF BOTTOM NAVBAR ======= -->
+</body>
+</html>