You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ct...@apache.org on 2021/01/28 16:33:32 UTC

svn commit: r1070647 [14/35] - in /websites/production/lucene/content/solr/guide/8_8: ./ images/math-expressions/ meta-docs/

Modified: websites/production/lucene/content/solr/guide/8_8/machine-learning.html
==============================================================================
--- websites/production/lucene/content/solr/guide/8_8/machine-learning.html (original)
+++ websites/production/lucene/content/solr/guide/8_8/machine-learning.html Thu Jan 28 16:33:25 2021
@@ -8,7 +8,7 @@
 <meta name="description" content="">
 <meta name="keywords" content=" ">
 
-<title>Machine Learning | Apache Solr Reference Guide 8.8-DRAFT</title>
+<title>Machine Learning | Apache Solr Reference Guide 8.8</title>
 
 <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.1.3/css/bootstrap.min.css" integrity="sha384-MCw98/SFnGE8fJT3GXwEOngsV7Zt27NXFoaoApmYm81iuXoPkFOJwJ8ERdknLPMO" crossorigin="anonymous">
 <link rel="stylesheet" type="text/css" href="https://maxcdn.bootstrapcdn.com/font-awesome/4.5.0/css/font-awesome.min.css">
@@ -68,7 +68,7 @@
 
     </script>
 </head>
-<body class="DRAFT" id="machine-learning">
+<body class="" id="machine-learning">
 <div class="container-fluid">
   <div class="row">
   <nav id="sidebar" class="col-2 d-none d-md-block">
@@ -76,11 +76,6 @@
 <div class="sidebar-header">
   <div class="sidebarTitle text-center">Apache Solr Reference Guide</div>
   
-  <p class="draft-notice">
-    This is an unofficial DRAFT of the Guide for 8.8.
-    <a href="https://lucene.apache.org/solr/guide/">Official releases are available from the Solr website</a>.
-  </p>
-  
 
   <!--comment out this block if you want to hide search-->
     <!--start search-->
@@ -872,11 +867,36 @@
       </li>
       
       <li class="sb-level2">
-        <a href="math-expressions.html">Math Expressions</a>
+        <a href="math-expressions.html">Streaming Expressions and Math Expressions</a>
         
         <ul>
           
           <li class="sb-level3">
+            <a href="visualization.html">Visualization</a>
+            
+          </li>
+          
+          <li class="sb-level3">
+            <a href="math-start.html">Getting Started</a>
+            
+          </li>
+          
+          <li class="sb-level3">
+            <a href="loading.html">Loading Data</a>
+            
+          </li>
+          
+          <li class="sb-level3">
+            <a href="search-sample.html">Searching, Sampling and Aggregation</a>
+            
+          </li>
+          
+          <li class="sb-level3">
+            <a href="transform.html">Transforming Data</a>
+            
+          </li>
+          
+          <li class="sb-level3">
             <a href="scalar-math.html">Scalar Math</a>
             
           </li>
@@ -897,12 +917,12 @@
           </li>
           
           <li class="sb-level3">
-            <a href="vectorization.html">Streams and Vectorization</a>
+            <a href="term-vectors.html">Text Analysis and Term Vectors</a>
             
           </li>
           
           <li class="sb-level3">
-            <a href="term-vectors.html">Text Analysis and Term Vectors</a>
+            <a href="probability-distributions.html">Probability Distributions</a>
             
           </li>
           
@@ -912,12 +932,12 @@
           </li>
           
           <li class="sb-level3">
-            <a href="probability-distributions.html">Probability Distributions</a>
+            <a href="regression.html">Linear Regression</a>
             
           </li>
           
           <li class="sb-level3">
-            <a href="simulations.html">Monte Carlo Simulations</a>
+            <a href="curve-fitting.html">Curve Fitting</a>
             
           </li>
           
@@ -927,32 +947,32 @@
           </li>
           
           <li class="sb-level3">
-            <a href="regression.html">Linear Regression</a>
+            <a href="numerical-analysis.html">Interpolation, Derivatives and Integrals</a>
             
           </li>
           
           <li class="sb-level3">
-            <a href="numerical-analysis.html">Interpolation, Derivatives and Integrals</a>
+            <a href="dsp.html">Digital Signal Processing</a>
             
           </li>
           
           <li class="sb-level3">
-            <a href="curve-fitting.html">Curve Fitting</a>
+            <a href="simulations.html">Monte Carlo Simulations</a>
             
           </li>
           
           <li class="sb-level3">
-            <a href="dsp.html">Digital Signal Processing</a>
+            <a href="machine-learning.html">Machine Learning</a>
             
           </li>
           
           <li class="sb-level3">
-            <a href="machine-learning.html">Machine Learning</a>
+            <a href="computational-geometry.html">Computational Geometry</a>
             
           </li>
           
           <li class="sb-level3">
-            <a href="computational-geometry.html">Computational Geometry</a>
+            <a href="logs.html">Log Analytics</a>
             
           </li>
           
@@ -1654,29 +1674,38 @@
   
   <nav class="toc float-right justify-content-end">
     <ul class="sectlevel1">
-<li><a href="#feature-scaling">Feature Scaling</a>
+<li><a href="#distance-and-distance-matrices">Distance and Distance Matrices</a>
 <ul class="sectlevel2">
-<li><a href="#minmax-scaling">Min/Max Scaling</a></li>
-<li><a href="#standardization">Standardization</a></li>
-<li><a href="#unit-vectors">Unit Vectors</a></li>
+<li><a href="#distance-matrices">Distance Matrices</a></li>
+</ul>
+</li>
+<li><a href="#k-nearest-neighbor-knn">K-Nearest Neighbor (KNN)</a></li>
+<li><a href="#k-nearest-neighbor-regression">K-Nearest Neighbor Regression</a>
+<ul class="sectlevel2">
+<li><a href="#2d-non-linear-regression">2D Non-Linear Regression</a></li>
+<li><a href="#multivariate-non-linear-regression">Multivariate Non-Linear Regression</a></li>
+</ul>
+</li>
+<li><a href="#knnsearch">knnSearch</a></li>
+<li><a href="#dbscan">DBSCAN</a>
+<ul class="sectlevel2">
+<li><a href="#2d-cluster-visualization">2D Cluster Visualization</a></li>
 </ul>
 </li>
-<li><a href="#distance-and-distance-measures">Distance and Distance Measures</a></li>
 <li><a href="#k-means-clustering">K-Means Clustering</a>
 <ul class="sectlevel2">
-<li><a href="#centroid-features">Centroid Features</a></li>
-<li><a href="#cluster-features">Cluster Features</a></li>
+<li><a href="#clustered-scatter-plot">Clustered Scatter Plot</a></li>
+<li><a href="#plotting-the-centroids">Plotting the Centroids</a></li>
+<li><a href="#phrase-extraction">Phrase Extraction</a></li>
 </ul>
 </li>
 <li><a href="#multi-k-means-clustering">Multi K-Means Clustering</a></li>
 <li><a href="#fuzzy-k-means-clustering">Fuzzy K-Means Clustering</a></li>
-<li><a href="#k-nearest-neighbor-knn">K-Nearest Neighbor (KNN)</a></li>
-<li><a href="#k-nearest-neighbor-regression">K-Nearest Neighbor Regression</a>
+<li><a href="#feature-scaling">Feature Scaling</a>
 <ul class="sectlevel2">
-<li><a href="#prediction-and-residuals">Prediction and Residuals</a></li>
-<li><a href="#setting-feature-scaling">Setting Feature Scaling</a></li>
-<li><a href="#setting-robust-regression">Setting Robust Regression</a></li>
-<li><a href="#setting-the-distance-measure">Setting the Distance Measure</a></li>
+<li><a href="#minmax-scaling">Min/Max Scaling</a></li>
+<li><a href="#standardization">Standardization</a></li>
+<li><a href="#unit-vectors">Unit Vectors</a></li>
 </ul>
 </li>
 </ul>
@@ -1686,124 +1715,16 @@
   <section class="content">
      <section id="preamble" aria-label="Preamble"><p>This section of the math expressions user guide covers machine learning
 functions.</p></section>
-<section class="sect1"><h2 id="feature-scaling">Feature Scaling</h2><p>Before performing machine learning operations its often necessary to
-scale the feature vectors so they can be compared at the same scale.</p>
-<p>All the scaling function operate on vectors and matrices.
-When operating on a matrix the rows of the matrix are scaled.</p>
-<section class="sect2"><h3 id="minmax-scaling">Min/Max Scaling</h3><p>The <code>minMaxScale</code> function scales a vector or matrix between a minimum and maximum value.
-By default it will scale between 0 and 1 if min/max values are not provided.</p>
-<p>Below is a simple example of min/max scaling between 0 and 1.
-Notice that once brought into the same scale the vectors are the same.</p>
-<div class="listingblock"><pre class="rouge highlight"><code class="language-text" data-lang="text"><pre class="highlight"><code>let(a=array(20, 30, 40, 50),
-    b=array(200, 300, 400, 500),
-    c=matrix(a, b),
-    d=minMaxScale(c))</code></pre></code></pre></div>
-<p>This expression returns the following response:</p>
-<div class="listingblock"><pre class="rouge highlight"><code class="language-json" data-lang="json"><pre class="highlight"><code><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
-  </span><span style="color: #000080">"result-set"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
-    </span><span style="color: #000080">"docs"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-      </span><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
-        </span><span style="color: #000080">"d"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">0</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">0.3333333333333333</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">0.6666666666666666</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">1</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">],</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">0</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">0.3333333333333333</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">0.6666666666666666</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">1</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">]</span><span style="color: #bbbbbb">
-        </span><span style="background-color: #f8f8f8">]</span><span style="color: #bbbbbb">
-      </span><span style="background-color: #f8f8f8">},</span><span style="color: #bbbbbb">
-      </span><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
-        </span><span style="color: #000080">"EOF"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="color: #000000;font-weight: bold">true</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-        </span><span style="color: #000080">"RESPONSE_TIME"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="color: #009999">0</span><span style="color: #bbbbbb">
-      </span><span style="background-color: #f8f8f8">}</span><span style="color: #bbbbbb">
-    </span><span style="background-color: #f8f8f8">]</span><span style="color: #bbbbbb">
-  </span><span style="background-color: #f8f8f8">}</span><span style="color: #bbbbbb">
-</span><span style="background-color: #f8f8f8">}</span></code></pre></code></pre></div></section>
-<section class="sect2"><h3 id="standardization">Standardization</h3><p>The <code>standardize</code> function scales a vector so that it has a mean of 0 and a standard deviation of 1.
-Standardization can be used with machine learning algorithms, such as
-<a href="https://en.wikipedia.org/wiki/Support_vector_machine">Support Vector Machine (SVM)</a>, that perform better
-when the data has a normal distribution.</p>
-<div class="listingblock"><pre class="rouge highlight"><code class="language-text" data-lang="text"><pre class="highlight"><code>let(a=array(20, 30, 40, 50),
-    b=array(200, 300, 400, 500),
-    c=matrix(a, b),
-    d=standardize(c))</code></pre></code></pre></div>
-<p>This expression returns the following response:</p>
-<div class="listingblock"><pre class="rouge highlight"><code class="language-json" data-lang="json"><pre class="highlight"><code><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
-  </span><span style="color: #000080">"result-set"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
-    </span><span style="color: #000080">"docs"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-      </span><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
-        </span><span style="color: #000080">"d"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">-1.161895003862225</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">-0.3872983346207417</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">0.3872983346207417</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">1.161895003862225</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">],</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">-1.1618950038622249</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">-0.38729833462074165</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">0.38729833462074165</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">1.1618950038622249</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">]</span><span style="color: #bbbbbb">
-        </span><span style="background-color: #f8f8f8">]</span><span style="color: #bbbbbb">
-      </span><span style="background-color: #f8f8f8">},</span><span style="color: #bbbbbb">
-      </span><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
-        </span><span style="color: #000080">"EOF"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="color: #000000;font-weight: bold">true</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-        </span><span style="color: #000080">"RESPONSE_TIME"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="color: #009999">17</span><span style="color: #bbbbbb">
-      </span><span style="background-color: #f8f8f8">}</span><span style="color: #bbbbbb">
-    </span><span style="background-color: #f8f8f8">]</span><span style="color: #bbbbbb">
-  </span><span style="background-color: #f8f8f8">}</span><span style="color: #bbbbbb">
-</span><span style="background-color: #f8f8f8">}</span></code></pre></code></pre></div></section>
-<section class="sect2"><h3 id="unit-vectors">Unit Vectors</h3><p>The <code>unitize</code> function scales vectors to a magnitude of 1. A vector with a
-magnitude of 1 is known as a unit vector. Unit vectors are preferred when the vector math deals
-with vector direction rather than magnitude.</p>
-<div class="listingblock"><pre class="rouge highlight"><code class="language-text" data-lang="text"><pre class="highlight"><code>let(a=array(20, 30, 40, 50),
-    b=array(200, 300, 400, 500),
-    c=matrix(a, b),
-    d=unitize(c))</code></pre></code></pre></div>
-<p>This expression returns the following response:</p>
-<div class="listingblock"><pre class="rouge highlight"><code class="language-json" data-lang="json"><pre class="highlight"><code><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
-  </span><span style="color: #000080">"result-set"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
-    </span><span style="color: #000080">"docs"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-      </span><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
-        </span><span style="color: #000080">"d"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">0.2721655269759087</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">0.40824829046386296</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">0.5443310539518174</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">0.6804138174397716</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">],</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">0.2721655269759087</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">0.4082482904638631</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">0.5443310539518174</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">0.6804138174397717</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">]</span><span style="color: #bbbbbb">
-        </span><span style="background-color: #f8f8f8">]</span><span style="color: #bbbbbb">
-      </span><span style="background-color: #f8f8f8">},</span><span style="color: #bbbbbb">
-      </span><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
-        </span><span style="color: #000080">"EOF"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="color: #000000;font-weight: bold">true</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-        </span><span style="color: #000080">"RESPONSE_TIME"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="color: #009999">6</span><span style="color: #bbbbbb">
-      </span><span style="background-color: #f8f8f8">}</span><span style="color: #bbbbbb">
-    </span><span style="background-color: #f8f8f8">]</span><span style="color: #bbbbbb">
-  </span><span style="background-color: #f8f8f8">}</span><span style="color: #bbbbbb">
-</span><span style="background-color: #f8f8f8">}</span></code></pre></code></pre></div></section></section>
-<section class="sect1"><h2 id="distance-and-distance-measures">Distance and Distance Measures</h2><p>The <code>distance</code> function computes the distance for two numeric arrays or a distance matrix for the columns of a matrix.</p>
-<p>There are five distance measure functions that return a function that performs the actual distance calculation:</p>
-<div class="ulist"><ul><li><p><code>euclidean</code> (default)</p></li><li><p><code>manhattan</code></p></li><li><p><code>canberra</code></p></li><li><p><code>earthMovers</code></p></li><li><p><code>haversineMeters</code> (Geospatial distance measure)</p></li></ul></div>
+<section class="sect1"><h2 id="distance-and-distance-matrices">Distance and Distance Matrices</h2><p>The <code>distance</code> function computes the distance for two numeric arrays or a distance matrix for the columns of a matrix.</p>
+<p>There are six distance measure functions that return a function that performs the actual distance calculation:</p>
+<div class="ulist"><ul><li><p><code>euclidean</code> (default)</p></li><li><p><code>manhattan</code></p></li><li><p><code>canberra</code></p></li><li><p><code>earthMovers</code></p></li><li><p><code>cosine</code></p></li><li><p><code>haversineMeters</code> (Geospatial distance measure)</p></li></ul></div>
 <p>The distance measure functions can be used with all machine learning functions
 that support distance measures.</p>
 <p>Below is an example for computing Euclidean distance for two numeric arrays:</p>
 <div class="listingblock"><pre class="rouge highlight"><code class="language-text" data-lang="text"><pre class="highlight"><code>let(a=array(20, 30, 40, 50),
     b=array(21, 29, 41, 49),
     c=distance(a, b))</code></pre></code></pre></div>
-<p>This expression returns the following response:</p>
+<p>When this expression is sent to the <code>/stream</code> handler it responds with:</p>
 <div class="listingblock"><pre class="rouge highlight"><code class="language-json" data-lang="json"><pre class="highlight"><code><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
   </span><span style="color: #000080">"result-set"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
     </span><span style="color: #000080">"docs"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
@@ -1817,11 +1738,11 @@ that support distance measures.</p>
     </span><span style="background-color: #f8f8f8">]</span><span style="color: #bbbbbb">
   </span><span style="background-color: #f8f8f8">}</span><span style="color: #bbbbbb">
 </span><span style="background-color: #f8f8f8">}</span></code></pre></code></pre></div>
-<p>Below the distance is calculated using <strong>Manahattan</strong> distance.</p>
+<p>Below the distance is calculated using Manhattan distance.</p>
 <div class="listingblock"><pre class="rouge highlight"><code class="language-text" data-lang="text"><pre class="highlight"><code>let(a=array(20, 30, 40, 50),
     b=array(21, 29, 41, 49),
     c=distance(a, b, manhattan()))</code></pre></code></pre></div>
-<p>This expression returns the following response:</p>
+<p>When this expression is sent to the <code>/stream</code> handler it responds with:</p>
 <div class="listingblock"><pre class="rouge highlight"><code class="language-json" data-lang="json"><pre class="highlight"><code><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
   </span><span style="color: #000080">"result-set"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
     </span><span style="color: #000080">"docs"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
@@ -1835,488 +1756,525 @@ that support distance measures.</p>
     </span><span style="background-color: #f8f8f8">]</span><span style="color: #bbbbbb">
   </span><span style="background-color: #f8f8f8">}</span><span style="color: #bbbbbb">
 </span><span style="background-color: #f8f8f8">}</span></code></pre></code></pre></div>
-<p>Below is an example for computing a distance matrix for columns
-of a matrix:</p>
-<div class="listingblock"><pre class="rouge highlight"><code class="language-text" data-lang="text"><pre class="highlight"><code>let(a=array(20, 30, 40),
-    b=array(21, 29, 41),
-    c=array(31, 40, 50),
-    d=matrix(a, b, c),
-    c=distance(d))</code></pre></code></pre></div>
-<p>This expression returns the following response:</p>
-<div class="listingblock"><pre class="rouge highlight"><code class="language-json" data-lang="json"><pre class="highlight"><code><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
-  </span><span style="color: #000080">"result-set"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
-    </span><span style="color: #000080">"docs"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-      </span><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
-        </span><span style="color: #000080">"e"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">0</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">15.652475842498529</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">34.07345007480164</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">],</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">15.652475842498529</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">0</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">18.547236990991408</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">],</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">34.07345007480164</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">18.547236990991408</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #009999">0</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">]</span><span style="color: #bbbbbb">
-        </span><span style="background-color: #f8f8f8">]</span><span style="color: #bbbbbb">
-      </span><span style="background-color: #f8f8f8">},</span><span style="color: #bbbbbb">
-      </span><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
-        </span><span style="color: #000080">"EOF"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="color: #000000;font-weight: bold">true</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-        </span><span style="color: #000080">"RESPONSE_TIME"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="color: #009999">24</span><span style="color: #bbbbbb">
-      </span><span style="background-color: #f8f8f8">}</span><span style="color: #bbbbbb">
-    </span><span style="background-color: #f8f8f8">]</span><span style="color: #bbbbbb">
-  </span><span style="background-color: #f8f8f8">}</span><span style="color: #bbbbbb">
-</span><span style="background-color: #f8f8f8">}</span></code></pre></code></pre></div></section>
+<section class="sect2"><h3 id="distance-matrices">Distance Matrices</h3><p>Distance matrices are powerful tools for visualizing the distance
+between two or more
+vectors.</p>
+<p>The <code>distance</code> function builds a distance matrix
+if a matrix is passed as the parameter. The distance matrix is computed for the <strong>columns</strong>
+of the matrix.</p>
+<p>The example below demonstrates the power of distance matrices combined with 2 dimensional faceting.</p>
+<p>In this example the <code>facet2D</code> function is used to generate a two dimensional facet aggregation
+over the fields <code>complaint_type_s</code> and <code>zip_s</code> from the <code>nyc311</code> complaints database.
+The <strong>top 20</strong> complaint types and the <strong>top 25</strong> zip codes for each complaint type are aggregated.
+The result is a stream of tuples each containing the fields <code>complaint_type_s</code>, <code>zip_s</code> and the count for the pair.</p>
+<p>The <code>pivot</code> function is then used to pivot the fields into a <strong>matrix</strong> with the <code>zip_s</code>
+field as the <strong>rows</strong> and the <code>complaint_type_s</code> field as the <strong>columns</strong>. The <code>count(*)</code> field populates
+the values in the cells of the matrix.</p>
+<p>The <code>distance</code> function is then used to compute the distance matrix for the columns
+of the matrix using <code>cosine</code> distance. This produces a distance matrix
+that shows distance between complaint types based on the zip codes they appear in.</p>
+<p>Finally the <code>zplot</code> function is used to plot the distance matrix as a heat map. Notice that the
+heat map has been configured so that the intensity of color increases as the distance between vectors
+decreases.</p>
+<div class="imageblock"><img src="images/math-expressions/distance.png" alt="distance"></div>
+<p>The heat map is interactive, so mousing over one of the cells pops up the values
+for the cell.</p>
+<div class="imageblock"><img src="images/math-expressions/distanceview.png" alt="distanceview"></div>
+<p>Notice that HEAT/HOT WATER and UNSANITARY CONDITION complaints have a cosine distance of .1 (rounded to the nearest
+tenth).</p></section></section>
+<section class="sect1"><h2 id="k-nearest-neighbor-knn">K-Nearest Neighbor (KNN)</h2><p>The <code>knn</code> function searches the rows of a matrix with a search vector and
+returns a matrix of the k-nearest neighbors. This allows for secondary vector
+searches over result sets.</p>
+<p>The <code>knn</code> function supports changing of the distance measure by providing one of the following
+distance measure functions:</p>
+<div class="ulist"><ul><li><p><code>euclidean</code> (Default)</p></li><li><p><code>manhattan</code></p></li><li><p><code>canberra</code></p></li><li><p><code>earthMovers</code></p></li><li><p><code>cosine</code></p></li><li><p><code>haversineMeters</code> (Geospatial distance measure)</p></li></ul></div>
+<p>The example below shows how to perform a secondary search over an aggregation
+result set. The goal of the example is to find zip codes in the nyc311 complaint
+database that have similar complaint types to the zip code 10280.</p>
+<p>The first step in the example is to use the <code>facet2D</code> function to perform a two
+dimensional aggregation over the <code>zip_s</code> and <code>complaint_type_s</code> fields. In the example
+the top 119 zip codes and top 5 complaint types for each zip code are calculated
+for the borough of Manhattan. The result is a list of tuples each containing
+the <code>zip_s</code>, <code>complaint_type_s</code> and the <code>count(*)</code> for the combination.</p>
+<p>The list of tuples is then <strong>pivoted</strong> into a matrix with the <code>pivot</code> function.
+The <code>pivot</code> function in this example returns a matrix with rows of zip codes
+and columns of complaint types.
+The <code>count(*)</code> field from the tuples populates the cells of the matrix.
+This matrix will be used as the secondary search matrix.</p>
+<p>The next step is to locate the vector for the 10280 zip code.
+This is done in three steps in the example.
+The first step is to retrieve the row labels from the matrix with the <code>getRowLabels</code> function.
+The row labels in this case are zip codes which were populated by the <code>pivot</code> function.
+Then the <code>indexOf</code> function is used to find the <strong>index</strong> of the "10280" zip code in the list of row labels.
+The <code>rowAt</code> function is then used to return the vector at that <strong>index</strong> from the matrix.
+This vector is the <strong>search vector</strong>.</p>
+<p>Now that we have a matrix and search vector we can use the <code>knn</code> function to perform the search.
+In the example the <code>knn</code> function searches the matrix with the search vector with a K of 5, using
+<strong>cosine</strong> distance. Cosine distance is useful for comparing sparse vectors which is the case in this
+example. The <code>knn</code> function returns a matrix with the top 5 nearest neighbors to the search vector.</p>
+<p>The <code>knn</code> function populates the row and column labels of the return matrix and
+also adds a vector of <strong>distances</strong> for each row as an attribute to the matrix.</p>
+<p>In the example the <code>zplot</code> function extracts the row labels and
+the distance vector with the <code>getRowLabels</code> and <code>getAttribute</code> functions.
+The <code>topFeatures</code> function is used to extract
+the top 5 column labels for each zip code vector, based on the counts for each
+column. Then <code>zplot</code> outputs the data in a format that can be visualized in
+a table with Zeppelin-Solr.</p>
+<div class="imageblock"><img src="images/math-expressions/knn.png" alt="knn"></div>
+<p>The table above shows each zip code returned by the <code>knn</code> function along
+with the list of complaints and distances. These are the zip codes that are most similar
+to the 10280 zip code based on their top 5 complaint types.</p></section>
+<section class="sect1"><h2 id="k-nearest-neighbor-regression">K-Nearest Neighbor Regression</h2><p>K-nearest neighbor regression is a non-linear, bivariate and multivariate regression method.
+KNN regression is a lazy learning
+technique which means it does not fit a model to the training set in advance. Instead the
+entire training set of observations and outcomes are held in memory and predictions are made
+by averaging the outcomes of the k-nearest neighbors.</p>
+<p>The <code>knnRegress</code> function is used to perform nearest neighbor regression.</p>
+<section class="sect2"><h3 id="2d-non-linear-regression">2D Non-Linear Regression</h3><p>The example below shows the <strong>regression plot</strong> for KNN regression applied to a 2D scatter plot.</p>
+<p>In this example the <code>random</code> function is used to draw 500 random samples from the <code>logs</code> collection
+containing two fields <code>filesize_d</code> and <code>eresponse_d</code>. The sample is then vectorized with the
+<code>filesize_d</code> field stored in a vector assigned to variable <strong>x</strong> and the <code>eresponse_d</code> vector stored in
+variable <code>y</code>. The <code>knnRegress</code> function is then applied with <code>20</code> as the nearest neighbor parameter,
+which returns a KNN function which can be used to predict values.
+The <code>predict</code> function is then called on the KNN function to predict values for the original <code>x</code> vector.
+Finally <code>zplot</code> is used to plot the original <code>x</code> and <code>y</code> vectors along with the predictions.</p>
+<div class="imageblock"><img src="images/math-expressions/knnRegress.png" alt="knnRegress"></div>
+<p>Notice that the regression plot shows a non-linear relationship between the <code>filesize_d</code>
+field and the <code>eresponse_d</code> field. Also note that KNN regression
+plots a non-linear curve through the scatter plot. The larger the size
+of K (nearest neighbors), the smoother the line.</p></section>
+<section class="sect2"><h3 id="multivariate-non-linear-regression">Multivariate Non-Linear Regression</h3><p>The <code>knnRegress</code> function is also a powerful and flexible tool for
+multi-variate non-linear regression.</p>
+<p>In the example below a multi-variate regression is performed using
+a database designed for analyzing and predicting wine quality. The
+database contains nearly 1600 records with 9 predictors of wine quality:
+pH, alcohol, fixed_acidity, sulphates, density, free_sulfur_dioxide,
+volatile_acidity, citric_acid, residual_sugar. There is also a field
+called quality assigned to each wine ranging
+from 3 to 8.</p>
+<p>KNN regression can be used to predict wine quality for vectors containing
+the predictor values.</p>
+<p>In the example a search is performed on the <code>redwine</code> collection to
+return all the rows in the database of observations. Then the quality field and
+predictor fields are read into vectors and set to variables.</p>
+<p>The predictor variables are added as rows to a matrix which is
+transposed so each row in the matrix contains one observation with the 9
+predictor values.
+This is our observation matrix which is assigned to the variable <code>obs</code>.</p>
+<p>Then the <code>knnRegress</code> function regresses the observations with quality outcomes.
+The value for K is set to 5 in the example, so the average quality of the 5
+nearest neighbors will be used to calculate the quality.</p>
+<p>The <code>predict</code> function is then used to generate a vector of predictions
+for the entire observation set. These predictions will be used to determine
+how well the KNN regression performed over the observation data.</p>
+<p>The error, or <strong>residuals</strong>, for the regression are then calculated by
+subtracting the <strong>predicted</strong> quality from the <strong>observed</strong> quality.
+The <code>ebeSubtract</code> function is used to perform the element-by-element
+subtraction between the two vectors.</p>
+<p>Finally the <code>zplot</code> function formats the predictions and errors
+for the visualization of the <strong>residual plot</strong>.</p>
+<div class="imageblock"><img src="images/math-expressions/redwine1.png" alt="redwine1"></div>
+<p>The residual plot plots the <strong>predicted</strong> values on the x-axis and the <strong>error</strong> for the
+prediction on the y-axis. The scatter plot shows how the errors
+are distributed across the full range of predictions.</p>
+<p>The residual plot can be interpreted to understand how the KNN regression performed on the
+training data.</p>
+<div class="ulist"><ul><li><p>The plot shows the prediction error appears to be fairly evenly distributed
+above and below zero. The density of the errors increases as it approaches zero. The
+bubble size reflects the density of errors at the specific point in the plot.
+This provides an intuitive feel for the distribution of the model&#8217;s error.</p></li><li><p>The plot also visualizes the variance of the error across the range of
+predictions. This provides an intuitive understanding of whether the KNN predictions
+will have similar error variance across the full range of predictions.</p></li></ul></div>
+<p>The residuals can also be visualized using a histogram to better understand
+the shape of the residuals distribution. The example below shows the same KNN
+regression as above with a plot of the distribution of the errors.</p>
+<p>In the example the <code>zplot</code> function is used to plot the <code>empiricalDistribution</code>
+function of the residuals, with an 11 bin histogram.</p>
+<div class="imageblock"><img src="images/math-expressions/redwine2.png" alt="redwine2"></div>
+<p>Notice that the errors follow a bell curve centered close to 0. From this plot
+we can see the probability of getting prediction errors between -1 and 1 is quite high.</p>
+<p><strong>Additional KNN Regression Parameters</strong></p>
+<p>The <code>knnRegress</code> function has three additional parameters that make it suitable for many different regression scenarios.</p>
+<div class="olist arabic"><ol class="arabic"><li><p>Any of the distance measures can be used for the regression simply by adding the function to the call.
+This allows for regression analysis over sparse vectors (<code>cosine</code>), dense vectors and geo-spatial lat/lon vectors (<code>haversineMeters</code>).</p><p>Sample syntax:</p>
+<div class="listingblock"><pre class="rouge highlight"><code class="language-text" data-lang="text"><pre class="highlight"><code>r=knnRegress(obs, quality, 5, cosine()),</code></pre></code></pre></div></li><li><p>The <code>robust</code> named parameter can be used to perform a regression analysis that is robust to outliers in the outcomes.
+When the <code>robust</code> parameter is used the median outcome of the k-nearest neighbors is used rather than the average.</p><p>Sample syntax:</p>
+<div class="listingblock"><pre class="rouge highlight"><code class="language-text" data-lang="text"><pre class="highlight"><code>r=knnRegress(obs, quality, 5, robust="true"),</code></pre></code></pre></div></li><li><p>The <code>scale</code> named parameter can be used to scale the columns of the observations and search vectors
+at prediction time. This can improve the performance of the KNN regression when the feature columns
+are at different scales causing the distance calculations to place too much weight on the larger columns.</p><p>Sample syntax:</p>
+<div class="listingblock"><pre class="rouge highlight"><code class="language-text" data-lang="text"><pre class="highlight"><code>r=knnRegress(obs, quality, 5, scale="true"),</code></pre></code></pre></div></li></ol></div></section></section>
+<section class="sect1"><h2 id="knnsearch">knnSearch</h2><p>The <code>knnSearch</code> function returns the k-nearest neighbors
+for a document based on text similarity.
+Under the covers the <code>knnSearch</code> function uses Solr&#8217;s <a href="other-parsers.html#more-like-this-query-parser">More Like This</a> query parser plugin.
+This capability uses the search engine&#8217;s query, term statistics, scoring, and ranking capability to perform a fast, nearest neighbor search for similar documents over large distributed indexes.</p>
+<p>The results of this search can be used directly or provide <strong>candidates</strong> for machine learning operations such as a secondary KNN vector search.</p>
+<p>The example below shows the <code>knnSearch</code> function on a movie reviews data set. The search returns the 50 documents most similar to a specific document ID (<code>83e9b5b0&#8230;&#8203;</code>) based on the similarity of the <code>review_t</code> field.
+The <code>mindf</code> and <code>maxdf</code> specify the minimum and maximum document frequency of the terms used to perform the search.
+These parameters can make the query faster by eliminating high frequency terms and also improve accuracy by removing noise terms from the search.</p>
+<div class="imageblock"><img src="images/math-expressions/knnSearch.png" alt="knnSearch"></div>
+<div class="admonitionblock note">
+<table>
+<tr>
+<td class="icon">
+<i class="fa icon-note" title="Note"></i>
+</td>
+<td class="content">
+In this example the <code>select</code>
+function is used to truncate the review in the output to 220 characters to make it easier
+to read in a table.
+</td>
+</tr>
+</table>
+</div></section>
+<section class="sect1"><h2 id="dbscan">DBSCAN</h2><p>DBSCAN clustering is a powerful density-based clustering algorithm which is particularly well suited for geospatial clustering.
+DBSCAN uses two parameters to filter result sets to clusters of specific density:</p>
+<div class="ulist"><ul><li><p><code>eps</code> (Epsilon): Defines the distance between points to be considered as neighbors</p></li><li><p><code>min</code> points: The minimum number of points needed in a cluster for it to be returned.</p></li></ul></div>
+<section class="sect2"><h3 id="2d-cluster-visualization">2D Cluster Visualization</h3><p>The <code>zplot</code> function has direct support for plotting 2D clusters by using the <code>clusters</code> named parameter.</p>
+<p>The example below uses DBSCAN clustering and cluster visualization to find
+the <strong>hot spots</strong> on a map for rat sightings in the NYC 311 complaints database.</p>
+<p>In this example the <code>random</code> function draws a sample of records from the <code>nyc311</code> collection where
+the complaint description matches "rat sighting" and latitude is populated in the record.
+The latitude and longitude fields are then vectorized and added as rows to a matrix.
+The matrix is transposed so each row contains a single latitude, longitude
+point.
+The <code>dbscan</code> function is then used to cluster the latitude and longitude points.
+Notice that the <code>dbscan</code> function in the example has four parameters.</p>
+<div class="ulist"><ul><li><p><code>obs</code> : The observation matrix of lat/lon points</p></li><li><p><code>eps</code> : The distance between points to be considered a cluster. 100 meters in the example.</p></li><li><p><code>min points</code>: The minimum points in a cluster for the cluster to be returned by the function. <code>5</code> in the example.</p></li><li><p><code>distance measure</code>: An optional distance measure used to determine the
+distance between points. The default is Euclidean distance.
+The example uses <code>haversineMeters</code> which returns the distance in meters which is much more meaningful for geospatial use cases.</p></li></ul></div>
+<p>Finally, the <code>zplot</code> function is used to visualize the clusters on a map with Zeppelin-Solr.
+The map below has been zoomed to a specific area of Brooklyn with a high density of rat sightings.</p>
+<div class="imageblock"><img src="images/math-expressions/dbscan1.png" alt="dbscan1"></div>
+<p>Notice in the visualization that only 1019 points were returned from the 5000 samples.
+This is the power of the DBSCAN algorithm to filter records that don&#8217;t match the criteria
+of a cluster. The points that are plotted all belong to clearly defined clusters.</p>
+<p>The map visualization can be zoomed further to explore the locations of specific clusters.
+The example below shows a zoom into an area of dense clusters.</p>
+<div class="imageblock"><img src="images/math-expressions/dbscan2.png" alt="dbscan2"></div></section></section>
 <section class="sect1"><h2 id="k-means-clustering">K-Means Clustering</h2><p>The <code>kmeans</code> functions performs k-means clustering of the rows of a matrix.
 Once the clustering has been completed there are a number of useful functions available
-for examining the clusters and centroids.</p>
-<p>The examples below cluster <em>term vectors</em>.
+for examining and visualizing the clusters and centroids.</p>
+<section class="sect2"><h3 id="clustered-scatter-plot">Clustered Scatter Plot</h3><p>In this example we&#8217;ll again be clustering 2D lat/lon points of rat sightings. But unlike the DBSCAN example, k-means clustering
+does not on its own
+perform any noise reduction. So in order to reduce the noise a smaller random sample is selected from the data than was used
+for the DBSCAN example.</p>
+<p>We&#8217;ll see that sampling itself is a powerful noise reduction tool which helps visualize the cluster density.
+This is because there is a higher probability that samples will be drawn from higher density clusters and a lower
+probability that samples will be drawn from lower density clusters.</p>
+<p>In this example the <code>random</code> function draws a sample of 1500 records from the <code>nyc311</code> (complaints database) collection where
+the complaint description matches "rat sighting" and latitude is populated in the record. The latitude and longitude fields
+are then vectorized and added as rows to a matrix. The matrix is transposed so each row contains a single latitude, longitude
+point. The <code>kmeans</code> function is then used to cluster the latitude and longitude points into 21 clusters.
+Finally, the <code>zplot</code> function is used to visualize the clusters as a scatter plot.</p>
+<div class="imageblock"><img src="images/math-expressions/2DCluster1.png" alt="2DCluster1"></div>
+<p>The scatter plot above shows each lat/lon point plotted on a Euclidean plane with longitude on the
+x-axis and
+latitude on the y-axis. The plot is dense enough so the outlines of the different boroughs are visible
+if you know the boroughs of New York City.</p>
+<p>Each cluster is shown in a different color. This plot provides interesting
+insight into the densities of rat sightings throughout the five boroughs of New York City. For
+example it highlights a cluster of dense sightings in Brooklyn at cluster1
+surrounded by less dense but still high activity clusters.</p></section>
+<section class="sect2"><h3 id="plotting-the-centroids">Plotting the Centroids</h3><p>The centroids of each cluster can then be plotted on a map to visualize the center of the
+clusters. In the example below the centroids are extracted from the clusters using the <code>getCentroids</code>
+function, which returns a matrix of the centroids.</p>
+<p>The centroids matrix contains 2D lat/lon points. The <code>colAt</code> function can then be used
+to extract the latitude and longitude columns by index from the matrix so they can be
+plotted with <code>zplot</code>. A map visualization is used below to display the centroids.</p>
+<div class="imageblock"><img src="images/math-expressions/centroidplot.png" alt="centroidplot"></div>
+<p>The map can then be zoomed to get a closer look at the centroids in the high density areas shown
+in the cluster scatter plot.</p>
+<div class="imageblock"><img src="images/math-expressions/centroidzoom.png" alt="centroidzoom"></div></section>
+<section class="sect2"><h3 id="phrase-extraction">Phrase Extraction</h3><p>K-means clustering produces centroids or <strong>prototype</strong> vectors which can be used to represent
+each cluster. In this example the key features of the centroids are extracted
+to represent the key phrases for clusters of TF-IDF term vectors.</p>
+<div class="admonitionblock note">
+<table>
+<tr>
+<td class="icon">
+<i class="fa icon-note" title="Note"></i>
+</td>
+<td class="content">
+The example below works with TF-IDF <em>term vectors</em>.
 The section <a href="term-vectors.html#term-vectors">Text Analysis and Term Vectors</a> offers
-a full explanation of these features.</p>
-<section class="sect2"><h3 id="centroid-features">Centroid Features</h3><p>In the example below the <code>kmeans</code> function is used to cluster a result set from the Enron email data-set
-and then the top features are extracted from the cluster centroids.</p>
-<div class="listingblock"><pre class="rouge highlight"><code class="language-text" data-lang="text"><pre class="highlight"><code>let(a=select(random(enron, q="body:oil", rows="500", fl="id, body"), <i class="conum" data-value="1"></i>
-                    id,
-                    analyze(body, body_bigram) as terms),
-    b=termVectors(a, maxDocFreq=.10, minDocFreq=.05, minTermLength=14, exclude="_,copyright"),<i class="conum" data-value="2"></i>
-    c=kmeans(b, 5), <i class="conum" data-value="3"></i>
-    d=getCentroids(c), <i class="conum" data-value="4"></i>
-    e=topFeatures(d, 5)) <i class="conum" data-value="5"></i>
-</code></pre></code></pre></div>
-<p>Let&#8217;s look at what data is assigned to each variable:</p>
-<div class="colist arabic"><table><tr><td><i class="conum" data-value="1"></i><b>1</b></td><td><strong><code>a</code></strong>: The <code>random</code> function returns a sample of 500 documents from the "enron"
-collection that match the query "body:oil". The <code>select</code> function selects the <code>id</code> and
-and annotates each tuple with the analyzed bigram terms from the <code>body</code> field.</td></tr><tr><td><i class="conum" data-value="2"></i><b>2</b></td><td><strong><code>b</code></strong>: The <code>termVectors</code> function creates a TF-IDF term vector matrix from the
-tuples stored in variable <strong><code>a</code></strong>. Each row in the matrix represents a document. The columns of the matrix
-are the bigram terms that were attached to each tuple.</td></tr><tr><td><i class="conum" data-value="3"></i><b>3</b></td><td><strong><code>c</code></strong>: The <code>kmeans</code> function clusters the rows of the matrix into 5 clusters. The k-means clustering is performed using the Euclidean distance measure.</td></tr><tr><td><i class="conum" data-value="4"></i><b>4</b></td><td><strong><code>d</code></strong>: The <code>getCentroids</code> function returns a matrix of cluster centroids. Each row in the matrix is a centroid
-from one of the 5 clusters. The columns of the matrix are the same bigrams terms of the term vector matrix.</td></tr><tr><td><i class="conum" data-value="5"></i><b>5</b></td><td><strong><code>e</code></strong>: The <code>topFeatures</code> function returns the column labels for the top 5 features of each centroid in the matrix.
-This returns the top 5 bigram terms for each centroid.</td></tr></table></div>
+a full explanation of these features.
+</td>
+</tr>
+</table>
+</div>
+<p>In the example the <code>search</code> function returns documents where the <code>review_t</code> field matches the phrase "star wars".
+The <code>select</code> function is run over the result set and applies the <code>analyze</code> function
+which uses the Lucene/Solr analyzer attached to the schema field <code>text_bigrams</code> to re-analyze the <code>review_t</code>
+field. This analyzer returns bigrams which are then annotated to documents in a field called <code>terms</code>.</p>
+<p>The <code>termVectors</code> function then creates TF-IDF term vectors from the bigrams stored in the <code>terms</code> field.
+The <code>kmeans</code> function is then used to cluster the bigram term vectors into 5 clusters.
+Finally the top 5 features are extracted from the centroids and returned.
+Notice that the features are all bigram phrases with semantic significance.</p>
+<div class="listingblock"><pre class="rouge highlight"><code class="language-text" data-lang="text"><pre class="highlight"><code>let(a=select(search(reviews, q="review_t:\"star wars\"", rows="500"),
+             id,
+             analyze(review_t, text_bigrams) as terms),
+    vectors=termVectors(a, maxDocFreq=.10, minDocFreq=.03, minTermLength=13, exclude="_,br,have"),
+    clusters=kmeans(vectors, 5),
+    centroids=getCentroids(clusters),
+    phrases=topFeatures(centroids, 5))</code></pre></code></pre></div>
+<p>When this expression is sent to the <code>/stream</code> handler it responds with:</p>
+<div class="listingblock"><pre class="rouge highlight"><code class="language-text" data-lang="text"><pre class="highlight"><code>{
+  "result-set": {
+    "docs": [
+      {
+        "phrases": [
+          [
+            "empire strikes",
+            "rebel alliance",
+            "princess leia",
+            "luke skywalker",
+            "phantom menace"
+          ],
+          [
+            "original star",
+            "main characters",
+            "production values",
+            "anakin skywalker",
+            "luke skywalker"
+          ],
+          [
+            "carrie fisher",
+            "original films",
+            "harrison ford",
+            "luke skywalker",
+            "ian mcdiarmid"
+          ],
+          [
+            "phantom menace",
+            "original trilogy",
+            "harrison ford",
+            "john williams",
+            "empire strikes"
+          ],
+          [
+            "science fiction",
+            "fiction films",
+            "forbidden planet",
+            "character development",
+            "worth watching"
+          ]
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 46
+      }
+    ]
+  }
+}</code></pre></code></pre></div></section></section>
+<section class="sect1"><h2 id="multi-k-means-clustering">Multi K-Means Clustering</h2><p>K-means clustering will produce different outcomes depending on
+the initial placement of the centroids. K-means is fast enough
+that multiple trials can be performed so that the best outcome can be selected.</p>
+<p>The <code>multiKmeans</code> function runs the k-means clustering algorithm for a given number of trials and selects the
+best result based on which trial produces the lowest intra-cluster variance.</p>
+<p>The example below is identical to the phrase extraction example except that it uses <code>multiKmeans</code> with 15 trials,
+rather than a single trial of the <code>kmeans</code> function.</p>
+<div class="listingblock"><pre class="rouge highlight"><code class="language-text" data-lang="text"><pre class="highlight"><code>let(a=select(search(reviews, q="review_t:\"star wars\"", rows="500"),
+             id,
+             analyze(review_t, text_bigrams) as terms),
+    vectors=termVectors(a, maxDocFreq=.10, minDocFreq=.03, minTermLength=13, exclude="_,br,have"),
+    clusters=multiKmeans(vectors, 5, 15),
+    centroids=getCentroids(clusters),
+    phrases=topFeatures(centroids, 5))</code></pre></code></pre></div>
 <p>This expression returns the following response:</p>
 <div class="listingblock"><pre class="rouge highlight"><code class="language-json" data-lang="json"><pre class="highlight"><code><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
   </span><span style="color: #000080">"result-set"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
     </span><span style="color: #000080">"docs"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
       </span><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
-        </span><span style="color: #000080">"e"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
+        </span><span style="color: #000080">"phrases"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
           </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"enron enronxgate"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"north american"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"energy services"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"conference call"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"power generation"</span><span style="color: #bbbbbb">
+            </span><span style="color: #d14">"science fiction"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
+            </span><span style="color: #d14">"original star"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
+            </span><span style="color: #d14">"production values"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
+            </span><span style="color: #d14">"fiction films"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
+            </span><span style="color: #d14">"forbidden planet"</span><span style="color: #bbbbbb">
           </span><span style="background-color: #f8f8f8">],</span><span style="color: #bbbbbb">
           </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"financial times"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"chief financial"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"financial officer"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"exchange commission"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"houston chronicle"</span><span style="color: #bbbbbb">
+            </span><span style="color: #d14">"empire strikes"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
+            </span><span style="color: #d14">"princess leia"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
+            </span><span style="color: #d14">"luke skywalker"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
+            </span><span style="color: #d14">"phantom menace"</span><span style="color: #bbbbbb">
           </span><span style="background-color: #f8f8f8">],</span><span style="color: #bbbbbb">
           </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"southern california"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"california edison"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"public utilities"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"utilities commission"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"rate increases"</span><span style="color: #bbbbbb">
+            </span><span style="color: #d14">"carrie fisher"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
+            </span><span style="color: #d14">"harrison ford"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
+            </span><span style="color: #d14">"luke skywalker"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
+            </span><span style="color: #d14">"empire strikes"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
+            </span><span style="color: #d14">"original films"</span><span style="color: #bbbbbb">
           </span><span style="background-color: #f8f8f8">],</span><span style="color: #bbbbbb">
           </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"rolling blackouts"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"public utilities"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"electricity prices"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"federal energy"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"price controls"</span><span style="color: #bbbbbb">
+            </span><span style="color: #d14">"phantom menace"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
+            </span><span style="color: #d14">"original trilogy"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
+            </span><span style="color: #d14">"harrison ford"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
+            </span><span style="color: #d14">"character development"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
+            </span><span style="color: #d14">"john williams"</span><span style="color: #bbbbbb">
           </span><span style="background-color: #f8f8f8">],</span><span style="color: #bbbbbb">
           </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"california edison"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"regulatory commission"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"southern california"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"federal energy"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"power generators"</span><span style="color: #bbbbbb">
+            </span><span style="color: #d14">"rebel alliance"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
+            </span><span style="color: #d14">"empire strikes"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
+            </span><span style="color: #d14">"princess leia"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
+            </span><span style="color: #d14">"original trilogy"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
+            </span><span style="color: #d14">"luke skywalker"</span><span style="color: #bbbbbb">
           </span><span style="background-color: #f8f8f8">]</span><span style="color: #bbbbbb">
         </span><span style="background-color: #f8f8f8">]</span><span style="color: #bbbbbb">
       </span><span style="background-color: #f8f8f8">},</span><span style="color: #bbbbbb">
       </span><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
         </span><span style="color: #000080">"EOF"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="color: #000000;font-weight: bold">true</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-        </span><span style="color: #000080">"RESPONSE_TIME"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="color: #009999">982</span><span style="color: #bbbbbb">
+        </span><span style="color: #000080">"RESPONSE_TIME"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="color: #009999">84</span><span style="color: #bbbbbb">
       </span><span style="background-color: #f8f8f8">}</span><span style="color: #bbbbbb">
     </span><span style="background-color: #f8f8f8">]</span><span style="color: #bbbbbb">
   </span><span style="background-color: #f8f8f8">}</span><span style="color: #bbbbbb">
 </span><span style="background-color: #f8f8f8">}</span></code></pre></code></pre></div></section>
-<section class="sect2"><h3 id="cluster-features">Cluster Features</h3><p>The example below examines the top features of a specific cluster. This example uses the same techniques
-as the centroids example but the top features are extracted from a cluster rather than the centroids.</p>
-<div class="listingblock"><pre class="rouge highlight"><code class="language-text" data-lang="text"><pre class="highlight"><code>let(a=select(random(collection3, q="body:oil", rows="500", fl="id, body"),
-                    id,
-                    analyze(body, body_bigram) as terms),
-    b=termVectors(a, maxDocFreq=.09, minDocFreq=.03, minTermLength=14, exclude="_,copyright"),
-    c=kmeans(b, 25),
-    d=getCluster(c, 0), <i class="conum" data-value="1"></i>
-    e=topFeatures(d, 4)) <i class="conum" data-value="2"></i>
-</code></pre></code></pre></div>
-<div class="colist arabic"><table><tr><td><i class="conum" data-value="1"></i><b>1</b></td><td>The <code>getCluster</code> function returns a cluster by its index. Each cluster is a matrix containing term vectors
-that have been clustered together based on their features.</td></tr><tr><td><i class="conum" data-value="2"></i><b>2</b></td><td>The <code>topFeatures</code> function is used to extract the top 4 features from each term vector
-in the cluster.</td></tr></table></div>
-<p>This expression returns the following response:</p>
+<section class="sect1"><h2 id="fuzzy-k-means-clustering">Fuzzy K-Means Clustering</h2><p>The <code>fuzzyKmeans</code> function is a soft clustering algorithm which
+allows vectors to be assigned to more than one cluster. The <code>fuzziness</code> parameter
+is a value between <code>1</code> and <code>2</code> that determines how fuzzy to make the cluster assignment.</p>
+<p>After the clustering has been performed the <code>getMembershipMatrix</code> function can be called
+on the clustering result to return a matrix describing the probabilities
+of cluster membership for each vector.
+This matrix can be used to understand relationships between clusters.</p>
+<p>In the example below <code>fuzzyKmeans</code> is used to cluster the movie reviews matching the phrase "star wars".
+But instead of looking at the clusters or centroids, the <code>getMembershipMatrix</code> is used to return the
+membership probabilities for each document. The membership matrix is comprised of a row for each
+vector that was clustered. There is a column in the matrix for each cluster.
+The values in the matrix contain the probability that a specific vector belongs to a specific cluster.</p>
+<p>In the example the <code>distance</code> function is then used to create a <strong>distance matrix</strong> from the columns of the
+membership matrix. The distance matrix is then visualized with the <code>zplot</code> function as a heat map.</p>
+<p>In the example <code>cluster1</code> and <code>cluster5</code> have the shortest distance between the clusters.
+Further analysis of the features in both clusters can be performed to understand
+the relationship between <code>cluster1</code> and <code>cluster5</code>.</p>
+<div class="imageblock"><img src="images/math-expressions/fuzzyk.png" alt="fuzzyk"></div>
+<div class="admonitionblock note">
+<table>
+<tr>
+<td class="icon">
+<i class="fa icon-note" title="Note"></i>
+</td>
+<td class="content">
+The heat map has been configured to increase in color intensity as the distance shortens.
+</td>
+</tr>
+</table>
+</div></section>
+<section class="sect1"><h2 id="feature-scaling">Feature Scaling</h2><p>Before performing machine learning operations it's often necessary to
+scale the feature vectors so they can be compared at the same scale.</p>
+<p>All the scaling functions below operate on vectors and matrices.
+When operating on a matrix the rows of the matrix are scaled.</p>
+<section class="sect2"><h3 id="minmax-scaling">Min/Max Scaling</h3><p>The <code>minMaxScale</code> function scales a vector or matrix between a minimum and maximum value.
+By default it will scale between <code>0</code> and <code>1</code> if min/max values are not provided.</p>
+<p>Below is a plot of a sine wave, with an amplitude of 1, before and
+after it has been scaled between -5 and 5.</p>
+<div class="imageblock"><img src="images/math-expressions/minmaxscale.png" alt="minmaxscale"></div>
+<p>Below is a simple example of min/max scaling of a matrix between 0 and 1.
+Notice that once brought into the same scale the vectors are the same.</p>
+<div class="listingblock"><pre class="rouge highlight"><code class="language-text" data-lang="text"><pre class="highlight"><code>let(a=array(20, 30, 40, 50),
+    b=array(200, 300, 400, 500),
+    c=matrix(a, b),
+    d=minMaxScale(c))</code></pre></code></pre></div>
+<p>When this expression is sent to the <code>/stream</code> handler it responds with:</p>
 <div class="listingblock"><pre class="rouge highlight"><code class="language-json" data-lang="json"><pre class="highlight"><code><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
   </span><span style="color: #000080">"result-set"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
     </span><span style="color: #000080">"docs"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
       </span><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
-        </span><span style="color: #000080">"e"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"electricity board"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"maharashtra state"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"power purchase"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"state electricity"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"reserved enron"</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">],</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"electricity board"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"maharashtra state"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"state electricity"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"purchase agreement"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"independent power"</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">],</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"maharashtra state"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"reserved enron"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"federal government"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"state government"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"dabhol project"</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">],</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"purchase agreement"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"power purchase"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"electricity board"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"maharashtra state"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"state government"</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">],</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"investment grade"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"portland general"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"general electric"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"holding company"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"transmission lines"</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">],</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"state government"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"state electricity"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"purchase agreement"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"electricity board"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"maharashtra state"</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">],</span><span style="color: #bbbbbb">
+        </span><span style="color: #000080">"d"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
           </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"electricity board"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"state electricity"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"energy management"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"maharashtra state"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"energy markets"</span><span style="color: #bbbbbb">
+            </span><span style="color: #009999">0</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
+            </span><span style="color: #009999">0.3333333333333333</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
+            </span><span style="color: #009999">0.6666666666666666</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
+            </span><span style="color: #009999">1</span><span style="color: #bbbbbb">
           </span><span style="background-color: #f8f8f8">],</span><span style="color: #bbbbbb">
           </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"electricity board"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"maharashtra state"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"state electricity"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"state government"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"second quarter"</span><span style="color: #bbbbbb">
+            </span><span style="color: #009999">0</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
+            </span><span style="color: #009999">0.3333333333333333</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
+            </span><span style="color: #009999">0.6666666666666666</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
+            </span><span style="color: #009999">1</span><span style="color: #bbbbbb">
           </span><span style="background-color: #f8f8f8">]</span><span style="color: #bbbbbb">
         </span><span style="background-color: #f8f8f8">]</span><span style="color: #bbbbbb">
       </span><span style="background-color: #f8f8f8">},</span><span style="color: #bbbbbb">
       </span><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
         </span><span style="color: #000080">"EOF"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="color: #000000;font-weight: bold">true</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-        </span><span style="color: #000080">"RESPONSE_TIME"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="color: #009999">978</span><span style="color: #bbbbbb">
+        </span><span style="color: #000080">"RESPONSE_TIME"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="color: #009999">0</span><span style="color: #bbbbbb">
       </span><span style="background-color: #f8f8f8">}</span><span style="color: #bbbbbb">
     </span><span style="background-color: #f8f8f8">]</span><span style="color: #bbbbbb">
   </span><span style="background-color: #f8f8f8">}</span><span style="color: #bbbbbb">
-</span><span style="background-color: #f8f8f8">}</span></code></pre></code></pre></div></section></section>
-<section class="sect1"><h2 id="multi-k-means-clustering">Multi K-Means Clustering</h2><p>K-means clustering will produce different results depending on
-the initial placement of the centroids. K-means is fast enough
-that multiple trials can be performed and the best outcome selected.</p>
-<p>The <code>multiKmeans</code> function runs the k-means clustering algorithm for a given number of trials and selects the
-best result based on which trial produces the lowest intra-cluster variance.</p>
-<p>The example below is identical to centroids example except that it uses <code>multiKmeans</code> with 100 trials,
-rather than a single trial of the <code>kmeans</code> function.</p>
-<div class="listingblock"><pre class="rouge highlight"><code class="language-text" data-lang="text"><pre class="highlight"><code>let(a=select(random(collection3, q="body:oil", rows="500", fl="id, body"),
-                    id,
-                    analyze(body, body_bigram) as terms),
-    b=termVectors(a, maxDocFreq=.09, minDocFreq=.03, minTermLength=14, exclude="_,copyright"),
-    c=multiKmeans(b, 5, 100),
-    d=getCentroids(c),
-    e=topFeatures(d, 5))</code></pre></code></pre></div>
-<p>This expression returns the following response:</p>
+</span><span style="background-color: #f8f8f8">}</span></code></pre></code></pre></div></section>
+<section class="sect2"><h3 id="standardization">Standardization</h3><p>The <code>standardize</code> function scales a vector so that it has a
+mean of 0 and a standard deviation of 1.</p>
+<p>Below is a plot of a sine wave, with an amplitude of 1, before and
+after it has been standardized.</p>
+<div class="imageblock"><img src="images/math-expressions/standardize.png" alt="standardize"></div>
+<p>Below is a simple example of a standardized matrix.
+Notice that once brought into the same scale the vectors are the same.</p>
+<div class="listingblock"><pre class="rouge highlight"><code class="language-text" data-lang="text"><pre class="highlight"><code>let(a=array(20, 30, 40, 50),
+    b=array(200, 300, 400, 500),
+    c=matrix(a, b),
+    d=standardize(c))</code></pre></code></pre></div>
+<p>When this expression is sent to the <code>/stream</code> handler it responds with:</p>
 <div class="listingblock"><pre class="rouge highlight"><code class="language-json" data-lang="json"><pre class="highlight"><code><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
   </span><span style="color: #000080">"result-set"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
     </span><span style="color: #000080">"docs"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
       </span><span style="background-color: #f8f8f8">{</span><span style="color: #bbbbbb">
-        </span><span style="color: #000080">"e"</span><span style="background-color: #f8f8f8">:</span><span style="color: #bbbbbb"> </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"enron enronxgate"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"energy trading"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"energy markets"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"energy services"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"unleaded gasoline"</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">],</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"maharashtra state"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"electricity board"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"state electricity"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"energy trading"</span><span style="background-color: #f8f8f8">,</span><span style="color: #bbbbbb">
-            </span><span style="color: #d14">"chief financial"</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">],</span><span style="color: #bbbbbb">
-          </span><span style="background-color: #f8f8f8">[</span><span style="color: #bbbbbb">

[... 307 lines stripped ...]