You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2012/02/11 11:22:31 UTC

svn commit: r1243022 [23/38] - in /mahout/site/new_website: ./ MAHOUT/ MAHOUT/2010/ MAHOUT/2010/09/ MAHOUT/2010/09/14/ MAHOUT/2011/ MAHOUT/2011/10/ MAHOUT/2011/10/21/ MAHOUT/books-tutorials-and-talks.data/ MAHOUT/books-tutorials-talks.data/ MAHOUT/book...

Added: mahout/site/new_website/MAHOUT/random-forests.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/random-forests.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/random-forests.html (added)
+++ mahout/site/new_website/MAHOUT/random-forests.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,282 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null; /* element with id 'hide'; assigned in init() */
+      var show = null; /* element with id 'show'; assigned in init() */
+      var children = null; /* the 'children' element; assigned in init() */
+
+      function init() { /* Page setup, run from the BODY onload attribute. */
+        /* Search form initialization: when a form named 'search' exists, copy location.hostname into its 'domains' and 'sitesearch' fields. */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization: look up the 'hide', 'show' and 'children' elements and, when 'children' is found, start collapsed (children hidden, 'show' visible, 'hide' hidden). */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ? /* use document.all when the browser exposes it, otherwise getElementById */
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) { /* NOTE(review): show and hide are dereferenced below without their own null checks */
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() { /* Expand: display the 'children' element as a block, hide the 'show' control and reveal the 'hide' control. Uses the globals assigned by init(). */
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+
+      function hideChildren() { /* Collapse: hide the 'children' element, reveal the 'show' control and hide the 'hide' control. Uses the globals assigned by init(). */
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>Random Forests</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="algorithms.html" title="Algorithms">Algorithms</A>&nbsp;&gt;&nbsp;<A href="" title="Random Forests">Random Forests</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">Random Forests</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=112565">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=112565">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=112565">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=112565">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=112565">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=112565">Add News</A>
+        </DIV>
+      </DIV>
+      <DIV class="pagesubheading" style="margin: 0px 10px 0px 10px;">
+        #editReport()
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <H3><A name="RandomForests-HowtogrowaDecisionTree"></A>How to grow a Decision Tree</H3>
+
+<P>source : [3]</P>
+
+<P>LearnUnprunedTree(<B>X</B>,<B>Y</B>)</P>
+
+<P>Input: <B>X</B> a matrix of <B>R</B> rows and <B>M</B> columns where <B>X</B><B><SUB>ij</SUB></B> = the value of the <B>j</B>'th attribute in the <B>i</B>'th input datapoint. Each column consists of either all real values or all categorical values.<BR>
+Input: <B>Y</B> a vector of <B>R</B> elements, where <B>Y</B><B><SUB>i</SUB></B> = the output class of the <B>i</B>'th datapoint. The <B>Y</B><B><SUB>i</SUB></B> values are categorical.<BR>
+Output: An Unpruned decision tree</P>
+
+
+<P>If all records in <B>X</B> have identical values in all their attributes (this includes the case where <B>R&lt;2</B>), return a Leaf Node predicting the majority output, breaking ties randomly. This case also includes<BR>
+If all values in <B>Y</B> are the same, return a Leaf Node predicting this value as the output<BR>
+Else<BR>
+&nbsp;&nbsp;&nbsp; select <B>m</B> variables at random out of the <B>M</B> variables<BR>
+&nbsp;&nbsp;&nbsp; For <B>j</B> = 1 .. <B>m</B><BR>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; If <B>j</B>'th attribute is categorical<BR>
+<B>&nbsp; &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; IG</B><B><SUB>j</SUB></B> = IG(<B>Y</B>&#124;<B>X</B><B><SUB>j</SUB></B>) (see Information Gain)&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;<BR>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Else (<B>j</B>'th attribute is real-valued)<BR>
+<B>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; IG</B><B><SUB>j</SUB></B> = IG*(<B>Y</B>&#124;<B>X</B><B><SUB>j</SUB></B>) (see Information Gain)<BR>
+&nbsp;&nbsp;&nbsp; Let <B>j&#42;</B> = argmax<SUB>j</SUB> <B>IG</B><B><SUB>j</SUB></B> (this is the splitting attribute we'll use)<BR>
+&nbsp;&nbsp;&nbsp; If <B>j&#42;</B> is categorical then<BR>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; For each value <B>v</B> of the <B>j</B>'th attribute<BR>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Let <B>X</B><B><SUP>v</SUP></B> = subset of rows of <B>X</B> in which <B>X</B><B><SUB>ij</SUB></B> = <B>v</B>. Let <B>Y</B><B><SUP>v</SUP></B> = corresponding subset of <B>Y</B><BR>
+&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; Let <B>Child</B><B><SUP>v</SUP></B> = LearnUnprunedTree(<B>X</B><B><SUP>v</SUP></B>,<B>Y</B><B><SUP>v</SUP></B>)<BR>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Return a decision tree node, splitting on <B>j</B>'th attribute. The number of children equals the number of values of the <B>j</B>'th attribute, and the <B>v</B>'th child is <B>Child</B><B><SUP>v</SUP></B><BR>
+&nbsp;&nbsp;&nbsp; Else <B>j&#42;</B> is real-valued and let <B>t</B> be the best split threshold<BR>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Let <B>X</B><B><SUP>LO</SUP></B> = subset of rows of <B>X</B> in which <B>X</B><B><SUB>ij</SUB></B> <B>&lt;= t</B>. Let <B>Y</B><B><SUP>LO</SUP></B> = corresponding subset of <B>Y</B><BR>
+&nbsp; &nbsp; &nbsp; &nbsp; Let <B>Child</B><B><SUP>LO</SUP></B> = LearnUnprunedTree(<B>X</B><B><SUP>LO</SUP></B>,<B>Y</B><B><SUP>LO</SUP></B>)<BR>
+&nbsp; &nbsp; &nbsp; &nbsp; Let <B>X</B><B><SUP>HI</SUP></B> = subset of rows of <B>X</B> in which <B>X</B><B><SUB>ij</SUB></B> <B>&gt; t</B>. Let <B>Y</B><B><SUP>HI</SUP></B> = corresponding subset of <B>Y</B><BR>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Let <B>Child</B><B><SUP>HI</SUP></B> = LearnUnprunedTree(<B>X</B><B><SUP>HI</SUP></B>,<B>Y</B><B><SUP>HI</SUP></B>)<BR>
+&nbsp; &nbsp; &nbsp; &nbsp; Return a decision tree node, splitting on <B>j</B>'th attribute. It has two children corresponding to whether the <B>j</B>'th attribute is above or below the given threshold.</P>
+
+<P><B>Note</B>: There are alternatives to Information Gain for splitting nodes<BR>
+&nbsp;</P>
+
+<H3><A name="RandomForests-Informationgain"></A>Information gain</H3>
+
+<P>source : [3]</P>
+<OL>
+	<LI><H4><A name="RandomForests-nominalattributes"></A>nominal attributes</H4></LI>
+</OL>
+
+
+<P>suppose X can have one of m values V<SUB>1</SUB>,V<SUB>2</SUB>,...,V<SUB>m</SUB><BR>
+P(X=V<SUB>1</SUB>)=p<SUB>1</SUB>, P(X=V<SUB>2</SUB>)=p<SUB>2</SUB>,...,P(X=V<SUB>m</SUB>)=p<SUB>m</SUB><BR>
+&nbsp;<BR>
+H(X)= &#45;sum<SUB>j=1</SUB><SUP>m</SUP> p<SUB>j</SUB> log<SUB>2</SUB> p<SUB>j</SUB> (The entropy of X)<BR>
+H(Y&#124;X=v) = the entropy of Y among only those records in which X has value v<BR>
+H(Y&#124;X) = sum<SUB>j</SUB> p<SUB>j</SUB> H(Y&#124;X=v<SUB>j</SUB>)<BR>
+IG(Y&#124;X) = H(Y) - H(Y&#124;X)</P>
+<OL>
+	<LI><H4><A name="RandomForests-realvaluedattributes"></A>real-valued attributes</H4></LI>
+</OL>
+
+
+<P>suppose X is real valued<BR>
+define IG(Y&#124;X:t) as H(Y) - H(Y&#124;X:t)<BR>
+define H(Y&#124;X:t) = H(Y&#124;X&lt;t) P(X&lt;t) + H(Y&#124;X&gt;=t) P(X&gt;=t)<BR>
+define IG*(Y&#124;X) = max<SUB>t</SUB> IG(Y&#124;X:t)</P>
+
+<H3><A name="RandomForests-HowtogrowaRandomForest"></A>How to grow a Random Forest</H3>
+
+<P>source : [1]</P>
+
+<P>Each tree is grown as follows:</P>
+<OL>
+	<LI>if the number of cases in the training set is <B>N</B>, sample <B>N</B> cases at random &#45;but with replacement, from the original data. This sample will be the training set for the growing tree.</LI>
+	<LI>if there are <B>M</B> input variables, a number <B>m &lt;&lt; M</B> is specified such that at each node, <B>m</B> variables are selected at random out of the <B>M</B> and the best split on these <B>m</B> is used to split the node. The value of <B>m</B> is held constant during the forest growing.</LI>
+	<LI>each tree is grown to the largest extent possible. There is no pruning.</LI>
+</OL>
+
+
+<H3><A name="RandomForests-RandomForestparameters"></A>Random Forest parameters</H3>
+
+<P>source : [2]<BR>
+Random Forests are easy to use, the only 2 parameters a user of the technique has to determine are the number of trees to be used and the number of variables (<B>m</B>) to be randomly selected from the available set of variables.<BR>
+Breiman's recommendations are to pick a large number of trees, as well as the square root of the number of variables for <B>m</B>.<BR>
+&nbsp;</P>
+
+<H3><A name="RandomForests-Howtopredictthelabelofacase"></A>How to predict the label of a case</H3>
+
+<P>Classify(<B>node</B>,<B>V</B>)<BR>
+&nbsp;&nbsp;&nbsp; Input: <B>node</B> from the decision tree, if <B>node.attribute = j</B> then the split is done on the <B>j</B>'th attribute</P>
+
+<P>&nbsp;&nbsp; &nbsp;Input: <B>V</B> a vector of <B>M</B> columns where <B>V</B><B><SUB>j</SUB></B> = the value of the <B>j</B>'th attribute.<BR>
+&nbsp;&nbsp;&nbsp; Output: label of <B>V</B></P>
+
+<P>&nbsp;&nbsp;&nbsp; If <B>node</B> is a Leaf then<BR>
+&nbsp;&nbsp; &nbsp; &nbsp; &nbsp; &nbsp;&nbsp; Return the value predicted by <B>node</B></P>
+
+<P>&nbsp;&nbsp; &nbsp;Else<BR>
+&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Let <B>j = node.attribute</B><BR>
+&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; If <B>j</B> is categorical then<BR>
+&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Let <B>v</B> = <B>V</B><B><SUB>j</SUB></B><BR>
+&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Let <B>child</B><B><SUP>v</SUP></B> = child node corresponding to the attribute's value <B>v</B><BR>
+&nbsp; &nbsp; &nbsp; &nbsp;&nbsp; &nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;&nbsp; Return Classify(<B>child</B><B><SUP>v</SUP></B>,<B>V</B>)</P>
+
+<P>&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Else <B>j</B> is real-valued<BR>
+&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Let <B>t = node.threshold</B> (split threshold)<BR>
+&nbsp; &nbsp; &nbsp; &nbsp;&nbsp; &nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;&nbsp; If <B>V</B><B><SUB>j</SUB></B> &lt; t then<BR>
+&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;&nbsp;&nbsp;&nbsp; Let <B>child</B><B><SUP>LO</SUP></B> = child node corresponding to (<B>&lt;t</B>)<BR>
+&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;&nbsp; &nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; Return Classify(<B>child</B><B><SUP>LO</SUP></B>,<B>V</B>)<BR>
+&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Else<BR>
+&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;&nbsp; &nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;&nbsp; Let <B>child</B><B><SUP>HI</SUP></B> = child node corresponding to (<B>&gt;=t</B>)<BR>
+&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;&nbsp; &nbsp;&nbsp; Return Classify(<B>child</B><B><SUP>HI</SUP></B>,<B>V</B>)<BR>
+&nbsp;</P>
+
+<H3><A name="RandomForests-Theoutofbag%28oob%29errorestimation"></A>The out of bag (oob) error estimation</H3>
+
+<P>source : [1]</P>
+
+<P>in random forests, there is no need for cross-validation or a separate test set to get an unbiased estimate of the test set error. It is estimated internally, during the run, as follows:</P>
+<UL>
+	<LI>each tree is constructed using a different bootstrap sample from the original data. About one-third of the cases are left out of the bootstrap sample and not used in the construction of the <EM>kth</EM> tree.</LI>
+	<LI>put each case left out in the construction of the <EM>kth</EM> tree down the <EM>kth</EM> tree to get a classification. In this way, a test set classification is obtained for each case in about one-third of the trees. At the end of the run, take <B>j</B> to be the class that got most of the votes every time case <B>n</B> was <EM>oob</EM>. The proportion of times that <B>j</B> is not equal to the true class of <B>n</B> averaged over all cases is the <EM>oob error estimate</EM>. This has proven to be unbiased in many tests.</LI>
+</UL>
+
+
+<H3><A name="RandomForests-OtherRFuses"></A>Other RF uses</H3>
+
+<P>source : [1]</P>
+<UL>
+	<LI>variable importance</LI>
+	<LI>gini importance</LI>
+	<LI>proximities</LI>
+	<LI>scaling</LI>
+	<LI>prototypes</LI>
+	<LI>missing values replacement for the training set</LI>
+	<LI>missing values replacement for the test set</LI>
+	<LI>detecting mislabeled cases</LI>
+	<LI>detecting outliers</LI>
+	<LI>detecting novelties</LI>
+	<LI>unsupervised learning</LI>
+	<LI>balancing prediction error<BR>
+Please refer to [1] for a detailed description</LI>
+</UL>
+
+
+<H3><A name="RandomForests-References"></A>References</H3>
+
+<P>[1]&nbsp; Random Forests - Classification Description<BR>
+&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;<A href="http://www.stat.berkeley.edu/~breiman/RandomForests/cc_home.htm" class="external-link" rel="nofollow">http://www.stat.berkeley.edu/~breiman/RandomForests/cc_home.htm</A><BR>
+[2]&nbsp; B. Larivi&egrave;re &amp; D. Van Den Poel, 2004. &quot;Predicting Customer Retention and Profitability by Using Random Forests and Regression Forests Techniques,&quot;<BR>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Working Papers of Faculty of Economics and Business Administration, Ghent University, Belgium 04/282, Ghent University,<BR>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Faculty of Economics and Business Administration.<BR>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Available online : <A href="http://ideas.repec.org/p/rug/rugwps/04-282.html" class="external-link" rel="nofollow">http://ideas.repec.org/p/rug/rugwps/04-282.html</A><BR>
+[3]&nbsp; Decision Trees - Andrew W. Moore[4]<BR>
+&nbsp; &nbsp; &nbsp; &nbsp; <A href="http://www.cs.cmu.edu/~awm/tutorials" class="external-link" rel="nofollow">http://www.cs.cmu.edu/~awm/tutorials</A><BR>
+[4]&nbsp; Information Gain - Andrew W. Moore<BR>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <A href="http://www.cs.cmu.edu/~awm/tutorials" class="external-link" rel="nofollow">http://www.cs.cmu.edu/~awm/tutorials</A></P>
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.2 Build: 1810 Mar 16, 2010)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || []; /* keep an already-defined _gaq queue, otherwise start with an empty array */
+  _gaq.push(['_setAccount', 'UA-17359171-1']);
+  _gaq.push(['_setDomainName', 'none']);
+  _gaq.push(['_setAllowLinker', true]);
+  _gaq.push(['_trackPageview']);
+
+  (function() { /* Immediately-invoked: create an async script element for ga.js and insert it ahead of the first script element in the document. */
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js'; /* 'https://ssl' host prefix on https pages, 'http://www' otherwise */
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/recommendation-learning.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/recommendation-learning.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/recommendation-learning.html (added)
+++ mahout/site/new_website/MAHOUT/recommendation-learning.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,118 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('http://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null; /* element with id 'hide'; assigned in init() */
+      var show = null; /* element with id 'show'; assigned in init() */
+      var children = null; /* the 'children' element; assigned in init() */
+
+      function init() { /* Page setup, run from the BODY onload attribute. */
+        /* Search form initialization: when a form named 'search' exists, copy location.hostname into its 'domains' and 'sitesearch' fields. */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization: look up the 'hide', 'show' and 'children' elements and, when 'children' is found, start collapsed (children hidden, 'show' visible, 'hide' hidden). */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ? /* use document.all when the browser exposes it, otherwise getElementById */
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) { /* NOTE(review): show and hide are dereferenced below without their own null checks */
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() { /* Expand: display the 'children' element as a block, hide the 'show' control and reveal the 'hide' control. Uses the globals assigned by init(). */
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+
+      function hideChildren() { /* Collapse: hide the 'children' element, reveal the 'show' control and hide the 'hide' control. Uses the globals assigned by init(). */
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>Recommendation Learning</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" nowrap="">
+          &nbsp;<A href="index.html" title="Apache Lucene Mahout">Apache Lucene Mahout</A>&nbsp;&gt;&nbsp;<A href="index.html" title="index">index</A>&nbsp;&gt;&nbsp;<A href="algorithms.html" title="Algorithms">Algorithms</A>&nbsp;&gt;&nbsp;<A href="" title="Recommendation Learning">Recommendation Learning</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Lucene Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">Recommendation Learning</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="http://cwiki.apache.org/confluence/pages/editpage.action?pageId=14058307">
+            <IMG src="http://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="http://cwiki.apache.org/confluence/pages/editpage.action?pageId=14058307">Edit Page</A>
+          &nbsp;
+          <A href="http://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="http://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="http://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="http://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=14058307">
+            <IMG src="http://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="http://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=14058307">Add Page</A>
+          &nbsp;
+          <A href="http://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=14058307">
+            <IMG src="http://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="http://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=14058307">Add News</A>
+        </DIV>
+      </DIV>
+      <DIV class="pagesubheading" style="margin: 0px 10px 0px 10px;">
+                    Added by <A href="http://cwiki.apache.org/confluence/users/viewuserprofile.action?username=s_kumar">Satyendra Kumar</A>, last edited by <A href="http://cwiki.apache.org/confluence/users/viewuserprofile.action?username=s_kumar">Satyendra Kumar</A> on Feb 22, 2010
+                      
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 2.10.4 Build: 1520 Jul 24, 2009)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0.beta1)
+    </DIV>
+  </BODY>
+</HTML>

Added: mahout/site/new_website/MAHOUT/recommendationexamples.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/recommendationexamples.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/recommendationexamples.html (added)
+++ mahout/site/new_website/MAHOUT/recommendationexamples.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,172 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/space.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/wiki-content.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/abs.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/tables.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/panels.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/renderer-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/content-types.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/login.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/information-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/layout-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/default-theme.css">
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null; /* element with id 'hide'; assigned in init() */
+      var show = null; /* element with id 'show'; assigned in init() */
+      var children = null; /* the 'children' element; assigned in init() */
+
+      function init() { /* Page setup, run from the BODY onload attribute. */
+        /* Search form initialization: when a form named 'search' exists, copy location.hostname into its 'domains' and 'sitesearch' fields. */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization: look up the 'hide', 'show' and 'children' elements and, when 'children' is found, start collapsed (children hidden, 'show' visible, 'hide' hidden). */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ? /* use document.all when the browser exposes it, otherwise getElementById */
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) { /* NOTE(review): show and hide are dereferenced below without their own null checks */
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() { /* Expand: display the 'children' element as a block, hide the 'show' control and reveal the 'hide' control. Uses the globals assigned by init(). */
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+
+      function hideChildren() { /* Collapse: hide the 'children' element, reveal the 'show' control and hide the 'hide' control. Uses the globals assigned by init(). */
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>RecommendationExamples</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="quickstart.html" title="Quickstart">Quickstart</A>&nbsp;&gt;&nbsp;<A href="" title="RecommendationExamples">RecommendationExamples</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">RecommendationExamples</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=4587991">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=4587991">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=4587991">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=4587991">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=4587991">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=4587991">Add News</A>
+        </DIV>
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <H1><A name="RecommendationExamples-Introduction"></A>Introduction </H1>
+
+<P>This quick start page describes how to run the recommendation examples provided by Mahout. Mahout comes with four recommendation mining examples. They are based on the Netflix, Jester, GroupLens and BookCrossing data sets, respectively.</P>
+
+<H1><A name="RecommendationExamples-Steps"></A>Steps </H1>
+
+<H2><A name="RecommendationExamples-Testingitononesinglemachine"></A>Testing it on one single machine </H2>
+
+<P>In the examples directory type: </P>
+<DIV class="code panel" style="border-width: 1px;"><DIV class="codeContent panelContent">
+<PRE class="code-java"> 
+mvn -q exec:java -Dexec.mainClass=<SPAN class="code-quote">&quot;org.apache.mahout.cf.taste.example.bookcrossing.BookCrossingRecommenderEvaluatorRunner&quot;</SPAN> -Dexec.args=<SPAN class="code-quote">&quot;&lt;OPTIONS&gt;&quot;</SPAN> 
+mvn -q exec:java -Dexec.mainClass=<SPAN class="code-quote">&quot;org.apache.mahout.cf.taste.example.netflix.NetflixRecommenderEvaluatorRunner&quot;</SPAN> -Dexec.args=<SPAN class="code-quote">&quot;&lt;OPTIONS&gt;&quot;</SPAN> 
+mvn -q exec:java -Dexec.mainClass=<SPAN class="code-quote">&quot;org.apache.mahout.cf.taste.example.netflix.TransposeToByUser&quot;</SPAN> -Dexec.args=<SPAN class="code-quote">&quot;&lt;OPTIONS&gt;&quot;</SPAN> 
+mvn -q exec:java -Dexec.mainClass=<SPAN class="code-quote">&quot;org.apache.mahout.cf.taste.example.jester.JesterRecommenderEvaluatorRunner&quot;</SPAN> -Dexec.args=<SPAN class="code-quote">&quot;&lt;OPTIONS&gt;&quot;</SPAN> 
+mvn -q exec:java -Dexec.mainClass=<SPAN class="code-quote">&quot;org.apache.mahout.cf.taste.example.grouplens.GroupLensRecommenderEvaluatorRunner&quot;</SPAN> -Dexec.args=<SPAN class="code-quote">&quot;&lt;OPTIONS&gt;&quot;</SPAN> 
+</PRE>
+</DIV></DIV> 
+
+<P>Here, the command line options need only be:</P>
+
+<DIV class="code panel" style="border-width: 1px;"><DIV class="codeContent panelContent">
+<PRE class="code-java">
+-i [input file]
+</PRE>
+</DIV></DIV>
+
+
+<P>Note that the GroupLens example is designed for the &quot;1 million&quot; data set, available at <A href="http://www.grouplens.org/node/73" class="external-link" rel="nofollow">http://www.grouplens.org/node/73</A>. The &quot;input file&quot; above is the ratings.dat file contained in the zip file from the data set. This file has an unusual format and so has a special parser. The example code here can be easily modified to use a regular FileDataModel and thus work on more standard input, including the other data sets available at this site.</P>
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.4.9 Build: 2042 Feb 14, 2011)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || [];  /* reuse an already-defined command queue if present, otherwise start an empty one */
+  _gaq.push(['_setAccount', 'UA-17359171-1']);
+  _gaq.push(['_setDomainName', 'none']);
+  _gaq.push(['_setAllowLinker', true]);
+  _gaq.push(['_trackPageview']);
+
+  (function() {  /* builds an async script element for ga.js and inserts it before the first script element on the page */
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';  /* 'ssl' host on https pages, 'www' host otherwise */
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/recommender-documentation.data/taste-architecture.png
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/recommender-documentation.data/taste-architecture.png?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/recommender-documentation.data/taste-architecture.png
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/recommender-documentation.data/taste-architecture.png.jpeg
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/recommender-documentation.data/taste-architecture.png.jpeg?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/recommender-documentation.data/taste-architecture.png.jpeg
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/recommender-documentation.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/recommender-documentation.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/recommender-documentation.html (added)
+++ mahout/site/new_website/MAHOUT/recommender-documentation.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,418 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/space.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/wiki-content.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/abs.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/tables.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/panels.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/renderer-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/content-types.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/login.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/information-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/layout-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/default-theme.css">
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null;      /* element with id 'hide'; looked up in init() */
+      var show = null;      /* element with id 'show'; looked up in init() */
+      var children = null;  /* the 'children' element; looked up in init() */
+
+      function init() {  /* Called from the BODY onload handler: fills in the search form and sets the initial state of the 'children' toggle. */
+        /* Search form initialization: copy the current hostname into the hidden 'domains' and 'sitesearch' fields (presumably so the Google search is limited to this site). */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization: store the toggle elements in the shared globals used by showChildren() and hideChildren(). */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ?  /* use document.all when the browser exposes it, else getElementById */
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) {  /* NOTE(review): show and hide are not null-checked here; this assumes they exist whenever 'children' does */
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() {  /* Makes the 'children' element visible and swaps the toggles: 'show' hidden, 'hide' visible. Uses the globals without null checks. */
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+
+      function hideChildren() {  /* Hides the 'children' element and swaps the toggles: 'show' visible, 'hide' hidden. Uses the globals without null checks. */
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>Recommender Documentation</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="algorithms.html" title="Algorithms">Algorithms</A>&nbsp;&gt;&nbsp;<A href="" title="Recommender Documentation">Recommender Documentation</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">Recommender Documentation</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=22872433">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=22872433">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=22872433">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=22872433">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=22872433">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=22872433">Add News</A>
+        </DIV>
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <H2><A name="RecommenderDocumentation-Overview"></A>Overview</H2>
+
+<P><EM>This documentation concerns the non-distributed, non-Hadoop-based recommender engine / collaborative filtering code inside Mahout. It was formerly a separate project called &quot;Taste&quot; and has continued development inside Mahout alongside other Hadoop-based code. It may be viewed as a somewhat separate, older, more comprehensive and more mature aspect of this code, compared to current development efforts focusing on Hadoop-based distributed recommenders. This remains the best entry point into Mahout recommender engines of all kinds.</EM></P>
+
+<P>A Mahout-based collaborative filtering engine takes users' preferences for items (&quot;tastes&quot;) and returns estimated preferences for other items. For example, a site that sells books or CDs could easily use Mahout to figure out, from past purchase data, which CDs a customer might be interested in listening to.</P>
+
+<P>Mahout provides a rich set of components from which you can construct a customized recommender system from a selection of algorithms. Mahout is designed to be enterprise-ready; it's designed for performance, scalability and flexibility.</P>
+
+<P>A Mahout recommender is not just for Java; it can be run as an external server which exposes recommendation logic to your application via web services and HTTP.</P>
+
+<P>Top-level packages define the Mahout interfaces to these key abstractions:</P>
+<UL>
+	<LI>DataModel</LI>
+	<LI>UserSimilarity</LI>
+	<LI>ItemSimilarity</LI>
+	<LI>UserNeighborhood</LI>
+	<LI>Recommender</LI>
+</UL>
+
+
+<P>Subpackages of org.apache.mahout.cf.taste.impl hold implementations of these interfaces. These are the pieces from which you will build your own recommendation engine. That's it! For the academically inclined, Mahout supports both <B>memory-based</B> and <B>item-based</B> recommender systems, <B>slope one</B> recommenders, and a couple of other experimental implementations. It does not currently support <B>model-based</B> recommenders.</P>
+
+<H2><A name="RecommenderDocumentation-Architecture"></A>Architecture</H2>
+
+<P><SPAN class="image-wrap" style=""><IMG src="recommender-documentation.data/taste-architecture.png" style="border: 0px solid black"></SPAN></P>
+
+<P>This diagram shows the relationship between various Mahout components in a user-based recommender. An item-based recommender system is similar except that there are no PreferenceInferrers or Neighborhood algorithms involved.</P>
+
+<H3><A name="RecommenderDocumentation-Recommender"></A>Recommender</H3>
+<P>A Recommender is the core abstraction in Mahout. Given a DataModel, it can produce recommendations. Applications will most likely use the GenericUserBasedRecommender implementation or GenericItemBasedRecommender, possibly decorated by CachingRecommender.</P>
+
+<H3><A name="RecommenderDocumentation-DataModel"></A>DataModel</H3>
+<P>A DataModel is the interface to information about user preferences. An implementation might draw this data from any source, but a database is the most likely source. Mahout provides MySQLJDBCDataModel, for example, to access preference data from a database via JDBC and MySQL. Another exists for PostgreSQL. Mahout also provides a FileDataModel.</P>
+
+<P>There are no abstractions for a user or item in the object model (not anymore). Users and items are identified solely by an ID value in the framework. Further, this ID value must be numeric; it is a Java long type through the APIs. A Preference object or PreferenceArray object encapsulates the relation between user and preferred items (or items and users preferring them).</P>
+
+<P>Finally, Mahout supports, in various ways, a so-called &quot;boolean&quot; data model in which users do not express preferences of varying strengths for items, but simply express an association or none at all. For example, while users might express a preference from 1 to 5 in the context of a movie recommender site, there may be no notion of a preference value between users and pages in the context of recommending pages on a web site: there is only a notion of an association, or none, between a user and pages that have been visited.</P>
+
+<H3><A name="RecommenderDocumentation-UserSimilarity"></A>UserSimilarity</H3>
+<P>A UserSimilarity defines a notion of similarity between two Users. This is a crucial part of a recommendation engine. These are attached to a Neighborhood implementation. ItemSimilarities are analogous, but find similarity between Items.</P>
+
+<H3><A name="RecommenderDocumentation-UserNeighborhood"></A>UserNeighborhood</H3>
+<P>In a user-based recommender, recommendations are produced by finding a &quot;neighborhood&quot; of similar users near a given user. A UserNeighborhood defines a means of determining that neighborhood &mdash; for example, nearest 10 users. Implementations typically need a UserSimilarity to operate.</P>
+
+<H2><A name="RecommenderDocumentation-Requirements"></A>Requirements</H2>
+<H3><A name="RecommenderDocumentation-Required"></A>Required</H3>
+
+<UL>
+	<LI><A href="http://www.java.com/getjava/index.jsp" class="external-link" rel="nofollow">Java/ J2SE 6.0</A></LI>
+</UL>
+
+
+<H3><A name="RecommenderDocumentation-Optional"></A>Optional</H3>
+<UL>
+	<LI><A href="http://maven.apache.org/" class="external-link" rel="nofollow">Apache Maven</A>  2.2.1 or later, if you want to build from source or build examples. (Mac users note that even OS X 10.5 ships with Maven 2.0.6, which will not work.)</LI>
+	<LI>Mahout web applications require a <A href="http://java.sun.com/products/servlet/index.jsp" class="external-link" rel="nofollow">Servlet 2.3+</A> container, such as <A href="http://jakarta.apache.org/tomcat/" class="external-link" rel="nofollow">Apache Tomcat</A>. It may in fact work with older containers with slight modification.</LI>
+</UL>
+
+
+<H2><A name="RecommenderDocumentation-Demo"></A>Demo</H2>
+
+<P>To build and run the demo, follow the instructions below, which are written for Unix-like operating systems:</P>
+
+<UL>
+	<LI>Obtain a copy of the Mahout distribution, either from SVN or as a downloaded archive.</LI>
+	<LI>Download the &quot;1 Million MovieLens Dataset&quot; from <A href="http://www.grouplens.org/" class="external-link" rel="nofollow">Grouplens.org</A></LI>
+	<LI>Unpack the archive and copy movies.dat and ratings.dat to trunk/integration/src/main/resources/org/apache/mahout/cf/taste/example/grouplens under the Mahout distribution directory.</LI>
+	<LI>Navigate to the directory where you unpacked the Mahout distribution, and navigate to trunk.</LI>
+	<LI>Run mvn -DskipTests install, which builds and installs Mahout core to your local repository</LI>
+	<LI>cd integration</LI>
+	<LI>You may need to give Maven more memory: in a bash shell, export MAVEN_OPTS=-Xmx1024M</LI>
+	<LI>mvn jetty:run.</LI>
+	<LI>Get recommendations by accessing the web application in your browser: <A href="http://localhost:8080/mahout-integration/RecommenderServlet?userID=1" class="external-link" rel="nofollow">http://localhost:8080/mahout-integration/RecommenderServlet?userID=1</A>. This will produce a simple preference-item ID list which could be consumed by a client application. Get more useful human-readable output with the debug parameter: <A href="http://localhost:8080/mahout-integration/RecommenderServlet?userID=1&amp;debug=true" class="external-link" rel="nofollow">http://localhost:8080/mahout-integration/RecommenderServlet?userID=1&amp;debug=true</A></LI>
+</UL>
+
+
+
+<H2><A name="RecommenderDocumentation-Examples"></A>Examples</H2>
+<H3><A name="RecommenderDocumentation-UserbasedRecommender"></A>User-based Recommender</H3>
+<P>User-based recommenders are the &quot;original&quot;, conventional style of recommender system. They can produce good recommendations when tweaked properly; they are not necessarily the fastest recommender systems and are thus suitable for small data sets (roughly, less than ten million ratings). We'll start with an example of this.</P>
+
+<P>First, create a DataModel of some kind. Here, we'll use a simple one based on data in a file. The file should be in CSV format, with lines of the form &quot;userID,itemID,prefValue&quot; (e.g. &quot;39505,290002,3.5&quot;):</P>
+
+<DIV class="code panel" style="border-width: 1px;"><DIV class="codeContent panelContent">
+<PRE class="code-java">
+DataModel model = <SPAN class="code-keyword">new</SPAN> FileDataModel(<SPAN class="code-keyword">new</SPAN> File(<SPAN class="code-quote">&quot;data.txt&quot;</SPAN>));
+</PRE>
+</DIV></DIV>
+
+<P>We'll use the PearsonCorrelationSimilarity implementation of UserSimilarity as our user correlation algorithm, and add an optional preference inference algorithm:</P>
+
+<DIV class="code panel" style="border-width: 1px;"><DIV class="codeContent panelContent">
+<PRE class="code-java">
+UserSimilarity userSimilarity = <SPAN class="code-keyword">new</SPAN> PearsonCorrelationSimilarity(model);
+<SPAN class="code-comment">// Optional:
+</SPAN>userSimilarity.setPreferenceInferrer(<SPAN class="code-keyword">new</SPAN> AveragingPreferenceInferrer());
+</PRE>
+</DIV></DIV>
+
+<P>Now we create a UserNeighborhood algorithm. Here we use nearest-3:</P>
+
+<DIV class="code panel" style="border-width: 1px;"><DIV class="codeContent panelContent">
+<PRE class="code-java">
+UserNeighborhood neighborhood =
+          <SPAN class="code-keyword">new</SPAN> NearestNUserNeighborhood(3, userSimilarity, model);</PRE>
+</DIV></DIV>
+
+<P>Now we can create our Recommender, and add a caching decorator:</P>
+
+<DIV class="code panel" style="border-width: 1px;"><DIV class="codeContent panelContent">
+<PRE class="code-java">
+Recommender recommender =
+          <SPAN class="code-keyword">new</SPAN> GenericUserBasedRecommender(model, neighborhood, userSimilarity);
+Recommender cachingRecommender = <SPAN class="code-keyword">new</SPAN> CachingRecommender(recommender);
+</PRE>
+</DIV></DIV>
+
+<P>Now we can get 10 recommendations for user ID &quot;1234&quot; &mdash; done!</P>
+<DIV class="code panel" style="border-width: 1px;"><DIV class="codeContent panelContent">
+<PRE class="code-java">
+List&lt;RecommendedItem&gt; recommendations =
+          cachingRecommender.recommend(1234, 10);
+</PRE>
+</DIV></DIV>
+
+<H3><A name="RecommenderDocumentation-ItembasedRecommender"></A>Item-based Recommender</H3>
+
+<P>We could have created an item-based recommender instead. Item-based recommenders base recommendations not on user similarity, but on item similarity. In theory these are about the same approach to the problem, just from different angles. However the similarity of two items is relatively fixed, more so than the similarity of two users. So, item-based recommenders can use pre-computed similarity values in the computations, which makes them much faster. For large data sets, item-based recommenders are more appropriate.</P>
+
+<P>Let's start over, again with a FileDataModel to start:</P>
+
+<DIV class="code panel" style="border-width: 1px;"><DIV class="codeContent panelContent">
+<PRE class="code-java">
+DataModel model = <SPAN class="code-keyword">new</SPAN> FileDataModel(<SPAN class="code-keyword">new</SPAN> File(<SPAN class="code-quote">&quot;data.txt&quot;</SPAN>));
+</PRE>
+</DIV></DIV>
+
+<P>We'll also need an ItemSimilarity. We could use PearsonCorrelationSimilarity, which computes item similarity in realtime, but, this is generally too slow to be useful. Instead, in a real application, you would feed a list of pre-computed correlations to a GenericItemSimilarity: </P>
+
+<DIV class="code panel" style="border-width: 1px;"><DIV class="codeContent panelContent">
+<PRE class="code-java">
+<SPAN class="code-comment">// Construct the list of pre-computed correlations
+</SPAN>Collection&lt;GenericItemSimilarity.ItemItemSimilarity&gt; correlations =
+          ...;
+ItemSimilarity itemSimilarity =
+          <SPAN class="code-keyword">new</SPAN> GenericItemSimilarity(correlations);
+
+</PRE>
+</DIV></DIV>
+
+<P>Then we can finish as before to produce recommendations:</P>
+
+<DIV class="code panel" style="border-width: 1px;"><DIV class="codeContent panelContent">
+<PRE class="code-java">
+Recommender recommender =
+          <SPAN class="code-keyword">new</SPAN> GenericItemBasedRecommender(model, itemSimilarity);
+Recommender cachingRecommender = <SPAN class="code-keyword">new</SPAN> CachingRecommender(recommender);
+...
+List&lt;RecommendedItem&gt; recommendations =
+          cachingRecommender.recommend(1234, 10);
+</PRE>
+</DIV></DIV>
+
+<H3><A name="RecommenderDocumentation-SlopeOneRecommender"></A>Slope-One Recommender</H3>
+<P>This is a simple yet effective Recommender and we present another example to round out the list:</P>
+
+<DIV class="code panel" style="border-width: 1px;"><DIV class="codeContent panelContent">
+<PRE class="code-java">
+DataModel model = <SPAN class="code-keyword">new</SPAN> FileDataModel(<SPAN class="code-keyword">new</SPAN> File(<SPAN class="code-quote">&quot;data.txt&quot;</SPAN>));
+          <SPAN class="code-comment">// Make a weighted slope one recommender
+</SPAN>          Recommender recommender = <SPAN class="code-keyword">new</SPAN> SlopeOneRecommender(model);
+          Recommender cachingRecommender = <SPAN class="code-keyword">new</SPAN> CachingRecommender(recommender);
+        </PRE>
+</DIV></DIV>
+
+
+
+<H2><A name="RecommenderDocumentation-Integrationwithyourapplication"></A>Integration with your application</H2>
+<H3><A name="RecommenderDocumentation-Direct"></A>Direct</H3>
+
+<P>You can create a Recommender, as shown above, wherever you like in your Java application, and use it. This includes simple Java applications or GUI applications, server applications, and J2EE web applications.</P>
+
+<H3><A name="RecommenderDocumentation-Standaloneserver"></A>Standalone server</H3>
+<P>A Mahout recommender can also be run as an external server, which may be the only option for non-Java applications. It can be exposed as a web application via org.apache.mahout.cf.taste.web.RecommenderServlet, and your application can then access recommendations via simple HTTP requests and responses. See above, and see the javadoc for details.</P>
+
+<H2><A name="RecommenderDocumentation-Performance"></A>Performance</H2>
+<H3><A name="RecommenderDocumentation-RuntimePerformance"></A>Runtime Performance</H3>
+<P>The more data you give, the better. Though Mahout is designed for performance, you will undoubtedly run into performance issues at some point. For best results, consider using the following command-line flags to your JVM:</P>
+
+<UL>
+	<LI>-server: Enables the server VM, which is generally appropriate for long-running, computation-intensive applications.</LI>
+	<LI>-Xms1024m -Xmx1024m: Make the heap as big as possible &ndash; a gigabyte doesn't hurt when dealing with tens of millions of preferences. Mahout recommenders will generally use as much memory as you give them for caching, which helps performance. Set the initial and max size to the same value to avoid wasting time growing the heap, and to avoid having the JVM run minor collections to avoid growing the heap, which will clear cached values.</LI>
+	<LI>-da -dsa: Disable all assertions.</LI>
+	<LI>-XX:NewRatio=9: Increase heap allocated to 'old' objects, which is most of them in this framework</LI>
+	<LI>-XX:+UseParallelGC -XX:+UseParallelOldGC (multi-processor machines only): Use a GC algorithm designed to take advantage of multiple processors, and designed for throughput. This is a default in J2SE 5.0.</LI>
+	<LI>-XX:+DisableExplicitGC: Disable calls to System.gc(). These calls can only hurt in the presence of modern GC algorithms; they may force Mahout to remove cached data needlessly. This flag isn't needed if you're sure your code and third-party code you use don't call this method.</LI>
+</UL>
+
+
+<P>Also consider the following tips:</P>
+
+<UL>
+	<LI>Use CachingRecommender on top of your custom Recommender implementation.</LI>
+	<LI>When using JDBCDataModel, make sure you've taken basic steps to optimize the table storing preference data. Create a primary key on the user ID and item ID columns, and an index on them. Set them to be non-null. And so on. Tune your database for lots of concurrent reads! When using JDBC, the database is almost always the bottleneck. Plenty of memory and caching are even more important.</LI>
+	<LI>Also, pooling database connections is essential to performance. If using a J2EE container, it probably provides a way to configure connection pools. If you are creating your own DataSource directly, try wrapping it in org.apache.mahout.cf.taste.impl.model.jdbc.ConnectionPoolDataSource</LI>
+	<LI>See MySQL-specific notes on performance in the javadoc for MySQLJDBCDataModel.</LI>
+</UL>
+
+
+<H3><A name="RecommenderDocumentation-AlgorithmPerformance%3AWhichOneIsBest%3F"></A>Algorithm Performance: Which One Is Best?</H3>
+<P>There is no right answer; it depends on your data, your application, environment, and performance needs. Mahout provides the building blocks from which you can construct the best Recommender for your application. The links below provide research on this topic. You will probably need a bit of trial-and-error to find a setup that works best. The code sample above provides a good starting point.</P>
+
+<P>Fortunately, Mahout provides a way to evaluate the accuracy of your Recommender on your own data, in org.apache.mahout.cf.taste.eval:</P>
+
+<DIV class="code panel" style="border-width: 1px;"><DIV class="codeContent panelContent">
+<PRE class="code-java">
+DataModel myModel = ...;
+RecommenderBuilder builder = <SPAN class="code-keyword">new</SPAN> RecommenderBuilder() {
+  <SPAN class="code-keyword">public</SPAN> Recommender buildRecommender(DataModel model) {
+    <SPAN class="code-comment">// build and <SPAN class="code-keyword">return</SPAN> the Recommender to evaluate here
+</SPAN>  }
+};
+RecommenderEvaluator evaluator =
+          <SPAN class="code-keyword">new</SPAN> AverageAbsoluteDifferenceRecommenderEvaluator();
+<SPAN class="code-object">double</SPAN> evaluation = evaluator.evaluate(builder, myModel, 0.9, 1.0);
+</PRE>
+</DIV></DIV>
+
+<P>For &quot;boolean&quot; data model situations, where there are no notions of preference value, the above evaluation based on estimated preference does not make sense. In this case, try this kind of evaluation, which presents traditional information retrieval figures like precision and recall, which are more meaningful:</P>
+
+<DIV class="code panel" style="border-width: 1px;"><DIV class="codeContent panelContent">
+<PRE class="code-java">
+...
+RecommenderIRStatsEvaluator evaluator =
+        <SPAN class="code-keyword">new</SPAN> GenericRecommenderIRStatsEvaluator();
+IRStatistics stats =
+        evaluator.evaluate(builder, <SPAN class="code-keyword">null</SPAN>, myModel, <SPAN class="code-keyword">null</SPAN>, 3,
+        RecommenderIRStatsEvaluator.CHOOSE_THRESHOLD,
+        1.0);
+</PRE>
+</DIV></DIV>
+
+
+<H2><A name="RecommenderDocumentation-UsefulLinks"></A>Useful Links</H2>
+<P>You'll want to look at these packages too, which offer more algorithms and approaches that you may find useful:</P>
+
+<UL>
+	<LI><A href="http://www.nongnu.org/cofi/" class="external-link" rel="nofollow">Cofi</A>: A Java-Based Collaborative Filtering Library</LI>
+	<LI><A href="http://eecs.oregonstate.edu/iis/CoFE/" class="external-link" rel="nofollow">CoFE</A></LI>
+</UL>
+
+
+<P>Here's a handful of research papers that I've read and found particularly useful:</P>
+
+<P>J.S. Breese, D. Heckerman and C. Kadie, &quot;<A href="http://research.microsoft.com/research/pubs/view.aspx?tr_id=166" class="external-link" rel="nofollow">Empirical Analysis of Predictive Algorithms for Collaborative Filtering</A>,&quot; in Proceedings of the Fourteenth Conference on Uncertainty in Artificial Intelligence (UAI 1998), 1998.</P>
+
+<P>B. Sarwar, G. Karypis, J. Konstan and J. Riedl, &quot;<A href="http://www10.org/cdrom/papers/519/" class="external-link" rel="nofollow">Item-based collaborative filtering recommendation algorithms</A>&quot; in Proceedings of the Tenth International Conference on the World Wide Web (WWW 10), pp. 285-295, 2001.</P>
+
+<P>P. Resnick, N. Iacovou, M. Suchak, P. Bergstrom and J. Riedl, &quot;<A href="http://doi.acm.org/10.1145/192844.192905" class="external-link" rel="nofollow">GroupLens: an open architecture for collaborative filtering of netnews</A>&quot; in Proceedings of the 1994 ACM conference on Computer Supported Cooperative Work (CSCW 1994), pp. 175-186, 1994.</P>
+
+<P>J.L. Herlocker, J.A. Konstan, A. Borchers and J. Riedl, &quot;<A href="http://www.grouplens.org/papers/pdf/algs.pdf" class="external-link" rel="nofollow">An algorithmic framework for performing collaborative filtering</A>&quot; in Proceedings of the 22nd annual international ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR 99), pp. 230-237, 1999.</P>
+
+<P>Clifford Lyon, &quot;<A href="http://materialobjects.com/cf/MovieRecommender.pdf" class="external-link" rel="nofollow">Movie Recommender</A>&quot; CSCI E-280 final project, Harvard University, 2004.</P>
+
+<P>Daniel Lemire, Anna Maclachlan, &quot;<A href="http://www.daniel-lemire.com/fr/abstracts/SDM2005.html" class="external-link" rel="nofollow">Slope One Predictors for Online Rating-Based Collaborative Filtering</A>,&quot; Proceedings of SIAM Data Mining (SDM '05), 2005.</P>
+
+<P>Michelle Anderson, Marcel Ball, Harold Boley, Stephen Greene, Nancy Howse, Daniel Lemire and Sean McGrath, &quot;<A href="http://www.daniel-lemire.com/fr/documents/publications/racofi_nrc.pdf" class="external-link" rel="nofollow">RACOFI: A Rule-Applying Collaborative Filtering System</A>,&quot; Proceedings of COLA '03, 2003.</P>
+
+<P>These links will take you to all the collaborative filtering reading you could ever want!</P>
+<UL>
+	<LI><A href="http://www.paulperry.net/notes/cf.asp" class="external-link" rel="nofollow">Paul Perry's notes</A></LI>
+	<LI><A href="http://jamesthornton.com/cf/" class="external-link" rel="nofollow">James Thornton's collaborative filtering resources</A></LI>
+	<LI><A href="http://www.daniel-lemire.com/blog/" class="external-link" rel="nofollow">Daniel Lemire's blog</A> which frequently covers collaborative filtering topics</LI>
+</UL>
+
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.4.9 Build: 2042 Feb 14, 2011)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || []; /* Analytics command queue: keep an already-defined _gaq, otherwise start with an empty array */
+  _gaq.push(['_setAccount', 'UA-17359171-1']);
+  _gaq.push(['_setDomainName', 'none']);
+  _gaq.push(['_setAllowLinker', true]);
+  _gaq.push(['_trackPageview']);
+
+  (function() { /* create a script element for ga.js and insert it ahead of the document's first script element */
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js'; /* host prefix is chosen from the page's own protocol */
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/recommender-first-timer-faq.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/recommender-first-timer-faq.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/recommender-first-timer-faq.html (added)
+++ mahout/site/new_website/MAHOUT/recommender-first-timer-faq.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,163 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/space.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/wiki-content.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/abs.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/tables.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/panels.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/renderer-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/content-types.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/login.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/information-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/layout-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/default-theme.css">
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null; /* element with id 'hide'; assigned in init() */
+      var show = null; /* element with id 'show'; assigned in init() */
+      var children = null; /* the 'children' element; assigned in init(), may stay null if the page has none */
+
+      function init() { /* Page-load setup, invoked from the BODY onload attribute. */
+        /* Search form initialization: fill the hidden 'domains' and 'sitesearch' fields with the current hostname */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization: cache the toggle elements in the shared globals */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ? /* use document.all when the browser provides it, else getElementById */
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) { /* start collapsed: 'children' and 'hide' hidden, 'show' displayed inline */
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() { /* Expand: display 'children' as a block, hide the 'show' control, reveal the 'hide' control. Relies on the globals set by init(). */
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+
+      function hideChildren() { /* Collapse: hide 'children', reveal the 'show' control, hide the 'hide' control. Relies on the globals set by init(). */
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>Recommender First-Timer FAQ</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="algorithms.html" title="Algorithms">Algorithms</A>&nbsp;&gt;&nbsp;<A href="" title="Recommender First-Timer FAQ">Recommender First-Timer FAQ</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">Recommender First-Timer FAQ</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=24191983">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=24191983">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&amp;fromPageId=24191983">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&amp;fromPageId=24191983">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&amp;fromPageId=24191983">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&amp;fromPageId=24191983">Add News</A>
+        </DIV>
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <P>Many people with an interest in recommenders arrive at Mahout since they're building a first recommender system. Some starting questions have been asked enough times to warrant a FAQ collecting advice and rules-of-thumb for newcomers.</P>
+
+<P>For the interested, these topics are treated in detail in the book <A href="http://manning.com/owen/" class="external-link" rel="nofollow">Mahout in Action</A>.</P>
+
+<P>Don't start with a distributed, Hadoop-based recommender; take on that complexity only if necessary. Start with non-distributed recommenders. It is simpler, has fewer requirements, and is more flexible. </P>
+
+<P>As a crude rule of thumb, a system with up to 100M user-item associations (ratings, preferences) should &quot;fit&quot; onto one modern server machine with 4GB of heap available and run acceptably as a real-time recommender. The system is invariably memory-bound since keeping data in memory is essential to performance.</P>
+
+<P>Beyond this point it gets expensive to deploy a machine with enough RAM, so designing for a distributed system makes sense when nearing this scale. However, most applications don't &quot;really&quot; have 100M associations to process. Data can be sampled; noisy and old data can often be aggressively pruned without significant impact on the result.</P>
+
+<P>The next question is whether or not your system has preference values, or ratings. Do users and items merely have an association or not, such as the existence or lack of a click? Or is behavior translated into some scalar value representing the user's degree of preference for the item?</P>
+
+<P>If you have ratings, then a good place to start is a GenericItemBasedRecommender, plus a PearsonCorrelationSimilarity similarity metric. If you don't have ratings, then a good place to start is GenericBooleanPrefItemBasedRecommender and LogLikelihoodSimilarity.</P>
+
+<P>If you want to do content-based item-item similarity, you need to implement your own ItemSimilarity.</P>
+
+<P>If your data can be simply exported to a CSV file, use FileDataModel and push new files periodically.<BR>
+If your data is in a database, use MySQLJDBCDataModel (or its &quot;BooleanPref&quot; counterpart if appropriate, or its PostgreSQL counterpart, etc.) and put a ReloadFromJDBCDataModel on top of it.</P>
+
+<P>This should give a reasonable starter system which responds fast. The nature of the system is that new data comes in from the file or database only periodically &ndash; perhaps on the order of minutes. If that's not OK, you'll have to look into some more specialized work &ndash; SlopeOneRecommender deals with updates quickly, or, it is possible to do some work to update the GenericDataModel in real time. </P>
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.2 Build: 1810 Mar 16, 2010)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || []; /* Analytics command queue: keep an already-defined _gaq, otherwise start with an empty array */
+  _gaq.push(['_setAccount', 'UA-17359171-1']);
+  _gaq.push(['_setDomainName', 'none']);
+  _gaq.push(['_setAllowLinker', true]);
+  _gaq.push(['_trackPageview']);
+
+  (function() { /* create a script element for ga.js and insert it ahead of the document's first script element */
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js'; /* host prefix is chosen from the page's own protocol */
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/recommenderimplementation.data/taste-architecture.png
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/recommenderimplementation.data/taste-architecture.png?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/recommenderimplementation.data/taste-architecture.png
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/recommenderimplementation.data/taste-architecture.png.jpeg
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/recommenderimplementation.data/taste-architecture.png.jpeg?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/recommenderimplementation.data/taste-architecture.png.jpeg
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream