You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2012/02/11 11:22:31 UTC

svn commit: r1243022 [14/38] - in /mahout/site/new_website: ./ MAHOUT/ MAHOUT/2010/ MAHOUT/2010/09/ MAHOUT/2010/09/14/ MAHOUT/2011/ MAHOUT/2011/10/ MAHOUT/2011/10/21/ MAHOUT/books-tutorials-and-talks.data/ MAHOUT/books-tutorials-talks.data/ MAHOUT/book...

Added: mahout/site/new_website/MAHOUT/index.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/index.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/index.html (added)
+++ mahout/site/new_website/MAHOUT/index.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,353 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null;
+      var show = null;
+      var children = null;
+
+      function init() {
+        /* Search form initialization */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ?
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) {
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() {
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+
+      function hideChildren() {
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>index</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" nowrap="">
+          &nbsp;<A href="" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="" title="index">index</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">index</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=74539">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" alt="Edit Page" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=74539">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" alt="Browse Space" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&amp;fromPageId=74539">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" alt="Add Page" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&amp;fromPageId=74539">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&amp;fromPageId=74539">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" alt="Add News" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&amp;fromPageId=74539">Add News</A>
+        </DIV>
+      </DIV>
+      <DIV class="pagesubheading" style="margin: 0px 10px 0px 10px;">
+        #editReport()
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <H1><A name="index-ApacheMahoutWiki"></A>Apache Mahout Wiki</H1>
+
+<P>Apache Mahout is a new Apache TLP project to create scalable, machine learning algorithms under the Apache license. It is related to other Apache Lucene projects and integrates well with Solr.</P>
+
+<STYLE type="text/css">/*<![CDATA[*/
+div.rbtoc1277321022602 {margin-left: 1.5em;padding: 0px;}
+div.rbtoc1277321022602 ul {list-style: disc;margin-left: 0px;}
+div.rbtoc1277321022602 li {margin-left: 0px;padding-left: 0px;}
+
+/*]]>*/</STYLE><DIV class="rbtoc1277321022602">
+<UL>
+    <LI><A href="#index-General">General</A></LI>
+    <LI><A href="#index-Community">Community</A></LI>
+    <LI><A href="#index-Installation%252FSetup">Installation/Setup</A></LI>
+    <LI><A href="#index-ImplementationBackground">Implementation Background</A>
+<UL>
+    <LI><A href="#index-RequirementsandDesign">Requirements and Design</A></LI>
+    <LI><A href="#index-CollectionsandAlgorithms">Collections and Algorithms</A></LI>
+    <LI><A href="#index-Utilities">Utilities</A></LI>
+    <LI><A href="#index-Data">Data</A></LI>
+    <LI><A href="#index-Benchmarks">Benchmarks</A></LI>
+</UL></LI>
+    <LI><A href="#index-Committer%2527sResources">Committer's Resources</A>
+<UL>
+    <LI><A href="#index-ProjectResources">Project Resources</A></LI>
+    <LI><A href="#index-AdditionalResources">Additional Resources</A></LI>
+</UL></LI>
+    <LI><A href="#index-HowToEditThisWiki">How To Edit This Wiki</A></LI>
+</UL></DIV>
+
+<H2><A name="index-General"></A>General</H2>
+<P><A href="overview.html" title="Overview">Overview</A> &ndash; Mahout? What's that supposed to be?</P>
+
+<P><A href="quickstart.html" title="QuickStart">QuickStart</A> &ndash; learn how to quickly set up Apache Mahout for your project.</P>
+
+<P><A href="faq.html" title="FAQ">FAQ</A> &ndash; Frequent questions encountered on the mailing lists.</P>
+
+<P><A href="developerresources.html" title="DeveloperResources">DeveloperResources</A> &ndash; overview of the Mahout development infrastructure.</P>
+
+<P><A href="howtocontribute.html" title="HowToContribute">HowToContribute</A> &ndash; get involved with the Mahout community.</P>
+
+<P><A href="howtobecomeacommitter.html" title="HowToBecomeACommitter">HowToBecomeACommitter</A> &ndash; become a member of the Mahout development community.</P>
+
+<P><A href="http://hadoop.apache.org/" class="external-link" rel="nofollow">Hadoop</A> &ndash; several of our implementations depend on Hadoop.</P>
+
+<P><A href="http://mloss.org/software/" class="external-link" rel="nofollow">Machine Learning Open Source Software</A> &ndash; other projects implementing Open Source Machine Learning libraries.</P>
+
+<P><A href="todo.html" title="TODO">TODO</A></P>
+
+<H2><A name="index-Community"></A>Community</H2>
+
+<P><A href="whoweare.html" title="WhoWeAre">Who we are</A> &ndash; who are the developers behind Apache Mahout?</P>
+
+<P><A href="bookstutorialstalks.html" title="BooksTutorialsTalks">Books, Tutorials, Talks, Articles, News, etc. on Mahout</A></P>
+
+<P><A href="issuetracker.html" title="IssueTracker">IssueTracker</A> &ndash; see what features people are working on, submit patches and file bugs.</P>
+
+<P><A href="https://svn.apache.org/repos/asf/mahout/" class="external-link" rel="nofollow">Source Code (SVN)</A> &ndash; <A href="http://fisheye6.atlassian.com/browse/mahout" class="external-link" rel="nofollow">Fisheye</A> &ndash; download the Mahout source code from svn.</P>
+
+<P><A href="mailinglistarchives.html" title="MailingListArchives">Mailing lists</A> &ndash; links to our mailing lists and archived design and algorithm discussions. Maybe your question was answered there already?</P>
+
+<P><A href="versioncontrol.html" title="VersionControl">VersionControl</A> &ndash; where we track our code.</P>
+
+<P><A href="poweredby.html" title="PoweredBy">PoweredBy</A> &ndash; who is using Mahout in production?</P>
+
+<P><A href="gsoc.html" title="GSOC">Mahout and Google Summer of Code</A>  &ndash; All you need to know about Mahout and GSoC.</P>
+
+<P><A href="machine-learning-resources.html" title="Machine Learning Resources">Machine Learning Resources</A> &ndash; books, tutorials, talks, papers on machine learning problems.</P>
+
+<P><A href="glossary.html" title="Glossary">Glossary of commonly used terms</A></P>
+
+<H2><A name="index-Installation%2FSetup"></A>Installation/Setup</H2>
+
+<P><A href="systemrequirements.html" title="SystemRequirements">System Requirements</A> &ndash; what do you need to run Mahout?</P>
+
+<P><A href="quickstart.html" title="QuickStart">QuickStart</A> &ndash; get started with Mahout, run the examples and get pointers to further resources.</P>
+
+<P><A href="releases.html" title="Releases">Releases</A> &ndash; a list of Mahout releases.</P>
+
+<P><A href="buildingmahout.html" title="BuildingMahout">Download and installation</A> &ndash; build Mahout from the sources.</P>
+
+<P><A href="mahoutec2.html" title="MahoutEC2">Mahout on Amazon's EC2 Service</A> &ndash; run Mahout on Amazon's EC2.</P>
+
+<P><A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&amp;title=MahoutIntegration&amp;linkCreation=true&amp;fromPageId=74539" class="createlink">Integrating Mahout into an Application</A> &ndash; integrate Mahout's capabilities in your application.</P>
+
+
+<H2><A name="index-ImplementationBackground"></A>Implementation Background</H2>
+
+<H3><A name="index-RequirementsandDesign"></A>Requirements and Design</H3>
+
+<P><A href="matrix-and-vector-needs.html" title="Matrix and Vector Needs">Matrix and Vector Needs</A> &ndash; requirements for Mahout vectors.</P>
+
+<P><A href="collectionde-serialization.html" title="Collection(De-)Serialization">Collection&#40;De&#45;&#41;Serialization</A></P>
+
+<H3><A name="index-CollectionsandAlgorithms"></A>Collections and Algorithms</H3>
+
+<P>Learn more about <A href="mahout-collections.html" title="mahout-collections">mahout&#45;collections</A>, containers for efficient storage of primitive-type data and open hash tables.</P>
+
+<P>Learn more about the <A href="algorithms.html" title="Algorithms">Algorithms</A> discussed and employed by Mahout.</P>
+
+<P>Learn more about the <A href="recommenderimplementation.html" title="RecommenderImplementation">Mahout recommender implementation</A>.</P>
+
+<H3><A name="index-Utilities"></A>Utilities</H3>
+
+<P>This section describes tools that might be useful for working with Mahout.</P>
+
+<P><A href="creating-vectors.html" title="Creating Vectors">Creating Vectors</A> &ndash; Mahout's algorithms operate on vectors. Learn more on how to generate these from raw data.<BR>
+<A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&amp;title=Viewing%20Result&amp;linkCreation=true&amp;fromPageId=74539" class="createlink">Viewing Result</A> &ndash; How to visualize the result of your trained algorithms.</P>
+
+<H3><A name="index-Data"></A>Data</H3>
+
+<P><A href="collections.html" title="Collections">Collections</A> &ndash; To try out and test Mahout's algorithms you need training data. We are always looking for new training data collections.</P>
+
+<H3><A name="index-Benchmarks"></A>Benchmarks</H3>
+
+<P><A href="mahoutbenchmarks.html" title="MahoutBenchmarks">MahoutBenchmarks</A></P>
+
+<H2><A name="index-Committer%27sResources"></A>Committer's Resources</H2>
+
+<H3><A name="index-ProjectResources"></A>Project Resources</H3>
+
+<UL>
+	<LI><A href="thirdpartydependencies.html" title="ThirdPartyDependencies">Dealing with Third Party Dependencies not in Maven</A></LI>
+	<LI><A href="howtoupdatethewebsite.html" title="HowToUpdateTheWebsite">HowToUpdateTheWebsite</A></LI>
+	<LI><A href="patchchecklist.html" title="PatchCheckList">PatchCheckList</A></LI>
+	<LI><A href="http://cwiki.apache.org/confluence/display/MAHOUT/How%20to%20release" class="external-link" rel="nofollow">How To Release</A></LI>
+</UL>
+
+
+<H3><A name="index-AdditionalResources"></A>Additional Resources</H3>
+
+<UL>
+	<LI><A href="http://monitoring.apache.org/status/" class="external-link" rel="nofollow">Apache Machine Status</A> &#45; Check to see if SVN, other resources are available.</LI>
+	<LI><A href="http://www.apache.org/dev/committers.html" class="external-link" rel="nofollow">Committer's FAQ</A></LI>
+	<LI><A href="http://www.apache.org/dev/" class="external-link" rel="nofollow">Apache Dev</A></LI>
+</UL>
+
+
+
+<H2><A name="index-HowToEditThisWiki"></A>How To Edit This Wiki</H2>
+
+<P>How to edit this Wiki</P>
+
+<P>This Wiki is a collaborative site, anyone can contribute and share:</P>
+
+<UL>
+	<LI>Create an account by clicking the &quot;Login&quot; link at the top of any page, and picking a username and password.</LI>
+	<LI>Edit any page by pressing Edit at the top of the page</LI>
+</UL>
+
+
+<P>There are some conventions used on the Mahout wiki:</P>
+
+<UL>
+	<LI><DIV class="preformatted panel" style="border-width: 1px;"><DIV class="preformattedContent panelContent">
+<PRE>+*TODO:*+</PRE>
+</DIV></DIV>
+<P> (<INS><B>TODO:</B></INS> ) is used to denote sections that definitely need to be cleaned up.</P></LI>
+	<LI><DIV class="preformatted panel" style="border-width: 1px;"><DIV class="preformattedContent panelContent">
+<PRE>+*Mahout_(version)*+</PRE>
+</DIV></DIV>
+<P> (<INS><B>Mahout_0.2</B></INS>) is used to draw attention to the version of Mahout in which a feature was (or will be) added.</P></LI>
+</UL>
+
+
+        </DIV>
+
+                  <DIV class="tabletitle">
+            Children
+            <SPAN class="smalltext" id="show" style="display: inline;">
+              <A href="javascript:showChildren()">Show Children</A></SPAN>
+            <SPAN class="smalltext" id="hide" style="display: none;">
+              <A href="javascript:hideChildren()">Hide Children</A></SPAN>
+          </DIV>
+          <DIV class="greybox" id="children" style="display: none;">
+                                      <A href="faq.html" title="FAQ">FAQ</A>
+              <SPAN class="smalltext">(Apache Mahout)</SPAN>
+              <BR>
+                          <A href="mailinglistarchives.html" title="MailingListArchives">MailingListArchives</A>
+              <SPAN class="smalltext">(Apache Mahout)</SPAN>
+              <BR>
+                          <A href="collections.html" title="Collections">Collections</A>
+              <SPAN class="smalltext">(Apache Mahout)</SPAN>
+              <BR>
+                          <A href="poweredby.html" title="PoweredBy">PoweredBy</A>
+              <SPAN class="smalltext">(Apache Mahout)</SPAN>
+              <BR>
+                          <A href="issuetracker.html" title="IssueTracker">IssueTracker</A>
+              <SPAN class="smalltext">(Apache Mahout)</SPAN>
+              <BR>
+                          <A href="bookstutorialstalks.html" title="BooksTutorialsTalks">BooksTutorialsTalks</A>
+              <SPAN class="smalltext">(Apache Mahout)</SPAN>
+              <BR>
+                          <A href="what-when-where-why-but-not-how-or-who.html" title="What, When, Where, Why (but not How or Who)">What, When, Where, Why (but not How or Who)</A>
+              <SPAN class="smalltext">(Apache Mahout)</SPAN>
+              <BR>
+                          <A href="quickstart.html" title="QuickStart">QuickStart</A>
+              <SPAN class="smalltext">(Apache Mahout)</SPAN>
+              <BR>
+                          <A href="mahoutec2.html" title="MahoutEC2">MahoutEC2</A>
+              <SPAN class="smalltext">(Apache Mahout)</SPAN>
+              <BR>
+                          <A href="reference-reading.html" title="Reference Reading">Reference Reading</A>
+              <SPAN class="smalltext">(Apache Mahout)</SPAN>
+              <BR>
+                          <A href="algorithms.html" title="Algorithms">Algorithms</A>
+              <SPAN class="smalltext">(Apache Mahout)</SPAN>
+              <BR>
+                          <A href="creating-vectors.html" title="Creating Vectors">Creating Vectors</A>
+              <SPAN class="smalltext">(Apache Mahout)</SPAN>
+              <BR>
+                          <A href="glossary.html" title="Glossary">Glossary</A>
+              <SPAN class="smalltext">(Apache Mahout)</SPAN>
+              <BR>
+                          <A href="gsoc.html" title="GSOC">GSOC</A>
+              <SPAN class="smalltext">(Apache Mahout)</SPAN>
+              <BR>
+                          <A href="mahoutbenchmarks.html" title="MahoutBenchmarks">MahoutBenchmarks</A>
+              <SPAN class="smalltext">(Apache Mahout)</SPAN>
+              <BR>
+                          <A href="whoweare.html" title="WhoWeAre">WhoWeAre</A>
+              <SPAN class="smalltext">(Apache Mahout)</SPAN>
+              <BR>
+                          <A href="overview.html" title="Overview">Overview</A>
+              <SPAN class="smalltext">(Apache Mahout)</SPAN>
+              <BR>
+                          <A href="systemrequirements.html" title="SystemRequirements">SystemRequirements</A>
+              <SPAN class="smalltext">(Apache Mahout)</SPAN>
+              <BR>
+                          <A href="releases.html" title="Releases">Releases</A>
+              <SPAN class="smalltext">(Apache Mahout)</SPAN>
+              <BR>
+                          <A href="developerresources.html" title="DeveloperResources">DeveloperResources</A>
+              <SPAN class="smalltext">(Apache Mahout)</SPAN>
+              <BR>
+                          <A href="versioncontrol.html" title="VersionControl">VersionControl</A>
+              <SPAN class="smalltext">(Apache Mahout)</SPAN>
+              <BR>
+                      </DIV>
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.2 Build: 1810 Mar 16, 2010)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/issue-tracker.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/issue-tracker.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/issue-tracker.html (added)
+++ mahout/site/new_website/MAHOUT/issue-tracker.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,171 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/space.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/wiki-content.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/abs.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/tables.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/panels.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/renderer-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/content-types.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/login.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/information-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/layout-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/default-theme.css">
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null;
+      var show = null;
+      var children = null;
+
+      function init() {
+        /* Search form initialization */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ?
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) {
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() {
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+
+      function hideChildren() {
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>Issue Tracker</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="" title="Issue Tracker">Issue Tracker</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">Issue Tracker</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=82377">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" alt="Edit Page" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=82377">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" alt="Browse Space" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&amp;fromPageId=82377">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" alt="Add Page" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&amp;fromPageId=82377">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&amp;fromPageId=82377">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" alt="Add News" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&amp;fromPageId=82377">Add News</A>
+        </DIV>
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <P>Anything that is to be added to <A href="http://svn.apache.org/viewvc/mahout/" class="external-link" rel="nofollow">the source code repository</A> is first presented as a patch in the issue tracker. All conversations are echoed on the developer mailing list and people tend to respond or continue conversations there rather than in the issue tracker, so in order to follow an issue you might also have to read the mailing lists.</P>
+
+<P>An issue does not literally have to be an issue. It could be a wish, task, bug report, etc. and it does not have to contain a patch.</P>
+
+<P>Mahout uses <A href="http://issues.apache.org/jira/browse/MAHOUT" class="external-link" rel="nofollow">JIRA</A>. It is made by the same company that made this Wiki and a handful of other collaborative software development tools that supposedly can communicate with each other and be triggered by actions such as commits to SVN with the issue identity in the comments.</P>
+
+<H3><A name="IssueTracker-Bestpractise"></A>Best practise</H3>
+
+<P>Don't create duplicate issues. Make sure your problem is a problem and that nobody else already fixed it. If you are new to the project it is often preferred that the subject of an issue is discussed on one of our mailing lists before an issue is created.</P>
+
+<P>Quote what it is you are responding to in comments.</P>
+
+<P>Patches should be created at trunk or trunk parent level and if possible be a single uncompressed text file so it is easy to inspect the patch in a web browser. (See <A href="patch-check-list.html" title="Patch Check List">Patch Check List</A>.)</P>
+
+<P>Use the issue identity when referring to an issue in any of our fora. &quot;MAHOUT-n&quot; and not &quot;mahout-n&quot; or &quot;n&quot;. MAHOUT-1 would automatically be linked to <A href="http://issues.apache.org/jira/browse/MAHOUT-1" class="external-link" rel="nofollow">MAHOUT-1</A> in a better world.</P>
+
+        </DIV>
+
+                  <DIV class="tabletitle">
+            Children
+            <SPAN class="smalltext" id="show" style="display: inline;">
+              <A href="javascript:showChildren()">Show Children</A></SPAN>
+            <SPAN class="smalltext" id="hide" style="display: none;">
+              <A href="javascript:hideChildren()">Hide Children</A></SPAN>
+          </DIV>
+          <DIV class="greybox" id="children" style="display: none;">
+                                      <A href="patch-check-list.html" title="Patch Check List">Patch Check List</A>
+              <SPAN class="smalltext">(Apache Mahout)</SPAN>
+              <BR>
+                      </DIV>
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.4.6 Build: 2036 Dec 21, 2010)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || [];
+  _gaq.push(['_setAccount', 'UA-17359171-1']);
+  _gaq.push(['_setDomainName', 'none']);
+  _gaq.push(['_setAllowLinker', true]);
+  _gaq.push(['_trackPageview']);
+
+  (function() {
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/issuetracker.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/issuetracker.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/issuetracker.html (added)
+++ mahout/site/new_website/MAHOUT/issuetracker.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,134 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('http://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null;
+      var show = null;
+      var children = null;
+
+      function init() {
+        /* Search form initialization */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ?
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) {
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() {
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+
+      function hideChildren() {
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>IssueTracker</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" align="left" nowrap="">
+          &nbsp;<A href="index.html" title="Apache Lucene Mahout">Apache Lucene Mahout</A>&nbsp;&gt;&nbsp;<A href="index.html" title="index">index</A>&nbsp;&gt;&nbsp;<A href="" title="IssueTracker">IssueTracker</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Lucene Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">IssueTracker</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="http://cwiki.apache.org/confluence/pages/editpage.action?pageId=82377">
+            <IMG src="http://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="http://cwiki.apache.org/confluence/pages/editpage.action?pageId=82377">Edit Page</A>
+          &nbsp;
+          <A href="http://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="http://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="http://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="http://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=82377">
+            <IMG src="http://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="http://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=82377">Add Page</A>
+          &nbsp;
+          <A href="http://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=82377">
+            <IMG src="http://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="http://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=82377">Add News</A>
+        </DIV>
+      </DIV>
+      <DIV class="pagesubheading" style="margin: 0px 10px 0px 10px;">
+                    Added by <A href="http://cwiki.apache.org/confluence/users/viewuserprofile.action?username=kalle">Karl Wettin</A>, last edited by <A href="http://cwiki.apache.org/confluence/users/viewuserprofile.action?username=kalle">Karl Wettin</A> on Apr 11, 2008
+                      &nbsp;(<A class="noprint" href="http://cwiki.apache.org/confluence/pages/diffpages.action?pageId=82377&originalId=82400">view change</A>)
+              
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <P>Anything that is to be added to <A href="http://svn.apache.org/viewvc/lucene/mahout/" rel="nofollow">the source code repository</A> is first presented as a patch in the issue tracker. All conversations are echoed on the developer mailing list and people tend to respond or continue conversations there rather than in the issue tracker, so in order to follow an issue you might also have to read the mailing lists. </P>
+
+<P>An issue does not literally have to be an issue. It could be a wish, task, bug report, etc., and it does not have to contain a patch.</P>
+
+<P>Mahout uses JIRA. It is made by the same company that made this Wiki and a handful of other collaborative software development tools that supposedly can communicate with each other and be triggered by actions such as commits to SVN with the issue identity in the comments.</P>
+
+<H3><A name="IssueTracker-Bestpractise"></A>Best practice</H3>
+
+<P>Don't create duplicate issues. Make sure your problem is a problem and that nobody else already fixed it. If you are new to the project it is often preferred that the subject of an issue is discussed on one of our mailing lists before an issue is created.</P>
+
+<P>Quote what it is you are responding to in comments.</P>
+
+<P>Patches should be created at trunk or trunk parent level and if possible be a single uncompressed text file so it is easy to inspect the patch in a web browser. (See <A href="patchchecklist.html" title="PatchCheckList">PatchCheckList</A>.)</P>
+
+<P>Use the issue identity when referring to an issue in any of our fora. &quot;MAHOUT-n&quot; and not &quot;mahout-n&quot; or &quot;n&quot;. MAHOUT-1 would automatically be linked to <A href="http://issues.apache.org/jira/browse/MAHOUT-1" rel="nofollow">MAHOUT-1</A> in a better world.</P>
+
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 2.10.4 Build: 1520 Jul 24, 2009)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0.beta1)
+    </DIV>
+  </BODY>
+</HTML>

Added: mahout/site/new_website/MAHOUT/itembased-collaborative-filtering.html
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/itembased-collaborative-filtering.html?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/itembased-collaborative-filtering.html (added)
+++ mahout/site/new_website/MAHOUT/itembased-collaborative-filtering.html Sat Feb 11 10:22:15 2012
@@ -0,0 +1,279 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<HTML>
+  <HEAD>
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/space.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/wiki-content.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/abs.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/menu-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/tables.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/panels.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/master-ie.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/renderer-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/content-types.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/login.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/information-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/layout-macros.css">
+<LINK type="text/css" rel="stylesheet" href="https://cwiki.apache.org/confluence/display/MAHOUT/$stylebase/default-theme.css">
+    <LINK type="text/css" rel="stylesheet" href="resources/space.css">
+    <STYLE type="text/css">
+      .footer {
+        background-image:      url('https://cwiki.apache.org/confluence/images/border/border_bottom.gif');
+        background-repeat:     repeat-x;
+        background-position:   left top;
+        padding-top:           4px;
+        color:                 #666;
+      }
+    </STYLE>
+    <SCRIPT type="text/javascript" language="javascript">
+      var hide = null;
+      var show = null;
+      var children = null;
+
+      function init() {
+        /* Search form initialization */
+        var form = document.forms['search'];
+        if (form != null) {
+          form.elements['domains'].value = location.hostname;
+          form.elements['sitesearch'].value = location.hostname;
+        }
+
+        /* Children initialization */
+        hide = document.getElementById('hide');
+        show = document.getElementById('show');
+        children = document.all != null ?
+                   document.all['children'] :
+                   document.getElementById('children');
+        if (children != null) {
+          children.style.display = 'none';
+          show.style.display = 'inline';
+          hide.style.display = 'none';
+        }
+      }
+
+      function showChildren() {
+        children.style.display = 'block';
+        show.style.display = 'none';
+        hide.style.display = 'inline';
+      }
+
+      function hideChildren() {
+        children.style.display = 'none';
+        show.style.display = 'inline';
+        hide.style.display = 'none';
+      }
+    </SCRIPT>
+    <TITLE>Itembased Collaborative Filtering</TITLE>
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8"></HEAD>
+  <BODY onload="init()">
+    <TABLE border="0" cellpadding="2" cellspacing="0" width="100%">
+      <TR class="topBar">
+        <TD align="left" valign="middle" class="topBarDiv" align="left" nowrap="">
+          &nbsp;<A href="mahout-wiki.html" title="Apache Mahout">Apache Mahout</A>&nbsp;&gt;&nbsp;<A href="mahout-wiki.html" title="Mahout Wiki">Mahout Wiki</A>&nbsp;&gt;&nbsp;<A href="algorithms.html" title="Algorithms">Algorithms</A>&nbsp;&gt;&nbsp;<A href="" title="Itembased Collaborative Filtering">Itembased Collaborative Filtering</A>
+        </TD>
+        <TD align="right" valign="middle" nowrap="">
+          <FORM name="search" action="http://www.google.com/search" method="get">
+            <INPUT type="hidden" name="ie" value="UTF-8">
+            <INPUT type="hidden" name="oe" value="UTF-8">
+            <INPUT type="hidden" name="domains" value="">
+            <INPUT type="hidden" name="sitesearch" value="">
+            <INPUT type="text" name="q" maxlength="255" value="">        
+            <INPUT type="submit" name="btnG" value="Google Search">
+          </FORM>
+        </TD>
+      </TR> 
+    </TABLE>
+
+    <DIV id="PageContent">
+      <DIV class="pageheader" style="padding: 6px 0px 0px 0px;">
+        <!-- We'll enable this once we figure out how to access (and save) the logo resource -->
+        <!--img src="/wiki/images/confluence_logo.gif" style="float: left; margin: 4px 4px 4px 10px;" border="0"-->
+        <DIV style="margin: 0px 10px 0px 10px" class="smalltext">Apache Mahout</DIV>
+        <DIV style="margin: 0px 10px 8px 10px" class="pagetitle">Itembased Collaborative Filtering</DIV>
+
+        <DIV class="greynavbar" align="right" style="padding: 2px 10px; margin: 0px;">
+          <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=24183054">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/notep_16.gif" height="16" width="16" border="0" align="absmiddle" title="Edit Page"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/editpage.action?pageId=24183054">Edit Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/browse_space.gif" height="16" width="16" border="0" align="absmiddle" title="Browse Space"></A>
+            <A href="https://cwiki.apache.org/confluence/pages/listpages.action?key=MAHOUT">Browse Space</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=24183054">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_page_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add Page"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createpage.action?spaceKey=MAHOUT&fromPageId=24183054">Add Page</A>
+          &nbsp;
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=24183054">
+            <IMG src="https://cwiki.apache.org/confluence/images/icons/add_blogentry_16.gif" height="16" width="16" border="0" align="absmiddle" title="Add News"></A>
+          <A href="https://cwiki.apache.org/confluence/pages/createblogpost.action?spaceKey=MAHOUT&fromPageId=24183054">Add News</A>
+        </DIV>
+      </DIV>
+
+      <DIV class="pagecontent">
+        <DIV class="wiki-content">
+          <P>Itembased Collaborative Filtering is a popular way of doing Recommendation Mining.</P>
+
+<H3><A name="ItembasedCollaborativeFiltering-Terminology"></A>Terminology</H3>
+
+<P>We have <B>users</B> that interact with <B>items</B> (which can be pretty much anything like books, videos, news, other users,...). Those users express <B>preferences</B> towards the items which can either be boolean (just modelling that a user likes an item) or numeric (by having a rating value assigned to the preference). Typically only a small number of preferences is known for each single user.</P>
+
+<H3><A name="ItembasedCollaborativeFiltering-Algorithmicproblems"></A>Algorithmic problems</H3>
+
+<P>Collaborative Filtering algorithms aim to solve the <B>prediction</B> problem where the task is to estimate the preference of a user towards an item which he/she has not yet seen.</P>
+
+<P>Once an algorithm can predict preferences it can also be used to do <B>Top-N-Recommendation</B> where the task is to find the N items a given user might like best. This is usually done by isolating a set of candidate items, computing the predicted preferences of the given user towards them and returning the highest scoring ones.</P>
+
+<P>If we look at the problem from a mathematical perspective, a <B>user-item-matrix</B> is created from the preference data and the task is to predict the missing entries by finding patterns in the known entries.</P>
+
+<H3><A name="ItembasedCollaborativeFiltering-ItembasedCollaborativeFiltering"></A>Itembased Collaborative Filtering</H3>
+
+<P>A popular approach called &quot;Itembased Collaborative Filtering&quot; estimates a user's preference towards an item by looking at his/her preferences towards similar items. Be aware that, in this context, similarity must be thought of as similarity of rating behaviour, not similarity of content.</P>
+
+<P>The standard procedure is to pairwisely compare the columns of the user-item-matrix (the item-vectors) using a similarity measure like pearson-correlation, cosine or loglikelihood to obtain similar items and use those together with the user's ratings to predict his/her preference towards unknown items.</P>
+
+
+<H3><A name="ItembasedCollaborativeFiltering-Map%2FReduceimplementations"></A>Map/Reduce implementations</H3>
+
+<P>Mahout offers two Map/Reduce jobs aimed to support Itembased Collaborative Filtering.</P>
+
+<P><B>org.apache.mahout.cf.taste.hadoop.similarity.item.ItemSimilarityJob</B> computes all similar items. It expects a .csv file with the preference data as input, where each line represents a single preference in the form <EM>userID,itemID,value</EM> and outputs pairs of itemIDs with their associated similarity value.</P>
+
+<P><EM>job specific options</EM></P>
+
+<DIV class="table-wrap">
+<TABLE class="confluenceTable"><TBODY>
+<TR>
+<TD class="confluenceTd">input</TD>
+<TD class="confluenceTd">path to input directory</TD>
+</TR>
+<TR>
+<TD class="confluenceTd">output</TD>
+<TD class="confluenceTd">path to output directory</TD>
+</TR>
+<TR>
+<TD class="confluenceTd">similarityClassname</TD>
+<TD class="confluenceTd">Name of distributed similarity class to instantiate,                                                           alternatively use one of the predefined similarities                                                            (SIMILARITY_COOCCURRENCE, SIMILARITY_EUCLIDEAN_DISTANCE,                                                            SIMILARITY_LOGLIKELIHOOD, SIMILARITY_PEARSON_CORRELATION,                                                       SIMILARITY_TANIMOTO_COEFFICIENT, SIMILARITY_UNCENTERED_COSINE,                                                            SIMILARITY_UNCENTERED_ZERO_ASSUMING_COSINE)</TD>
+</TR>
+<TR>
+<TD class="confluenceTd">maxSimilaritiesPerItem</TD>
+<TD class="confluenceTd">try to cap the number of similar items per item to this number</TD>
+</TR>
+<TR>
+<TD class="confluenceTd">maxPrefsPerUser</TD>
+<TD class="confluenceTd">max number of preferences to consider per user, users with more preferences will be sampled down</TD>
+</TR>
+<TR>
+<TD class="confluenceTd">minPrefsPerUser</TD>
+<TD class="confluenceTd">ignore users with fewer preferences than this</TD>
+</TR>
+<TR>
+<TD class="confluenceTd">booleanData</TD>
+<TD class="confluenceTd">treat input as having no preference values</TD>
+</TR>
+<TR>
+<TD class="confluenceTd">threshold</TD>
+<TD class="confluenceTd">discard item pairs with a similarity value below this</TD>
+</TR>
+</TBODY></TABLE>
+</DIV>
+
+
+<P><B>org.apache.mahout.cf.taste.hadoop.item.RecommenderJob</B> is a completely distributed itembased recommender. It expects a .csv file with the preference data as input, where each line represents a single preference in the form <EM>userID,itemID,value</EM> and outputs userIDs with associated recommended itemIDs and their scores.</P>
+
+<P><EM>job specific options</EM></P>
+
+<DIV class="table-wrap">
+<TABLE class="confluenceTable"><TBODY>
+<TR>
+<TD class="confluenceTd">input</TD>
+<TD class="confluenceTd">path to input directory</TD>
+</TR>
+<TR>
+<TD class="confluenceTd">output</TD>
+<TD class="confluenceTd">path to output directory</TD>
+</TR>
+<TR>
+<TD class="confluenceTd">numRecommendations</TD>
+<TD class="confluenceTd">number of recommendations per user</TD>
+</TR>
+<TR>
+<TD class="confluenceTd">usersFile</TD>
+<TD class="confluenceTd">file of users to recommend for</TD>
+</TR>
+<TR>
+<TD class="confluenceTd">itemsFile</TD>
+<TD class="confluenceTd">file of items to recommend for</TD>
+</TR>
+<TR>
+<TD class="confluenceTd">filterFile</TD>
+<TD class="confluenceTd">file containing comma-separated userID,itemID pairs. Used to exclude the item from the recommendations for that user (optional)</TD>
+</TR>
+<TR>
+<TD class="confluenceTd">maxPrefsPerUser</TD>
+<TD class="confluenceTd">maximum number of preferences considered per user in final recommendation phase</TD>
+</TR>
+<TR>
+<TD class="confluenceTd">similarityClassname</TD>
+<TD class="confluenceTd">Name of distributed similarity class to instantiate,                                                           alternatively use one of the predefined similarities                                                            (SIMILARITY_COOCCURRENCE, SIMILARITY_EUCLIDEAN_DISTANCE,                                                            SIMILARITY_LOGLIKELIHOOD, SIMILARITY_PEARSON_CORRELATION,                                                       SIMILARITY_TANIMOTO_COEFFICIENT, SIMILARITY_UNCENTERED_COSINE,                                                            SIMILARITY_UNCENTERED_ZERO_ASSUMING_COSINE)</TD>
+</TR>
+<TR>
+<TD class="confluenceTd">maxSimilaritiesPerItem</TD>
+<TD class="confluenceTd">try to cap the number of similar items per item to this number</TD>
+</TR>
+<TR>
+<TD class="confluenceTd">maxPrefsPerUserInItemSimilarity</TD>
+<TD class="confluenceTd">max number of preferences to consider per user, users with more preferences will be sampled down</TD>
+</TR>
+<TR>
+<TD class="confluenceTd">minPrefsPerUser</TD>
+<TD class="confluenceTd">ignore users with fewer preferences than this</TD>
+</TR>
+<TR>
+<TD class="confluenceTd">booleanData</TD>
+<TD class="confluenceTd">treat input as having no preference values</TD>
+</TR>
+<TR>
+<TD class="confluenceTd">threshold</TD>
+<TD class="confluenceTd">discard item pairs with a similarity value below this</TD>
+</TR>
+</TBODY></TABLE>
+</DIV>
+
+
+<H3><A name="ItembasedCollaborativeFiltering-Resources"></A>Resources</H3>
+
+<UL>
+	<LI><A href="http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.144.9927&rep=rep1&type=pdf" class="external-link" rel="nofollow">Sarwar et al.:Item-Based Collaborative Filtering Recommendation Algorithms </A></LI>
+	<LI><A href="http://www.slideshare.net/sscdotopen/mahoutcf" class="external-link" rel="nofollow">Slides: Distributed Itembased Collaborative Filtering with Apache Mahout</A></LI>
+</UL>
+
+        </DIV>
+
+        
+      </DIV>
+    </DIV>
+    <DIV class="footer">
+      Generated by
+      <A href="http://www.atlassian.com/confluence/">Atlassian Confluence</A> (Version: 3.4.9 Build: 2042 Feb 14, 2011)
+      <A href="http://could.it/autoexport/">Auto Export Plugin</A> (Version: 1.0.0-dkulp)
+    </DIV>
+<SCRIPT type="text/javascript">
+
+  var _gaq = _gaq || [];
+  _gaq.push(['_setAccount', 'UA-17359171-1']);
+  _gaq.push(['_setDomainName', 'none']);
+  _gaq.push(['_setAllowLinker', true]);
+  _gaq.push(['_trackPageview']);
+
+  (function() {
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+  })();
+
+</SCRIPT>
+  </BODY>
+</HTML>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/k-means-clustering.data/2dKMeans.png
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/k-means-clustering.data/2dKMeans.png?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/k-means-clustering.data/2dKMeans.png
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/k-means-clustering.data/2dKMeans.png.jpeg
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/k-means-clustering.data/2dKMeans.png.jpeg?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/k-means-clustering.data/2dKMeans.png.jpeg
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/k-means-clustering.data/Canopy.png
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/k-means-clustering.data/Canopy.png?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/k-means-clustering.data/Canopy.png
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/k-means-clustering.data/Canopy.png.jpeg
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/k-means-clustering.data/Canopy.png.jpeg?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/k-means-clustering.data/Canopy.png.jpeg
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/k-means-clustering.data/Canopy10.png
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/k-means-clustering.data/Canopy10.png?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/k-means-clustering.data/Canopy10.png
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/k-means-clustering.data/Canopy10.png.jpeg
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/k-means-clustering.data/Canopy10.png.jpeg?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/k-means-clustering.data/Canopy10.png.jpeg
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/k-means-clustering.data/Example implementation of k-Means provided with Mahout
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/k-means-clustering.data/Example%20implementation%20of%20k-Means%20provided%20with%20Mahout?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/k-means-clustering.data/Example implementation of k-Means provided with Mahout (added)
+++ mahout/site/new_website/MAHOUT/k-means-clustering.data/Example implementation of k-Means provided with Mahout Sat Feb 11 10:22:15 2012
@@ -0,0 +1 @@
+<stage keygen_seq="5" version="1.0"><pageObj drawingWidth="822" istt="false" print_paper="LETTER" print_scale="0" drawingHeight="816" guides="0" gr="1" pb="0" border="1" print_layout="0" width="5000" fill="16777215" height="5000" print_grid="0"><styles><shapeStyle lineWidth="-1" dropShadowOn="true" lineColor="global:0x333333" fillColor="global:0xd1d1d1" gradientOn="true"/><lineStyle end="0" pattern="0" borderLine="false" width="1" connType="right" roundCorners="true" begin="0" color="0x000000"/><textStyle color="0" face="Arial" style="" size="12"/></styles><objects><object shp_id="2" linec="0" class="GliffyImageShape" rot="0" lock="false" fixed-aspect="true" text-horizontal-pos="center" dshad="false" ceoid="75159" x="466" libraryid="com.gliffy.images" linew="1" y="413" width="592.525" fill="16777215" height="685" order="0" text-vertical-pos="middle" gradon="false" filename="k-Means in Mahout.jpg"><text/><connlines/></object></objects></pageObj></stage>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/k-means-clustering.data/Example implementation of k-Means provided with Mahout.png
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/k-means-clustering.data/Example%20implementation%20of%20k-Means%20provided%20with%20Mahout.png?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/k-means-clustering.data/Example implementation of k-Means provided with Mahout.png
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/k-means-clustering.data/Example implementation of k-Means provided with Mahout.png.jpeg
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/k-means-clustering.data/Example%20implementation%20of%20k-Means%20provided%20with%20Mahout.png.jpeg?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/k-means-clustering.data/Example implementation of k-Means provided with Mahout.png.jpeg
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/k-means-clustering.data/KMeans.png
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/k-means-clustering.data/KMeans.png?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/k-means-clustering.data/KMeans.png
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/k-means-clustering.data/KMeans.png.jpeg
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/k-means-clustering.data/KMeans.png.jpeg?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/k-means-clustering.data/KMeans.png.jpeg
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/k-means-clustering.data/SampleData.png
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/k-means-clustering.data/SampleData.png?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/k-means-clustering.data/SampleData.png
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/k-means-clustering.data/SampleData.png.jpeg
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/k-means-clustering.data/SampleData.png.jpeg?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/k-means-clustering.data/SampleData.png.jpeg
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/k-means-clustering.data/k-Means Example
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/k-means-clustering.data/k-Means%20Example?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/k-means-clustering.data/k-Means Example (added)
+++ mahout/site/new_website/MAHOUT/k-means-clustering.data/k-Means Example Sat Feb 11 10:22:15 2012
@@ -0,0 +1 @@
+<stage keygen_seq="64"><pageObj istt="true" stg="0" pb="0" gr="0" fill="16777215" height="754" width="576"><objects><object order="8" path="0,0;44.4315030580294,0;44.4315030580294,18;59.4315030580294,18;" numSegs="3" linep="0" linec="0" linew="1" endStyle="0" beginStyle="0" buff="11" height="22" width="63.4" y="119" x="220.568496941971" ln_id="12" class="TheOrthoLine"><text/></object><object order="9" path="0,0;62.4687500000001,0;62.4687500000001,-15;77.4687500000001,-15;" numSegs="3" linep="0" linec="0" linew="1" endStyle="0" beginStyle="0" buff="11" height="19" width="81.45" y="137" x="360" ln_id="13" class="TheOrthoLine"><text/></object><object order="10" path="0,0;0,33;4,33;4,48;" numSegs="3" linep="0" linec="0" linew="1" endStyle="0" beginStyle="0" buff="11" height="52" width="8" y="167" x="505" ln_id="14" class="TheOrthoLine"><text/></object><object order="11" path="0,0;-19.2625000000002,0;-19.2625000000002,4;-34.2625000000002,4;" numSegs="3" linep="0" linec="0" linew=
 "1" endStyle="0" beginStyle="0" buff="11" height="8" width="38.25" y="270" x="447.8125" ln_id="15" class="TheOrthoLine"><text/></object><object order="12" path="0,0;-74.4500000000001,0;-74.4500000000001,-4;-89.4500000000001,-4;" numSegs="3" linep="0" linec="0" linew="1" endStyle="0" beginStyle="0" buff="11" height="8" width="93.45" y="274" x="258.45" ln_id="16" class="TheOrthoLine"><text/></object><object order="15" path="0,0;0,15;87,15;87,42.75;" numSegs="3" linep="0" linec="0" linew="1" endStyle="0" beginStyle="0" buff="11" height="46.75" width="91" y="389.5" x="341" ln_id="19" class="TheOrthoLine"><text/></object><object order="16" path="0,0;0,15;22.2125,15;22.2125,32;" numSegs="3" linep="0" linec="0" linew="1" endStyle="0" beginStyle="0" buff="11" height="36" width="26.2" y="300" x="110" ln_id="21" class="TheOrthoLine"><text/></object><object order="17" path="0,0;41.1625000000001,0;41.1625000000001,-16;56.1625000000001,-16;" numSegs="3" linep="0" linec="0" linew="1" endS
 tyle="0" beginStyle="0" buff="11" height="20" width="60.15" y="373" x="207.9" ln_id="22" class="TheOrthoLine"><text/></object><object order="18" path="0,0;0,15;13,15;13,30;" numSegs="3" linep="0" linec="0" linew="1" endStyle="0" beginStyle="0" buff="11" height="34" width="17" y="414" x="132" ln_id="23" class="TheOrthoLine"><text/></object><object order="0" dsy="8" dsx="8" dshad="true" gradon="true" linew="1" linec="0" fill="8113609" text-horizontal-pos="center" text-vertical-pos="middle" fixed-aspect="false" rot="0" height="53.0369938839414" width="149.136993883941" y="119" x="146" shp_id="3" class="fc_data"><text><![CDATA[<P ALIGN="CENTER"><FONT FACE="Arial" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0">Points to Cluster:</FONT></P><P ALIGN="CENTER"><FONT FACE="Arial" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0">(1,1),(2,1),(1,2),(2,2),(3,3)</FONT></P>]]></text><connlines><connline type="start" ln_id="12" cpnum="3"/></connlines></object><object order="1
 " dsy="8" dsx="8" dshad="true" gradon="true" linew="1" linec="0" fill="8113609" text-horizontal-pos="center" text-vertical-pos="middle" fixed-aspect="false" rot="0" height="110" width="122.375" y="270" x="509" shp_id="4" class="rectangle"><text><![CDATA[<P ALIGN="CENTER"><FONT FACE="Arial" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0">Selected 2 k-Means Centers as (1,1), (1,2) and write to Sequence File: ../testdata/clusters/part-00000</FONT></P>]]></text><connlines><connline type="start" ln_id="15" cpnum="1"/><connline type="end" ln_id="14" cpnum="2"/></connlines></object><object order="2" dsy="8" dsx="8" dshad="true" gradon="true" linew="1" linec="0" fill="8113609" text-horizontal-pos="center" text-vertical-pos="middle" fixed-aspect="false" rot="0" height="57" width="155.1" y="274" x="336" shp_id="6" class="fc_data"><text><![CDATA[<P ALIGN="CENTER"><FONT FACE="Arial" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0">C0C0:[s2,0:1.0,1:1.0,]</FONT></P><P ALIGN
 ="CENTER"><FONT FACE="Arial" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0">C1C1:[s2,0:1.0,1:2.0,]</FONT></P>]]></text><connlines><connline type="start" ln_id="16" cpnum="1"/><connline type="end" ln_id="15" cpnum="3"/></connlines></object><object order="3" dsy="8" dsx="8" dshad="true" gradon="true" linew="1" linec="0" fill="8113609" text-horizontal-pos="center" text-vertical-pos="middle" fixed-aspect="false" rot="0" height="93" width="80" y="137" x="320" shp_id="7" class="rectangle"><text><![CDATA[<P ALIGN="CENTER"><FONT FACE="Arial" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0">Converted into Input File: ../testdata/points/input.txt</FONT></P>]]></text><connlines><connline type="end" ln_id="12" cpnum="1"/><connline type="start" ln_id="13" cpnum="3"/></connlines></object><object order="4" dsy="8" dsx="8" dshad="true" gradon="true" linew="1" linec="0" fill="8113609" text-horizontal-pos="center" text-vertical-pos="middle" fixed-aspect="false" rot="0" height=
 "90" width="135.0625" y="122" x="505" shp_id="8" class="fc_data"><text><![CDATA[<P ALIGN="CENTER"><FONT FACE="Arial" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0">[s2, 0:1.0, 1:1.0, ] </FONT></P><P ALIGN="CENTER"><FONT FACE="Arial" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0">[s2, 0:2.0, 1:1.0, ] </FONT></P><P ALIGN="CENTER"><FONT FACE="Arial" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0">[s2, 0:1.0, 1:2.0, ] </FONT></P><P ALIGN="CENTER"><FONT FACE="Arial" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0">[s2, 0:2.0, 1:2.0, ] </FONT></P><P ALIGN="CENTER"><FONT FACE="Arial" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0">[s2, 0:3.0, 1:3.0, ]</FONT></P>]]></text><connlines><connline type="start" ln_id="14" cpnum="0"/><connline type="end" ln_id="13" cpnum="1"/></connlines></object><object order="5" dsy="8" dsx="8" dshad="true" gradon="true" linew="1" linec="0" fill="8113609" text-horizontal-pos="center" text-vertical-pos="middle" fix
 ed-aspect="false" rot="0" height="60" width="118" y="270" x="110" shp_id="9" class="rectangle"><text><![CDATA[<P ALIGN="CENTER"><FONT FACE="Arial" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0">Run k-Means Algorithm: k = 2</FONT></P>]]></text><connlines><connline type="start" ln_id="21" cpnum="0"/><connline type="end" ln_id="16" cpnum="3"/></connlines></object><object order="6" dsy="8" dsx="8" dshad="true" gradon="true" linew="1" linec="0" fill="8113609" text-horizontal-pos="center" text-vertical-pos="middle" fixed-aspect="false" rot="0" height="58" width="153.95" y="473" x="145" shp_id="10" class="fc_data"><text><![CDATA[<P ALIGN="CENTER"><FONT FACE="Arial" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0">V0V0: [s2, 0:1.5, 1:1.5, ]</FONT></P><P ALIGN="CENTER"><FONT FACE="Arial" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0">V1V1: [s2, 0:4.0, 1:4.0, ]  </FONT></P>]]></text><connlines><connline type="end" ln_id="23" cpnum="2"/></connlines></object><ob
 ject order="7" dsy="8" dsx="8" dshad="true" gradon="true" linew="1" linec="0" fill="8113609" text-horizontal-pos="center" text-vertical-pos="middle" fixed-aspect="false" rot="0" height="93.5" width="273.9875" y="479" x="428" shp_id="11" class="fc_data"><text><![CDATA[<P ALIGN="CENTER"><FONT FACE="Arial" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0">V0: [s2, 0:1.5, 1:1.5, ] 	[s2, 0:1.0, 1:1.0, ] </FONT></P><P ALIGN="CENTER"><FONT FACE="Arial" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0">V0: [s2, 0:1.5, 1:1.5, ] 	[s2, 0:2.0, 1:1.0, ] </FONT></P><P ALIGN="CENTER"><FONT FACE="Arial" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0">V0: [s2, 0:1.5, 1:1.5, ] 	[s2, 0:1.0, 1:2.0, ] </FONT></P><P ALIGN="CENTER"><FONT FACE="Arial" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0">V0: [s2, 0:1.5, 1:1.5, ] 	[s2, 0:2.0, 1:2.0, ] </FONT></P><P ALIGN="CENTER"><FONT FACE="Arial" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0">V1: [s2, 0:4.0, 1:4.0, ]
  	[s2, 0:3.0, 1:3.0, ]</FONT></P>]]></text><connlines><connline type="end" ln_id="19" cpnum="2"/></connlines></object><object order="13" dsy="8" dsx="8" dshad="true" gradon="true" linew="1" linec="0" fill="8113609" text-horizontal-pos="center" text-vertical-pos="middle" fixed-aspect="false" rot="0" height="82" width="151.375" y="373" x="132" shp_id="17" class="rectangle"><text><![CDATA[<P ALIGN="CENTER"><FONT FACE="Arial" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0">Find k-Means Centers and write to File:</FONT></P><P ALIGN="CENTER"><FONT FACE="Arial" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0">../output/clusters-2/part-00000</FONT></P>]]></text><connlines><connline type="start" ln_id="23" cpnum="0"/><connline type="end" ln_id="21" cpnum="2"/><connline type="start" ln_id="22" cpnum="3"/></connlines></object><object order="14" dsy="8" dsx="8" dshad="true" gradon="true" linew="1" linec="0" fill="8113609" text-horizontal-pos="center" text-vertical-pos="mi
 ddle" fixed-aspect="false" rot="0" height="65" width="153.875" y="357" x="341" shp_id="18" class="rectangle"><text><![CDATA[<P ALIGN="CENTER"><FONT FACE="Arial" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0">Cluster points to Centers and write to File:</FONT></P><P ALIGN="CENTER"><FONT FACE="Arial" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0">../output/points/part-00000</FONT></P>]]></text><connlines><connline type="start" ln_id="19" cpnum="0"/><connline type="end" ln_id="22" cpnum="1"/></connlines></object><object order="19" dshad="false" gradon="false" linew="1" linec="0" fill="16777215" text-horizontal-pos="center" text-vertical-pos="middle" fixed-aspect="false" rot="0" height="20" width="204.275" y="68" x="342" shp_id="43" class="rectangle"><text><![CDATA[<P ALIGN="CENTER"><FONT FACE="Arial" SIZE="12" COLOR="#000000" LETTERSPACING="0" KERNING="0">Steps in k-Means Algorithm: k = 2</FONT></P>]]></text><connlines/></object></objects></pageObj></stage>
\ No newline at end of file

Added: mahout/site/new_website/MAHOUT/k-means-clustering.data/k-Means in Mahout.jpg
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/k-means-clustering.data/k-Means%20in%20Mahout.jpg?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/k-means-clustering.data/k-Means in Mahout.jpg
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/k-means-clustering.data/k-Means in Mahout.jpg.jpeg
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/k-means-clustering.data/k-Means%20in%20Mahout.jpg.jpeg?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/k-means-clustering.data/k-Means in Mahout.jpg.jpeg
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/k-means-clustering.data/k-Means in Mahout.png
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/k-means-clustering.data/k-Means%20in%20Mahout.png?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/k-means-clustering.data/k-Means in Mahout.png
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/k-means-clustering.data/k-Means in Mahout.png.jpeg
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/k-means-clustering.data/k-Means%20in%20Mahout.png.jpeg?rev=1243022&view=auto
==============================================================================
Binary file - no diff available.

Propchange: mahout/site/new_website/MAHOUT/k-means-clustering.data/k-Means in Mahout.png.jpeg
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: mahout/site/new_website/MAHOUT/k-means-clustering.data/quickstart-kmeans.sh
URL: http://svn.apache.org/viewvc/mahout/site/new_website/MAHOUT/k-means-clustering.data/quickstart-kmeans.sh?rev=1243022&view=auto
==============================================================================
--- mahout/site/new_website/MAHOUT/k-means-clustering.data/quickstart-kmeans.sh (added)
+++ mahout/site/new_website/MAHOUT/k-means-clustering.data/quickstart-kmeans.sh Sat Feb 11 10:22:15 2012
@@ -0,0 +1,45 @@
+#!/bin/sh
+#/**
+# * Licensed to the Apache Software Foundation (ASF) under one or more
+# * contributor license agreements.  See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The ASF licenses this file to You under the Apache License, Version 2.0
+# * (the "License"); you may not use this file except in compliance with
+# * the License.  You may obtain a copy of the License at
+# *
+# *     http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+
+# Downloads the Reuters-21578 dataset (unless already present under
+# examples/bin/work), then runs ExtractReuters, seqdirectory, seq2sparse,
+# kmeans and clusterdump on it.
+# To run:  change into the mahout directory and invoke this script from there.
+
+cd examples/bin/ || exit 1   # every later path is relative to examples/bin
+mkdir -p work
+if [ ! -e work/reuters-out ] && [ ! -e work/reuters-sgm ]; then
+  if [ ! -f work/reuters21578.tar.gz ]; then
+    echo "Downloading Reuters-21578"
+    # -f: fail on HTTP errors; .part: a partial download never poses as the archive.
+    curl -fL http://kdd.ics.uci.edu/databases/reuters21578/reuters21578.tar.gz -o work/reuters21578.tar.gz.part || exit 1
+    mv work/reuters21578.tar.gz.part work/reuters21578.tar.gz || exit 1
+  fi
+  mkdir -p work/reuters-sgm
+  echo "Extracting..."
+  # Subshell keeps our cwd on failure; drop the dir so a rerun extracts again.
+  (cd work/reuters-sgm && tar xzf ../reuters21578.tar.gz) || { rm -rf work/reuters-sgm; exit 1; }
+fi
+
+cd ../..
+./bin/mahout org.apache.lucene.benchmark.utils.ExtractReuters ./examples/bin/work/reuters-sgm/ ./examples/bin/work/reuters-out/
+./bin/mahout seqdirectory -i ./examples/bin/work/reuters-out/ -o ./examples/bin/work/reuters-out-seqdir -c UTF-8 -chunk 5
+./bin/mahout seq2sparse -i ./examples/bin/work/reuters-out-seqdir/ -o ./examples/bin/work/reuters-out-seqdir-sparse
+./bin/mahout kmeans -i ./examples/bin/work/reuters-out-seqdir-sparse/tfidf-vectors/ -c ./examples/bin/work/clusters -o ./examples/bin/work/reuters-kmeans -x 10 -k 20 -ow
+./bin/mahout clusterdump -s examples/bin/work/reuters-kmeans/clusters-10 -d examples/bin/work/reuters-out-seqdir-sparse/dictionary.file-0 -dt sequencefile -b 100 -n 20
+