You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2008/05/09 23:35:17 UTC

svn commit: r654943 [2/9] - in /lucene/mahout/trunk/core: ./ lib/ src/main/examples/org/ src/main/examples/org/apache/ src/main/examples/org/apache/mahout/ src/main/examples/org/apache/mahout/cf/ src/main/examples/org/apache/mahout/cf/taste/ src/main/e...

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/ejb/ejb-jar.xml
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/ejb/ejb-jar.xml?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/ejb/ejb-jar.xml (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/ejb/ejb-jar.xml Fri May  9 14:35:12 2008
@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<ejb-jar xmlns="http://java.sun.com/xml/ns/j2ee"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://java.sun.com/xml/ns/j2ee http://java.sun.com/xml/ns/j2ee/ejb-jar_2_1.xsd"
+         version="2.1">
+  <enterprise-beans>
+    <session>
+      <ejb-name>RecommenderEJB</ejb-name>
+      <home>org.apache.mahout.cf.taste.ejb.RecommenderEJBHome</home>
+      <remote>org.apache.mahout.cf.taste.ejb.RecommenderEJB</remote>
+      <local-home>org.apache.mahout.cf.taste.ejb.RecommenderEJBLocalHome</local-home>
+      <local>org.apache.mahout.cf.taste.ejb.RecommenderEJBLocal</local>
+      <ejb-class>org.apache.mahout.cf.taste.ejb.RecommenderEJBBean</ejb-class>
+      <session-type>Stateless</session-type>
+      <transaction-type>Container</transaction-type>
+      <env-entry>
+        <env-entry-name>recommender-class</env-entry-name>
+        <env-entry-type>java.lang.String</env-entry-type>
+        <env-entry-value>@RECOMMENDER_CLASS@</env-entry-value>
+      </env-entry>
+      <!-- ...or give the JNDI name where an implementation can be found,
+           relative to java:comp/env -->
+      <!--
+         <env-entry>
+           <env-entry-name>recommender-jndi-name</env-entry-name>
+           <env-entry-type>java.lang.String</env-entry-type>
+           <env-entry-value>foo/YourRecommender</env-entry-value>
+         </env-entry>
+         -->
+    </session>
+  </enterprise-beans>
+  <assembly-descriptor>
+    <container-transaction>
+      <method>
+        <ejb-name>RecommenderEJB</ejb-name>
+        <method-name>*</method-name>
+      </method>
+      <trans-attribute>Supports</trans-attribute>
+    </container-transaction>
+  </assembly-descriptor>
+</ejb-jar>

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/ejb/mapping.xml
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/ejb/mapping.xml?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/ejb/mapping.xml (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/ejb/mapping.xml Fri May  9 14:35:12 2008
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<java-wsdl-mapping xmlns="http://java.sun.com/xml/ns/j2ee"
+                   xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+                   xsi:schemaLocation="http://java.sun.com/xml/ns/j2ee http://www.ibm.com/webservices/xsd/j2ee_jaxrpc_mapping_1_1.xsd"
+                   version="1.1">
+  <package-mapping>
+    <package-type>org.apache.mahout.cf.taste.ejb</package-type>
+    <namespaceURI>urn:org.apache.mahout.cf.taste.ejb.RecommenderWS</namespaceURI>
+  </package-mapping>
+</java-wsdl-mapping>

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/ejb/webservices.xml
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/ejb/webservices.xml?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/ejb/webservices.xml (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/ejb/webservices.xml Fri May  9 14:35:12 2008
@@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<webservices xmlns="http://java.sun.com/xml/ns/j2ee"
+             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+             xsi:schemaLocation="http://java.sun.com/xml/ns/j2ee http://www.ibm.com/webservices/xsd/j2ee_web_services_1_1.xsd"
+             version="1.1">
+  <webservice-description>
+    <webservice-description-name>RecommenderWS</webservice-description-name>
+    <wsdl-file>META-INF/RecommenderWS.wsdl</wsdl-file>
+    <jaxrpc-mapping-file>META-INF/mapping.xml</jaxrpc-mapping-file>
+    <port-component>
+      <port-component-name>RecommenderServicePort</port-component-name>
+      <wsdl-port>
+        <namespaceURI>urn:org.apache.mahout.cf.taste.ejb.RecommenderWS</namespaceURI>
+        <localpart>RecommenderServicePort</localpart>
+      </wsdl-port>
+      <service-endpoint-interface>org.apache.mahout.cf.taste.ejb.RecommenderWS</service-endpoint-interface>
+      <service-impl-bean>
+        <ejb-link>RecommenderEJB</ejb-link>
+      </service-impl-bean>
+    </port-component>
+  </webservice-description>
+</webservices>
\ No newline at end of file

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/IRStatistics.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/IRStatistics.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/IRStatistics.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/IRStatistics.java Fri May  9 14:35:12 2008
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.eval;
+
+/**
+ * <p>Implementations encapsulate information retrieval-related statistics about
+ * a {@link org.apache.mahout.cf.taste.recommender.Recommender}'s recommendations.</p>
+ *
+ * <p>See <a href="http://en.wikipedia.org/wiki/Information_retrieval">Information retrieval</a>.</p>
+ */
+public interface IRStatistics {
+
+  /**
+   * <p>See <a href="http://en.wikipedia.org/wiki/Information_retrieval#Precision">Precision</a>.</p>
+   */
+  double getPrecision();
+
+  /**
+   * <p>See <a href="http://en.wikipedia.org/wiki/Information_retrieval#Recall">Recall</a>.</p>
+   */
+  double getRecall();
+
+  /**
+   * <p>See <a href="http://en.wikipedia.org/wiki/Information_retrieval#F-measure">F-measure</a>.</p>
+   */
+  double getF1Measure();
+
+  /**
+   * <p>See <a href="http://en.wikipedia.org/wiki/Information_retrieval#F-measure">F-measure</a>.</p>
+   */
+  double getFNMeasure(double n);
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderBuilder.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderBuilder.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderBuilder.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderBuilder.java Fri May  9 14:35:12 2008
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.eval;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.recommender.Recommender;
+
+/**
+ * <p>Implementations of this inner interface are simple helper classes which create a
+ * {@link Recommender} to be evaluated based on the given {@link DataModel}.</p>
+ */
+public interface RecommenderBuilder {
+
+  /**
+   * <p>Builds a {@link Recommender} implementation to be evaluated, using the given {@link DataModel}.</p>
+   *
+   * @param dataModel {@link DataModel} to build the {@link Recommender} on
+   * @return {@link Recommender} based upon the given {@link DataModel}
+   * @throws TasteException if an error occurs while accessing the {@link DataModel}
+   */
+  Recommender buildRecommender(DataModel dataModel) throws TasteException;
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderEvaluator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderEvaluator.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderEvaluator.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderEvaluator.java Fri May  9 14:35:12 2008
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.eval;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.model.DataModel;
+
+/**
+ * <p>Implementations of this interface evaluate the quality of a
+ * {@link org.apache.mahout.cf.taste.recommender.Recommender}'s recommendations.</p>
+ */
+public interface RecommenderEvaluator {
+
+  /**
+   * <p>Evaluates the quality of a {@link org.apache.mahout.cf.taste.recommender.Recommender}'s recommendations.
+   * The range of values that may be returned
+   * depends on the implementation, but <em>lower</em> values must mean better recommendations, with 0 being the
+   * lowest / best possible evaluation, meaning a perfect match. This method does not
+   * accept a {@link org.apache.mahout.cf.taste.recommender.Recommender} directly, but rather a
+   * {@link RecommenderBuilder} which can build the
+   * {@link org.apache.mahout.cf.taste.recommender.Recommender} to test on top of a given {@link DataModel}.</p>
+   *
+   * <p>Implementations will take a certain percentage of the preferences supplied by the given {@link DataModel}
+   * as "training data". This is typically most of the data, like 90%. This data is used to produce recommendations,
+   * and the rest of the data is compared against estimated preference values to see how much the
+   * {@link org.apache.mahout.cf.taste.recommender.Recommender}'s predicted preferences match the user's real
+   * preferences. Specifically,
+   * for each user, this percentage of the user's ratings are used to produce recommendatinos, and for each user,
+   * the remaining preferences are compared against the user's real preferences.</p>
+   *
+   * <p>For large datasets, it may be desirable to only evaluate based on a small percentage of the data.
+   * <code>evaluationPercentage</code> controls how many of the {@link DataModel}'s users are used in
+   * evaluation.</p>
+   *
+   * <p>To be clear, <code>trainingPercentage</code> and <code>evaluationPercentage</code> are not related.
+   * They do not need to add up to 1.0, for example.</p>
+   *
+   * @param recommenderBuilder object that can build a {@link org.apache.mahout.cf.taste.recommender.Recommender} to test
+   * @param dataModel dataset to test on
+   * @param trainingPercentage percentage of each user's preferences to use to produce recommendations; the rest
+   * are compared to estimated preference values to evaluate {@link org.apache.mahout.cf.taste.recommender.Recommender}
+   * performance
+   * @param evaluationPercentage percentage of users to use in evaluation
+   * @return a "score" representing how well the {@link org.apache.mahout.cf.taste.recommender.Recommender}'s
+   *         estimated preferences match real values; <em>lower</em> scores mean a better match and 0 is a perfect match
+   * @throws TasteException if an error occurs while accessing the {@link DataModel}
+   */
+  double evaluate(RecommenderBuilder recommenderBuilder,
+                  DataModel dataModel,
+                  double trainingPercentage,
+                  double evaluationPercentage) throws TasteException;
+
+}
\ No newline at end of file

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderIRStatsEvaluator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderIRStatsEvaluator.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderIRStatsEvaluator.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderIRStatsEvaluator.java Fri May  9 14:35:12 2008
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.eval;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.model.DataModel;
+
+/**
+ * <p>Implementations collect information retrieval-related statistics on a
+ * {@link org.apache.mahout.cf.taste.recommender.Recommender}'s performance, including precision,
+ * recall and f-measure.</p>
+ *
+ * <p>See <a href="http://en.wikipedia.org/wiki/Information_retrieval">Information retrieval</a>.
+ */
+public interface RecommenderIRStatsEvaluator {
+
+  /**
+   * @param recommenderBuilder object that can build a {@link org.apache.mahout.cf.taste.recommender.Recommender} to test
+   * @param dataModel dataset to test on
+   * @param at as in, "precision at 5". The number of recommendations to consider when evaluating
+   * precision, etc.
+   * @param relevanceThreshold {@link org.apache.mahout.cf.taste.model.Item}s whose preference value is at least
+   * this value are considered "relevant" for the purposes of computations
+   * @return {@link IRStatistics} with resulting precision, recall, etc.
+   * @throws TasteException if an error occurs while accessing the {@link DataModel}
+   */
+  IRStatistics evaluate(RecommenderBuilder recommenderBuilder,
+                        DataModel dataModel,
+                        int at,
+                        double relevanceThreshold,
+                        double evaluationPercentage) throws TasteException;
+
+}
\ No newline at end of file

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/ArrayIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/ArrayIterator.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/ArrayIterator.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/ArrayIterator.java Fri May  9 14:35:12 2008
@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+/**
+ * <p>Simple, fast {@link Iterator} for an array.</p>
+ */
+public final class ArrayIterator<T> implements Iterator<T>, Iterable<T> {
+
+  private final T[] array;
+  private int position;
+  private final int max;
+
+  /**
+   * <p>Creates an {@link ArrayIterator} over an entire array.</p>
+   *
+   * @param array array to iterate over
+   */
+  public ArrayIterator(T[] array) {
+    if (array == null) {
+      throw new IllegalArgumentException("array is null");
+    }
+    this.array = array; // yeah, not going to copy the array here, for performance
+    this.position = 0;
+    this.max = array.length;
+  }
+
+  public boolean hasNext() {
+    return position < max;
+  }
+
+  public T next() {
+    if (position >= array.length) {
+      throw new NoSuchElementException();
+    }
+    return array[position++];
+  }
+
+  /**
+   * @throws UnsupportedOperationException
+   */
+  public void remove() {
+    throw new UnsupportedOperationException();
+  }
+
+  public Iterator<T> iterator() {
+    return this;
+  }
+
+  @Override
+  public String toString() {
+    return "ArrayIterator[" + Arrays.toString(array) + '@' + position + ']';
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/CompactRunningAverage.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/CompactRunningAverage.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/CompactRunningAverage.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/CompactRunningAverage.java Fri May  9 14:35:12 2008
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.io.Serializable;
+
+/**
+ * <p>Like {@link org.apache.mahout.cf.taste.impl.common.FullRunningAverage} but uses smaller values (short, float)
+ * to conserve memory. This is used in {@link org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender}
+ * since it may allocate tens of millions of such objects.</p>
+ */
+public class CompactRunningAverage implements RunningAverage, Serializable {
+
+  private char count;
+  private float average;
+
+  public CompactRunningAverage() {
+    count = (char) 0;
+    average = Float.NaN;
+  }
+
+  public void addDatum(double datum) {
+    if ((int) count < 65535) { // = 65535 = 2^16 - 1
+      if ((int) ++count == 1) {
+        average = (float) datum;
+      } else {
+        average = (float)
+                ((double) average * ((double) ((int) count - 1) / (double) count) + datum / (double) count);
+      }
+    }
+  }
+
+  public void removeDatum(double datum) {
+    if ((int) count == 0) {
+      throw new IllegalStateException();
+    }
+    if ((int) --count == 0) {
+      average = Float.NaN;
+    } else {
+      average = (float)
+              ((double) average * ((double) ((int) count + 1) / (double) count) - datum / (double) count);
+    }
+  }
+
+  public void changeDatum(double delta) {
+    if ((int) count == 0) {
+      throw new IllegalStateException();
+    }
+    average += (float) (delta / (double) count);
+  }
+
+  public int getCount() {
+    return (int) count;
+  }
+
+  public double getAverage() {
+    return (double) average;
+  }
+
+  @Override
+  public String toString() {
+    return String.valueOf(average);
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/CompactRunningAverageAndStdDev.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/CompactRunningAverageAndStdDev.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/CompactRunningAverageAndStdDev.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/CompactRunningAverageAndStdDev.java Fri May  9 14:35:12 2008
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+/**
+ * <p>Extends {@link CompactRunningAverage} to add a running standard deviation computation.</p>
+ */
+public final class CompactRunningAverageAndStdDev extends CompactRunningAverage implements RunningAverageAndStdDev {
+
+  private float stdDev;
+  private float sumX2;
+
+  public CompactRunningAverageAndStdDev() {
+    stdDev = Float.NaN;
+  }
+
+  public double getStandardDeviation() {
+    return (double) stdDev;
+  }
+
+  @Override
+  public void addDatum(double datum) {
+    super.addDatum(datum);
+    sumX2 += (float) (datum * datum);
+    recomputeStdDev();
+  }
+
+  @Override
+  public void removeDatum(double datum) {
+    super.removeDatum(datum);
+    sumX2 -= (float) (datum * datum);
+    recomputeStdDev();
+  }
+
+  /**
+   * @throws UnsupportedOperationException
+   */
+  @Override
+  public void changeDatum(double delta) {
+    throw new UnsupportedOperationException();
+  }
+
+  private void recomputeStdDev() {
+    int count = getCount();
+    if (count > 1) {
+      double average = getAverage();
+      stdDev = (float) Math.sqrt(((double) sumX2 - average * average * (double) count) / (double) (count - 1));
+    } else {
+      stdDev = Float.NaN;
+    }
+  }
+
+  @Override
+  public String toString() {
+    return String.valueOf(String.valueOf(getAverage()) + ',' + stdDev);
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/EmptyIterable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/EmptyIterable.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/EmptyIterable.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/EmptyIterable.java Fri May  9 14:35:12 2008
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.Iterator;
+
+/**
+ * <p>An {@link Iterable} over no elements: always produces an {@link Iterator} which iterates
+ * over nothing.</p>
+ */
+public final class EmptyIterable<T> implements Iterable<T> {
+
+  private final Iterator<T> iterator;
+
+  public EmptyIterable() {
+    iterator = new EmptyIterator<T>();
+  }
+
+  public Iterator<T> iterator() {
+    return iterator;
+  }
+
+  @Override
+  public String toString() {
+    return "EmptyIterable[iterator:" + iterator + ']';
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/EmptyIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/EmptyIterator.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/EmptyIterator.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/EmptyIterator.java Fri May  9 14:35:12 2008
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+/**
+ * <p>An empty {@link Iterator}, which iterates over nothing.</p>
+ */
+final class EmptyIterator<T> implements Iterator<T> {
+
+  /**
+   * @return false
+   */
+  public boolean hasNext() {
+    return false;
+  }
+
+  /**
+   * @return never returns anything
+   * @throws NoSuchElementException
+   */
+  public T next() {
+    throw new NoSuchElementException();
+  }
+
+  /**
+   * @throws UnsupportedOperationException
+   */
+  public void remove() {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public String toString() {
+    return "EmptyIterator";
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java Fri May  9 14:35:12 2008
@@ -0,0 +1,666 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.AbstractCollection;
+import java.util.AbstractSet;
+import java.util.Arrays;
+import java.util.BitSet;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import java.util.Set;
+
+/**
+ * <p>This is an optimized {@link Map} implementation, based on algorithms described in Knuth's
+ * "Art of Computer Programming", Vol. 3, p. 529.</p>
+ *
+ * <p>It should be faster than {@link java.util.HashMap} in some cases, but not all. Its main feature is
+ * a "max size" and the ability to transparently, efficiently and semi-intelligently evict old entries
+ * when max size is exceeded.</p>
+ *
+ * <p>This class is not a bit thread-safe.</p>
+ *
+ * <p>This implementation does not allow <code>null</code> as a key or value.</p>
+ */
+public final class FastMap<K, V> implements Map<K, V> {
+
+  /**
+   * The largest prime less than 2<sup>31</sup>-1 that is the smaller of a twin prime pair.
+   */
+  private static final int MAX_INT_SMALLER_TWIN_PRIME = 2147482949;
+
+  public static final int NO_MAX_SIZE = Integer.MAX_VALUE;
+
+  /**
+   * Dummy object used to represent a key that has been removed. Package-private to allow direct access
+   * by inner classes. No harm in exposing it.
+   */
+  private static final Object REMOVED = new Object();
+
+  private K[] keys;
+  private V[] values;
+  private int numEntries;
+  private int numSlotsUsed;
+  private Set<Entry<K, V>> entrySet;
+  private Set<K> keySet;
+  private Collection<V> valueCollection;
+  private int maxSize;
+  private final BitSet recentlyAccessed;
+
+  /**
+   * Creates a new {@link FastMap} with default capacity.
+   */
+  public FastMap() {
+    this(11, NO_MAX_SIZE);
+  }
+
+  public FastMap(int size) {
+    this(size, NO_MAX_SIZE);
+  }
+
+  /**
+   * Creates a new {@link FastMap} whose capacity can accommodate the given number of entries without rehash.</p>
+   *
+   * @param size desired capacity
+   * @param maxSize max capacity
+   * @throws IllegalArgumentException if size is less than 1 or at least half of {@link #MAX_INT_SMALLER_TWIN_PRIME}
+   */
+  public FastMap(int size, int maxSize) throws IllegalArgumentException {
+    if (size < 1) {
+      throw new IllegalArgumentException("size must be at least 1");
+    }
+    if (size >= MAX_INT_SMALLER_TWIN_PRIME >> 1) {
+      throw new IllegalArgumentException("size must be less than " + (MAX_INT_SMALLER_TWIN_PRIME >> 1));
+    }
+    if (maxSize < 1) {
+      throw new IllegalArgumentException("maxSize must be at least 1");
+    }
+    int hashSize = nextTwinPrime(2 * size);
+    keys = (K[]) new Object[hashSize];
+    values = (V[]) new Object[hashSize];
+    this.maxSize = maxSize;
+    this.recentlyAccessed = maxSize == Integer.MAX_VALUE ? null : new BitSet(maxSize);
+  }
+
+  /**
+   * This is for the benefit of inner classes. Without it the compiler would just generate a similar synthetic
+   * accessor. Might as well make it explicit.
+   */
+  K[] getKeys() {
+    return keys;
+  }
+
+  /**
+   * This is for the benefit of inner classes. Without it the compiler would just generate a similar synthetic
+   * accessor. Might as well make it explicit.
+   */
+  V[] getValues() {
+    return values;
+  }
+
+  private int find(Object key) {
+    int theHashCode = key.hashCode() & 0x7FFFFFFF; // make sure it's positive
+    K[] keys = this.keys;
+    int hashSize = keys.length;
+    int jump = 1 + theHashCode % (hashSize - 2);
+    int index = theHashCode % hashSize;
+    K currentKey = keys[index];
+    while (currentKey != null && (currentKey == REMOVED || !key.equals(currentKey))) {
+      if (index < jump) {
+        index += hashSize - jump;
+      } else {
+        index -= jump;
+      }
+      currentKey = keys[index];
+    }
+    return index;
+  }
+
+  public V get(Object key) {
+    if (key == null) {
+      return null;
+    }
+    int index = find(key);
+    if (recentlyAccessed != null) {
+      recentlyAccessed.set(index);
+    }
+    return values[index];
+  }
+
+  public int size() {
+    return numEntries;
+  }
+
+  public boolean isEmpty() {
+    return numEntries == 0;
+  }
+
+  public boolean containsKey(Object key) {
+    return key != null && keys[find(key)] != null;
+  }
+
+  public boolean containsValue(Object value) {
+    if (value == null) {
+      return false;
+    }
+    for (V theValue : values) {
+      if (theValue != null && value.equals(theValue)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * @throws NullPointerException if key or value is null
+   */
+  public V put(K key, V value) {
+    if (key == null || value == null) {
+      throw new NullPointerException();
+    }
+    int hashSize = keys.length;
+    if (numSlotsUsed >= hashSize >> 1) {
+      growAndRehash();
+    }
+    // Here we may later consider implementing Brent's variation described on page 532
+    int index = find(key);
+    if (keys[index] == null) {
+      // If size is limited,
+      if (recentlyAccessed != null && numEntries >= maxSize) {
+        // and we're too large, clear some old-ish entry
+        clearStaleEntry(index);
+      }
+      keys[index] = key;
+      values[index] = value;
+      numEntries++;
+      numSlotsUsed++;
+      return null;
+    } else {
+      V oldValue = values[index];
+      values[index] = value;
+      return oldValue;
+    }
+  }
+
+  private void clearStaleEntry(int index) {
+    while (true) {
+      int hashSize = keys.length;
+      K currentKey;
+      do {
+        if (index == 0) {
+          index = hashSize - 1;
+        } else {
+          index--;
+        }
+        currentKey = keys[index];
+      } while (currentKey == null || currentKey == REMOVED);
+      if (recentlyAccessed.get(index)) {
+        recentlyAccessed.clear(index);
+      } else {
+        break;
+      }
+    }
+    // Delete the entry
+    ((Object[]) keys)[index] = REMOVED;
+    numEntries--;
+    values[index] = null;
+  }
+
+  public void putAll(Map<? extends K, ? extends V> map) {
+    if (map == null) {
+      throw new NullPointerException();
+    }
+    for (Entry<? extends K, ? extends V> entry : map.entrySet()) {
+      put(entry.getKey(), entry.getValue());
+    }
+  }
+
+  public V remove(Object key) {
+    if (key == null) {
+      return null;
+    }
+    int index = find(key);
+    if (keys[index] == null) {
+      return null;
+    } else {
+      ((Object[]) keys)[index] = REMOVED;
+      numEntries--;
+      V oldValue = values[index];
+      values[index] = null;
+      // don't decrement numSlotsUsed
+      return oldValue;
+    }
+    // Could un-set recentlyAccessed's bit but doesn't matter
+  }
+
+  public void clear() {
+    numEntries = 0;
+    numSlotsUsed = 0;
+    Arrays.fill(keys, null);
+    Arrays.fill(values, null);
+    if (recentlyAccessed != null) {
+      recentlyAccessed.clear();
+    }
+  }
+
+  public Set<K> keySet() {
+    if (keySet == null) {
+      keySet = new KeySet();
+    }
+    return keySet;
+  }
+
+  public Collection<V> values() {
+    if (valueCollection == null) {
+      valueCollection = new ValueCollection();
+    }
+    return valueCollection;
+  }
+
+  public Set<Entry<K, V>> entrySet() {
+    if (entrySet == null) {
+      entrySet = new EntrySet();
+    }
+    return entrySet;
+  }
+
+  public void rehash() {
+    rehash(keys.length);
+  }
+
+  private void growAndRehash() {
+    int hashSize = keys.length;
+    if (hashSize >= MAX_INT_SMALLER_TWIN_PRIME >> 1) {
+      throw new IllegalStateException("Can't grow any more");
+    }
+    rehash(nextTwinPrime(2 * hashSize));
+  }
+
+  private void rehash(int newHashSize) {
+    K[] oldKeys = keys;
+    V[] oldValues = values;
+    numEntries = 0;
+    numSlotsUsed = 0;
+    if (recentlyAccessed != null) {
+      recentlyAccessed.clear();
+    }
+    keys = (K[]) new Object[newHashSize];
+    values = (V[]) new Object[newHashSize];
+    int length = oldKeys.length;
+    for (int i = 0; i < length; i++) {
+      K key = oldKeys[i];
+      if (key != null && key != REMOVED) {
+        put(key, oldValues[i]);
+      }
+    }
+  }
+
+  // Simple methods for finding a next larger prime
+
+
+  /**
+   * <p>Finds next-largest "twin primes": numbers p and p+2 such that both are prime. Finds the smallest such p such
+   * that the smaller twin, p, is greater than or equal to n. Returns p+2, the larger of the two twins.</p>
+   */
+  private static int nextTwinPrime(int n) {
+    if (n > MAX_INT_SMALLER_TWIN_PRIME) {
+      throw new IllegalArgumentException();
+    }
+    int next = nextPrime(n);
+    while (isNotPrime(next + 2)) {
+      next = nextPrime(next + 4);
+    }
+    return next + 2;
+  }
+
+  /**
+   * <p>Finds smallest prime p such that p is greater than or equal to n.</p>
+   */
+  private static int nextPrime(int n) {
+    // Make sure the number is odd. Is this too clever?
+    n |= 0x1;
+    // There is no problem with overflow since Integer.MAX_INT is prime, as it happens
+    while (isNotPrime(n)) {
+      n += 2;
+    }
+    return n;
+  }
+
+  /**
+   * @param n
+   * @return <code>true</code> iff n is not a prime
+   */
+  private static boolean isNotPrime(int n) {
+    if (n < 2) {
+      throw new IllegalArgumentException();
+    }
+    if ((n & 0x1) == 0) { // even
+      return true;
+    }
+    int max = 1 + (int) Math.sqrt((double) n);
+    for (int d = 3; d <= max; d += 2) {
+      if (n % d == 0) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  private void iteratorRemove(int lastNext) {
+    if (lastNext >= values.length) {
+      throw new NoSuchElementException();
+    }
+    if (lastNext < 0) {
+      throw new IllegalStateException();
+    }
+    values[lastNext] = null;
+    ((Object[]) keys)[lastNext] = REMOVED;
+    numEntries--;
+  }
+
+  private final class EntrySet extends AbstractSet<Entry<K, V>> {
+
+    @Override
+    public int size() {
+      return FastMap.this.size();
+    }
+
+    @Override
+    public boolean isEmpty() {
+      return FastMap.this.isEmpty();
+    }
+
+    @Override
+    public boolean contains(Object o) {
+      return FastMap.this.containsKey(o);
+    }
+
+    @Override
+    public Iterator<Entry<K, V>> iterator() {
+      return new EntryIterator();
+    }
+
+    @Override
+    public boolean add(Entry<K, V> t) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public boolean remove(Object o) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public boolean addAll(Collection<? extends Entry<K, V>> ts) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public boolean retainAll(Collection<?> objects) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public boolean removeAll(Collection<?> objects) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void clear() {
+      FastMap.this.clear();
+    }
+
+    final class MapEntry implements Entry<K, V> {
+
+      private final int index;
+
+      private MapEntry(int index) {
+        this.index = index;
+      }
+
+      public K getKey() {
+        return getKeys()[index];
+      }
+
+      public V getValue() {
+        return getValues()[index];
+      }
+
+      public V setValue(V value) {
+        if (value == null) {
+          throw new IllegalArgumentException();
+        }
+        V[] values = getValues();
+        V oldValue = values[index];
+        getValues()[index] = value;
+        return oldValue;
+      }
+    }
+
+    final class EntryIterator implements Iterator<Entry<K, V>> {
+
+      private int position;
+      private int lastNext = -1;
+
+      public boolean hasNext() {
+        goToNext();
+        return position < getKeys().length;
+      }
+
+      public Entry<K, V> next() {
+        goToNext();
+        lastNext = position;
+        K[] keys = getKeys();
+        if (position >= keys.length) {
+          throw new NoSuchElementException();
+        }
+        return new MapEntry(position++);
+      }
+
+      private void goToNext() {
+        V[] values = getValues();
+        int length = values.length;
+        while (position < length && values[position] == null) {
+          position++;
+        }
+      }
+
+      public void remove() {
+        iteratorRemove(lastNext);
+      }
+    }
+
+  }
+
+  private final class KeySet extends AbstractSet<K> {
+
+    @Override
+    public int size() {
+      return FastMap.this.size();
+    }
+
+    @Override
+    public boolean isEmpty() {
+      return FastMap.this.isEmpty();
+    }
+
+    @Override
+    public boolean contains(Object o) {
+      return FastMap.this.containsKey(o);
+    }
+
+    @Override
+    public Iterator<K> iterator() {
+      return new KeyIterator();
+    }
+
+    @Override
+    public boolean add(K t) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public boolean remove(Object o) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public boolean addAll(Collection<? extends K> ts) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public boolean retainAll(Collection<?> objects) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public boolean removeAll(Collection<?> objects) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void clear() {
+      FastMap.this.clear();
+    }
+
+    final class KeyIterator implements Iterator<K> {
+
+      private int position;
+      private int lastNext = -1;
+
+      public boolean hasNext() {
+        goToNext();
+        return position < getKeys().length;
+      }
+
+      public K next() {
+        goToNext();
+        lastNext = position;
+        K[] keys = getKeys();
+        if (position >= keys.length) {
+          throw new NoSuchElementException();
+        }
+        return keys[position++];
+      }
+
+      private void goToNext() {
+        V[] values = getValues();
+        int length = values.length;
+        while (position < length && values[position] == null) {
+          position++;
+        }
+      }
+
+      public void remove() {
+        iteratorRemove(lastNext);
+      }
+    }
+
+  }
+
+  private final class ValueCollection extends AbstractCollection<V> {
+
+    @Override
+    public int size() {
+      return FastMap.this.size();
+    }
+
+    @Override
+    public boolean isEmpty() {
+      return FastMap.this.isEmpty();
+    }
+
+    @Override
+    public boolean contains(Object o) {
+      return FastMap.this.containsValue(o);
+    }
+
+    @Override
+    public Iterator<V> iterator() {
+      return new ValueIterator();
+    }
+
+    @Override
+    public boolean add(V v) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public boolean remove(Object o) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public boolean addAll(Collection<? extends V> vs) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public boolean removeAll(Collection<?> objects) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public boolean retainAll(Collection<?> objects) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void clear() {
+      FastMap.this.clear();
+    }
+
+    final class ValueIterator implements Iterator<V> {
+
+      private int position;
+      private int lastNext = -1;
+
+      public boolean hasNext() {
+        goToNext();
+        return position < getValues().length;
+      }
+
+      public V next() {
+        goToNext();
+        lastNext = position;
+        V[] values = getValues();
+        if (position >= values.length) {
+          throw new NoSuchElementException();
+        }
+        return values[position++];
+      }
+
+      private void goToNext() {
+        V[] values = getValues();
+        int length = values.length;
+        while (position < length && values[position] == null) {
+          position++;
+        }
+      }
+
+      public void remove() {
+        iteratorRemove(lastNext);
+      }
+
+    }
+
+  }
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java Fri May  9 14:35:12 2008
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.io.Serializable;
+
+/**
+ * <p>A simple class that can keep track of a running avearage of a series of numbers.
+ * One can add to or remove from the series, as well as update a datum in the series.
+ * The class does not actually keep track of the series of values, just its running average,
+ * so it doesn't even matter if you remove/change a value that wasn't added.</p>
+ */
+public class FullRunningAverage implements RunningAverage, Serializable {
+
+  private int count;
+  private double average;
+
+  public FullRunningAverage() {
+    count = 0;
+    average = Double.NaN;
+  }
+
+  /**
+   * @param datum new item to add to the running average
+   */
+  public void addDatum(double datum) {
+    if (++count == 1) {
+      average = datum;
+    } else {
+      average = average * ((double) (count - 1) / (double) count) + datum / (double) count;
+    }
+  }
+
+  /**
+   * @param datum item to remove to the running average
+   * @throws IllegalStateException if count is 0
+   */
+  public void removeDatum(double datum) {
+    if (count == 0) {
+      throw new IllegalStateException();
+    }
+    if (--count == 0) {
+      average = Double.NaN;
+    } else {
+      average = average * ((double) (count + 1) / (double) count) - datum / (double) count;
+    }
+  }
+
+  /**
+   * @param delta amount by which to change a datum in the running average
+   * @throws IllegalStateException if count is 0
+   */
+  public void changeDatum(double delta) {
+    if (count == 0) {
+      throw new IllegalStateException();
+    }
+    average += delta / (double) count;
+  }
+
+  public int getCount() {
+    return count;
+  }
+
+  public double getAverage() {
+    return average;
+  }
+
+  @Override
+  public String toString() {
+    return String.valueOf(average);
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java Fri May  9 14:35:12 2008
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+/**
+ * <p>Extends {@link FullRunningAverage} to add a running standard deviation computation.</p>
+ */
+public final class FullRunningAverageAndStdDev extends FullRunningAverage implements RunningAverageAndStdDev {
+
+  private double stdDev;
+  private double sumX2;
+
+  public FullRunningAverageAndStdDev() {
+    stdDev = Double.NaN;
+  }
+
+  public double getStandardDeviation() {
+    return stdDev;
+  }
+
+  @Override
+  public void addDatum(double datum) {
+    super.addDatum(datum);
+    sumX2 += datum * datum;
+    recomputeStdDev();
+  }
+
+  @Override
+  public void removeDatum(double datum) {
+    super.removeDatum(datum);
+    sumX2 -= datum * datum;
+    recomputeStdDev();
+  }
+
+  /**
+   * @throws UnsupportedOperationException
+   */
+  @Override
+  public void changeDatum(double delta) {
+    throw new UnsupportedOperationException();
+  }
+
+  private void recomputeStdDev() {
+    int count = getCount();
+    if (count > 1) {
+      double average = getAverage();
+      stdDev = Math.sqrt((sumX2 - average * average * (double) count) / (double) (count - 1));
+    } else {
+      stdDev = Double.NaN;
+    }
+  }
+
+  @Override
+  public String toString() {
+    return String.valueOf(String.valueOf(getAverage()) + ',' + stdDev);
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/IOUtils.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/IOUtils.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/IOUtils.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/IOUtils.java Fri May  9 14:35:12 2008
@@ -0,0 +1,96 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * <p>I/O-related utility methods that don't have a better home.</p>
+ */
+public final class IOUtils {
+
+  private static final Logger log = Logger.getLogger(IOUtils.class.getName());
+
+  private IOUtils() {
+  }
+
+  public static void quietClose(Closeable closeable) {
+    if (closeable != null) {
+      try {
+        closeable.close();
+      } catch (IOException ioe) {
+        log.log(Level.WARNING, "Unexpected exception while closing " + closeable + "; continuing", ioe);
+      }
+    }
+  }
+
+  // Sheez, why can't ResultSet, Statement and Connection implement Closeable?
+
+  public static void quietClose(ResultSet closeable) {
+    if (closeable != null) {
+      try {
+        closeable.close();
+      } catch (SQLException sqle) {
+        log.log(Level.WARNING, "Unexpected exception while closing " + closeable + "; continuing", sqle);
+      }
+    }
+  }
+
+  public static void quietClose(Statement closeable) {
+    if (closeable != null) {
+      try {
+        closeable.close();
+      } catch (SQLException sqle) {
+        log.log(Level.WARNING, "Unexpected exception while closing " + closeable + "; continuing", sqle);
+      }
+    }
+  }
+
+  public static void quietClose(Connection closeable) {
+    if (closeable != null) {
+      try {
+        closeable.close();
+      } catch (SQLException sqle) {
+        log.log(Level.WARNING, "Unexpected exception while closing " + closeable + "; continuing", sqle);
+      }
+    }
+  }
+
+  /**
+   * Closes a {@link ResultSet}, {@link Statement} and {@link Connection} (if not null) and
+   * logs (but does not rethrow) any resulting {@link SQLException}. This is useful for cleaning
+   * up after a database query.
+   *
+   * @param resultSet {@link ResultSet} to close
+   * @param statement {@link Statement} to close
+   * @param connection {@link Connection} to close
+   */
+  public static void safeClose(ResultSet resultSet, Statement statement, Connection connection) {
+    quietClose(resultSet);
+    quietClose(statement);
+    quietClose(connection);
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/IteratorIterable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/IteratorIterable.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/IteratorIterable.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/IteratorIterable.java Fri May  9 14:35:12 2008
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.Iterator;
+
+/**
+ * <p>Simple utility class that makes an {@link Iterator} {@link Iterable}
+ * by returning the {@link Iterator} itself.</p>
+ */
+public final class IteratorIterable<T> implements Iterable<T> {
+
+  private final Iterator<T> iterator;
+
+  /**
+   * <p>Constructs an {@link IteratorIterable} for an {@link Iterator}.</p>
+   *
+   * @param iterator {@link Iterator} on which to base this {@link IteratorIterable}
+   */
+  public IteratorIterable(Iterator<T> iterator) {
+    if (iterator == null) {
+      throw new IllegalArgumentException("iterator is null");
+    }
+    this.iterator = iterator;
+  }
+
+  public Iterator<T> iterator() {
+    return iterator;
+  }
+
+  @Override
+  public String toString() {
+    return "IteratorIterable[iterator:" + iterator + ']';
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/IteratorUtils.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/IteratorUtils.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/IteratorUtils.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/IteratorUtils.java Fri May  9 14:35:12 2008
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+/**
+ * <p>{@link java.util.Iterator}-related methods without a better home.</p>
+ */
+public final class IteratorUtils {
+
+  private IteratorUtils() {
+  }
+
+  /**
+   * @param iterable {@link Iterable} whose contents are to be put into a {@link List}
+   * @return a {@link List} with the objects one gets by iterating over the given {@link Iterable}
+   */
+  public static <K> List<K> iterableToList(Iterable<K> iterable) {
+    return iterableToList(iterable, null);
+  }
+
+  /**
+   * @param iterable {@link Iterable} whose contents are to be put into a {@link List}
+   * @param comparator {@link Comparator} defining the sort order of the returned {@link List}
+   * @return a {@link List} with the objects one gets by iterating over the given {@link Iterable},
+   *         sorted according to the given {@link Comparator}
+   */
+  public static <K> List<K> iterableToList(Iterable<K> iterable, Comparator<K> comparator) {
+    if (iterable == null) {
+      throw new IllegalArgumentException("iterable is null");
+    }
+    List<K> list;
+    if (iterable instanceof Collection) {
+      if (iterable instanceof List) {
+        list = (List<K>) iterable;
+      } else {
+        Collection<K> collection = (Collection<K>) iterable;
+        list = new ArrayList<K>(collection.size());
+        list.addAll(collection);
+      }
+    } else {
+      list = new ArrayList<K>();
+      for (K item : iterable) {
+        list.add(item);
+      }
+    }
+    if (comparator != null) {
+      Collections.sort(list, comparator);
+    }
+    return list;
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/Pair.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/Pair.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/Pair.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/Pair.java Fri May  9 14:35:12 2008
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+/**
+ * A simple (ordered) pair of two objects. Elements may be null.
+ */
+public class Pair<A, B> {
+
+  private final A first;
+  private final B second;
+
+  public Pair(A first, B second) {
+    this.first = first;
+    this.second = second;
+  }
+
+  public A getFirst() {
+    return first;
+  }
+
+  public B getSecond() {
+    return second;
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (!(obj instanceof Pair)) {
+      return false;
+    }
+    Pair<?, ?> otherPair = (Pair<?, ?>) obj;
+    return isEqualOrNulls(first, otherPair.first) &&
+           isEqualOrNulls(second, otherPair.second);
+  }
+
+  static boolean isEqualOrNulls(Object obj1, Object obj2) {
+    return obj1 == null ? obj2 == null : obj1.equals(obj2);
+  }
+
+  @Override
+  public int hashCode() {
+    int firstHash = hashCodeNull(first);
+    // Flip top and bottom 16 bits; this makes the hash function probably different
+    // for (a,b) versus (b,a)
+    return (firstHash >>> 16 | firstHash << 16) ^ hashCodeNull(second);
+  }
+
+  static int hashCodeNull(Object obj) {
+    return obj == null ? 0 : obj.hashCode();
+  }
+
+  @Override
+  public String toString() {
+    return '(' + String.valueOf(first) + ',' + String.valueOf(second) + ')';
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RandomUtils.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RandomUtils.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RandomUtils.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RandomUtils.java Fri May  9 14:35:12 2008
@@ -0,0 +1,42 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.Random;
+
+/**
+ * <p>The source of random stuff for the whole project. This lets us make all randomness in
+ * the project predictable, if desired, for when we run unit tests, which should be repeatable.</p>
+ */
+public final class RandomUtils {
+
+  private static final long STANDARD_SEED = 0xCAFEBABEL;
+  private static boolean testSeed;
+
+  private RandomUtils() {
+  }
+
+  public static void useTestSeed() {
+    testSeed = true;
+  }
+
+  public static Random getRandom() {
+    return testSeed ? new Random(STANDARD_SEED) : new Random();
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverage.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverage.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverage.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverage.java Fri May  9 14:35:12 2008
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+/**
+ * <p>Interface for classes that can keep track of a running average of a series of numbers.
+ * One can add to or remove from the series, as well as update a datum in the series.
+ * The class does not actually keep track of the series of values, just its running average,
+ * so it doesn't even matter if you remove/change a value that wasn't added.</p>
+ */
+public interface RunningAverage {
+
+  /**
+   * @param datum new item to add to the running average
+   * @throws IllegalArgumentException if datum is {@link Double#NaN}
+   */
+  void addDatum(double datum);
+
+  /**
+   * @param datum item to remove to the running average
+   * @throws IllegalArgumentException if datum is {@link Double#NaN}
+   * @throws IllegalStateException if count is 0
+   */
+  void removeDatum(double datum);
+
+  /**
+   * @param delta amount by which to change a datum in the running average
+   * @throws IllegalArgumentException if delta is {@link Double#NaN}
+   * @throws IllegalStateException if count is 0
+   */
+  void changeDatum(double delta);
+
+  int getCount();
+
+  double getAverage();
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverageAndStdDev.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverageAndStdDev.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverageAndStdDev.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverageAndStdDev.java Fri May  9 14:35:12 2008
@@ -0,0 +1,30 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+/**
+ * <p>Extends {@link RunningAverage} by adding standard deviation too.</p>
+ */
+public interface RunningAverageAndStdDev extends RunningAverage {
+
+  /**
+   * @return standard deviation of data
+   */
+  double getStandardDeviation();
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SoftCache.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SoftCache.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SoftCache.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SoftCache.java Fri May  9 14:35:12 2008
@@ -0,0 +1,130 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+
+/**
+ * <p>An efficient Map-like class which caches values for keys. Values are not "put" into a {@link SoftCache};
+ * instead the caller supplies the instance with an implementation of {@link Retriever} which can load the
+ * value for a given key.</p>
+ *
+ * <p>This class is a bit misnamed at this point since it no longer uses <code>SoftReference</code> internally,
+ * but hey.</p>
+ *
+ * <p>The cache does not support <code>null</code> values or keys.</p>
+ *
+ * <p>Thanks to Amila Jayasooriya for helping evaluate performance of the rewrite of this class, as part of a
+ * Google Summer of Code 2007 project.</p>
+ */
+public final class SoftCache<K, V> {
+
+  private final FastMap<K, V> cache;
+  private final Retriever<? super K, ? extends V> retriever;
+
+  /**
+   * <p>Creates a new cache based on the given {@link Retriever}.</p>
+   *
+   * @param retriever object which can retrieve values for keys
+   */
+  public SoftCache(Retriever<? super K, ? extends V> retriever) {
+    this(retriever, FastMap.NO_MAX_SIZE);
+  }
+
+  /**
+   * <p>Creates a new cache based on the given {@link Retriever} and with given maximum size.</p>
+   *
+   * @param retriever object which can retrieve values for keys
+   * @param maxEntries maximum number of entries the cache will store before evicting some
+   */
+  public SoftCache(Retriever<? super K, ? extends V> retriever, int maxEntries) {
+    if (retriever == null) {
+      throw new IllegalArgumentException("retriever is null");
+    }
+    if (maxEntries < 1) {
+      throw new IllegalArgumentException("maxEntries must be at least 1");
+    }
+    cache = new FastMap<K, V>(11, maxEntries);
+    this.retriever = retriever;
+  }
+
+  /**
+   * <p>Returns cached value for a key. If it does not exist, it is loaded using a {@link Retriever}.</p>
+   *
+   * @param key cache key
+   * @return value for that key
+   * @throws TasteException if an exception occurs while retrieving a new cached value
+   */
+  public V get(K key) throws TasteException {
+    V value;
+    synchronized (cache) {
+      value = cache.get(key);
+    }
+    if (value == null) {
+      return getAndCacheValue(key);
+    }
+    return value;
+  }
+
+  /**
+   * <p>Uncaches any existing value for a given key.</p>
+   *
+   * @param key cache key
+   */
+  public void remove(K key) {
+    synchronized (cache) {
+      cache.remove(key);
+    }
+  }
+
+  /**
+   * <p>Clears the cache.</p>
+   */
+  public void clear() {
+    synchronized (cache) {
+      cache.clear();
+    }
+  }
+
+  private V getAndCacheValue(K key) throws TasteException {
+    V value = retriever.getValue(key);
+    synchronized (cache) {
+      cache.put(key, value);
+    }
+    return value;
+  }
+
+  @Override
+  public String toString() {
+    return "SoftCache[retriever:" + retriever + ']';
+  }
+
+  /**
+   * <p>Implementations can retrieve a value for a given key.</p>
+   */
+  public static interface Retriever<KK, VV> {
+
+    /**
+     * @param key key for which a value should be retrieved
+     * @return value for key
+     * @throws TasteException if an error occurs while retrieving the value
+     */
+    VV getValue(KK key) throws TasteException;
+  }
+
+}

Propchange: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SoftCache.java
------------------------------------------------------------------------------
    svn:executable = *

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverage.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverage.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverage.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverage.java Fri May  9 14:35:12 2008
@@ -0,0 +1,92 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.io.Serializable;
+
+public final class WeightedRunningAverage implements RunningAverage, Serializable {
+
+  private double totalWeight;
+  private double average;
+
+  public WeightedRunningAverage() {
+    totalWeight = 0.0;
+    average = Double.NaN;
+  }
+
+  public void addDatum(double datum) {
+    addDatum(datum, 1.0);
+  }
+
+  public void addDatum(double datum, double weight) {
+    double oldTotalWeight = totalWeight;
+    totalWeight += weight;
+    if (oldTotalWeight <= 0.0) {
+      average = datum * weight;
+    } else {
+      average = average * (oldTotalWeight / totalWeight) + datum / totalWeight;
+    }
+  }
+
+  public void removeDatum(double datum) {
+    removeDatum(datum, 1.0);
+  }
+
+  public void removeDatum(double datum, double weight) {
+    double oldTotalWeight = totalWeight;
+    totalWeight -= weight;
+    if (totalWeight <= 0.0) {
+      average = Double.NaN;
+      totalWeight = 0.0;
+    } else {
+      average = average * (oldTotalWeight / totalWeight) - datum / totalWeight;
+    }
+  }
+
+  public void changeDatum(double delta) {
+    changeDatum(delta, 1.0);
+  }
+
+  public void changeDatum(double delta, double weight) {
+    if (weight > totalWeight) {
+      throw new IllegalArgumentException();
+    }
+    average += (delta * weight) / totalWeight;
+  }
+
+  public double getTotalWeight() {
+    return totalWeight;
+  }
+
+  /**
+   * @return {@link #getTotalWeight()}
+   */
+  public int getCount() {
+    return (int) totalWeight;
+  }
+
+  public double getAverage() {
+    return average;
+  }
+
+  @Override
+  public String toString() {
+    return String.valueOf(average);
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/AveragingPreferenceInferrer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/AveragingPreferenceInferrer.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/AveragingPreferenceInferrer.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/AveragingPreferenceInferrer.java Fri May  9 14:35:12 2008
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.correlation;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.correlation.PreferenceInferrer;
+import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
+import org.apache.mahout.cf.taste.impl.common.RunningAverage;
+import org.apache.mahout.cf.taste.impl.common.SoftCache;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.User;
+
+/**
+ * <p>Implementations of this interface compute an inferred preference for a {@link User} and an {@link Item}
+ * that the user has not expressed any preference for. This might be an average of other preferences scores
+ * from that user, for example. This technique is sometimes called "default voting".</p>
+ */
+public final class AveragingPreferenceInferrer implements PreferenceInferrer {
+
+  private static final SoftCache.Retriever<User, Double> RETRIEVER = new PrefRetriever();
+
+  private final SoftCache<User, Double> averagePreferenceValue;
+
+  public AveragingPreferenceInferrer(DataModel dataModel) throws TasteException {
+    averagePreferenceValue = new SoftCache<User, Double>(RETRIEVER, dataModel.getNumUsers());
+    refresh();
+  }
+
+  public double inferPreference(User user, Item item) throws TasteException {
+    if (user == null || item == null) {
+      throw new IllegalArgumentException("user or item is null");
+    }
+    return averagePreferenceValue.get(user);
+  }
+
+  public void refresh() {
+    averagePreferenceValue.clear();
+  }
+
+  private static final class PrefRetriever implements SoftCache.Retriever<User, Double> {
+
+    public Double getValue(User key) {
+      RunningAverage average = new FullRunningAverage();
+      Preference[] prefs = key.getPreferencesAsArray();
+      if (prefs.length == 0) {
+        return 0.0;
+      }
+      for (int i = 0; i < prefs.length; i++) {
+        average.addDatum(prefs[i].getValue());
+      }
+      return average.getAverage();
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "AveragingPreferenceInferrer";
+  }
+
+}