You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/11/08 14:14:12 UTC

svn commit: r833869 - in /lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl: common/ eval/

Author: srowen
Date: Sun Nov  8 13:14:11 2009
New Revision: 833869

URL: http://svn.apache.org/viewvc?rev=833869&view=rev
Log:
Make RecommenderEvaluators multi-threaded for speed

Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/CompactRunningAverage.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/CompactRunningAverageAndStdDev.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverage.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/AbstractDifferenceRecommenderEvaluator.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/AverageAbsoluteDifferenceRecommenderEvaluator.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/RMSRecommenderEvaluator.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/CompactRunningAverage.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/CompactRunningAverage.java?rev=833869&r1=833868&r2=833869&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/CompactRunningAverage.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/CompactRunningAverage.java Sun Nov  8 13:14:11 2009
@@ -35,7 +35,7 @@
   }
 
   @Override
-  public void addDatum(double datum) {
+  public synchronized void addDatum(double datum) {
     if ((int) count < 65535) { // = 65535 = 2^16 - 1
       if ((int) ++count == 1) {
         average = (float) datum;
@@ -47,7 +47,7 @@
   }
 
   @Override
-  public void removeDatum(double datum) {
+  public synchronized void removeDatum(double datum) {
     if ((int) count == 0) {
       throw new IllegalStateException();
     }
@@ -60,7 +60,7 @@
   }
 
   @Override
-  public void changeDatum(double delta) {
+  public synchronized void changeDatum(double delta) {
     if ((int) count == 0) {
       throw new IllegalStateException();
     }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/CompactRunningAverageAndStdDev.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/CompactRunningAverageAndStdDev.java?rev=833869&r1=833868&r2=833869&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/CompactRunningAverageAndStdDev.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/CompactRunningAverageAndStdDev.java Sun Nov  8 13:14:11 2009
@@ -33,14 +33,14 @@
   }
 
   @Override
-  public void addDatum(double datum) {
+  public synchronized void addDatum(double datum) {
     super.addDatum(datum);
     sumX2 += (float) (datum * datum);
     recomputeStdDev();
   }
 
   @Override
-  public void removeDatum(double datum) {
+  public synchronized void removeDatum(double datum) {
     super.removeDatum(datum);
     sumX2 -= (float) (datum * datum);
     recomputeStdDev();
@@ -54,7 +54,7 @@
     throw new UnsupportedOperationException();
   }
 
-  private void recomputeStdDev() {
+  private synchronized void recomputeStdDev() {
     int count = getCount();
     if (count > 1) {
       double average = getAverage();

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java?rev=833869&r1=833868&r2=833869&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java Sun Nov  8 13:14:11 2009
@@ -36,7 +36,7 @@
 
   /** @param datum new item to add to the running average */
   @Override
-  public void addDatum(double datum) {
+  public synchronized void addDatum(double datum) {
     if (++count == 1) {
       average = datum;
     } else {
@@ -49,7 +49,7 @@
    * @throws IllegalStateException if count is 0
    */
   @Override
-  public void removeDatum(double datum) {
+  public synchronized void removeDatum(double datum) {
     if (count == 0) {
       throw new IllegalStateException();
     }
@@ -65,7 +65,7 @@
    * @throws IllegalStateException if count is 0
    */
   @Override
-  public void changeDatum(double delta) {
+  public synchronized void changeDatum(double delta) {
     if (count == 0) {
       throw new IllegalStateException();
     }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java?rev=833869&r1=833868&r2=833869&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java Sun Nov  8 13:14:11 2009
@@ -33,14 +33,14 @@
   }
 
   @Override
-  public void addDatum(double datum) {
+  public synchronized void addDatum(double datum) {
     super.addDatum(datum);
     sumX2 += datum * datum;
     recomputeStdDev();
   }
 
   @Override
-  public void removeDatum(double datum) {
+  public synchronized void removeDatum(double datum) {
     super.removeDatum(datum);
     sumX2 -= datum * datum;
     recomputeStdDev();
@@ -54,7 +54,7 @@
     throw new UnsupportedOperationException();
   }
 
-  private void recomputeStdDev() {
+  private synchronized void recomputeStdDev() {
     int count = getCount();
     if (count > 1) {
       double average = getAverage();

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverage.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverage.java?rev=833869&r1=833868&r2=833869&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverage.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverage.java Sun Nov  8 13:14:11 2009
@@ -30,11 +30,11 @@
   }
 
   @Override
-  public void addDatum(double datum) {
+  public synchronized void addDatum(double datum) {
     addDatum(datum, 1.0);
   }
 
-  public void addDatum(double datum, double weight) {
+  public synchronized void addDatum(double datum, double weight) {
     double oldTotalWeight = totalWeight;
     totalWeight += weight;
     if (oldTotalWeight <= 0.0) {
@@ -45,11 +45,11 @@
   }
 
   @Override
-  public void removeDatum(double datum) {
+  public synchronized void removeDatum(double datum) {
     removeDatum(datum, 1.0);
   }
 
-  public void removeDatum(double datum, double weight) {
+  public synchronized void removeDatum(double datum, double weight) {
     double oldTotalWeight = totalWeight;
     totalWeight -= weight;
     if (totalWeight <= 0.0) {
@@ -61,11 +61,11 @@
   }
 
   @Override
-  public void changeDatum(double delta) {
+  public synchronized void changeDatum(double delta) {
     changeDatum(delta, 1.0);
   }
 
-  public void changeDatum(double delta, double weight) {
+  public synchronized void changeDatum(double delta, double weight) {
     if (weight > totalWeight) {
       throw new IllegalArgumentException();
     }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/AbstractDifferenceRecommenderEvaluator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/AbstractDifferenceRecommenderEvaluator.java?rev=833869&r1=833868&r2=833869&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/AbstractDifferenceRecommenderEvaluator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/AbstractDifferenceRecommenderEvaluator.java Sun Nov  8 13:14:11 2009
@@ -17,13 +17,14 @@
 
 package org.apache.mahout.cf.taste.impl.eval;
 
+import org.apache.mahout.cf.taste.common.NoSuchItemException;
+import org.apache.mahout.cf.taste.common.NoSuchUserException;
 import org.apache.mahout.cf.taste.common.TasteException;
 import org.apache.mahout.cf.taste.eval.DataModelBuilder;
 import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
 import org.apache.mahout.cf.taste.eval.RecommenderEvaluator;
 import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
 import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
-import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.cf.taste.impl.model.GenericDataModel;
 import org.apache.mahout.cf.taste.impl.model.GenericPreference;
 import org.apache.mahout.cf.taste.impl.model.GenericUserPreferenceArray;
@@ -31,12 +32,20 @@
 import org.apache.mahout.cf.taste.model.Preference;
 import org.apache.mahout.cf.taste.model.PreferenceArray;
 import org.apache.mahout.cf.taste.recommender.Recommender;
+import org.apache.mahout.common.RandomUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.List;
+import java.util.Map;
 import java.util.Random;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
 
 /**
  * Abstract superclass of a couple implementations, providing shared functionality.
@@ -149,7 +158,7 @@
     }
   }
 
-  protected final float capEstimatedPreference(float estimate) {
+  private float capEstimatedPreference(float estimate) {
     if (estimate > maxPreference) {
       return maxPreference;
     }
@@ -159,7 +168,91 @@
     return estimate;
   }
 
-  abstract double getEvaluation(FastByIDMap<PreferenceArray> testUserPrefs, Recommender recommender)
-      throws TasteException;
+  private double getEvaluation(FastByIDMap<PreferenceArray> testUserPrefs, Recommender recommender)
+      throws TasteException {
+    reset();
+    Collection<Callable<Object>> estimateCallables = new ArrayList<Callable<Object>>();
+    for (Map.Entry<Long, PreferenceArray> entry : testUserPrefs.entrySet()) {
+      estimateCallables.add(new PreferenceEstimateCallable(recommender, entry.getKey(), entry.getValue()));
+    }
+    log.info("Beginning evaluation of {} users", estimateCallables.size());
+    execute(estimateCallables);
+    return computeFinalEvaluation();
+  }
+
+  /**
+   * Runs every callable on a fixed-size thread pool (one thread per available processor) and
+   * waits for all of them to complete, logging progress after every 1000 finished tasks.
+   *
+   * @param callables tasks to run; the value each task returns is ignored
+   * @throws TasteException if the calling thread is interrupted while waiting, or if any task
+   *  throws (the task's exception is passed to the TasteException constructor)
+   */
+  static void execute(Collection<Callable<Object>> callables) throws TasteException {
+    ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
+    try {
+      List<Future<Object>> futures = executor.invokeAll(callables);
+      int count = 0;
+      for (Future<Object> future : futures) {
+        future.get();
+        if (++count % 1000 == 0) {
+          log.info("Finished {}", count);
+        }
+      }
+    } catch (InterruptedException ie) {
+      // Restore the interrupt status so code further up the stack can still observe it
+      Thread.currentThread().interrupt();
+      throw new TasteException(ie);
+    } catch (ExecutionException ee) {
+      throw new TasteException(ee.getCause());
+    } finally {
+      // Always release the pool's threads, including when a task fails or the wait is interrupted
+      executor.shutdown();
+    }
+  }
+
+  abstract void reset();
+
+  abstract void processOneEstimate(float estimatedPreference, Preference realPref);
+
+  abstract double computeFinalEvaluation();
+
+
+  private class PreferenceEstimateCallable implements Callable<Object> {
+
+    private final Recommender recommender;
+    private final long testUserID;
+    private final PreferenceArray prefs;
+
+    private PreferenceEstimateCallable(Recommender recommender,
+                                       long testUserID,
+                                       PreferenceArray prefs) {
+      this.recommender = recommender;
+      this.testUserID = testUserID;
+      this.prefs = prefs;
+    }
+
+    @Override
+    public Object call() throws TasteException {
+      for (Preference realPref : prefs) {
+        float estimatedPreference = Float.NaN;
+        try {
+          estimatedPreference = recommender.estimatePreference(testUserID, realPref.getItemID());
+        } catch (NoSuchUserException nsue) {
+          // It's possible that a user exists in the test data but not training data, in which case
+          // NoSuchUserException will be thrown. Just ignore it and move on.
+          log.info("User exists in test data but not training data: {}", testUserID);
+        } catch (NoSuchItemException nsie) {
+          log.info("Item exists in test data but not training data: {}", realPref.getItemID());
+        }
+        if (!Float.isNaN(estimatedPreference)) {
+          estimatedPreference = capEstimatedPreference(estimatedPreference);
+          processOneEstimate(estimatedPreference, realPref);
+        }
+      }
+      return null;
+    }
+
+  }
 
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/AverageAbsoluteDifferenceRecommenderEvaluator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/AverageAbsoluteDifferenceRecommenderEvaluator.java?rev=833869&r1=833868&r2=833869&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/AverageAbsoluteDifferenceRecommenderEvaluator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/AverageAbsoluteDifferenceRecommenderEvaluator.java Sun Nov  8 13:14:11 2009
@@ -26,8 +26,6 @@
 import org.apache.mahout.cf.taste.model.Preference;
 import org.apache.mahout.cf.taste.model.PreferenceArray;
 import org.apache.mahout.cf.taste.recommender.Recommender;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import java.util.Map;
 
@@ -39,34 +37,20 @@
  */
 public final class AverageAbsoluteDifferenceRecommenderEvaluator extends AbstractDifferenceRecommenderEvaluator {
 
-  private static final Logger log = LoggerFactory.getLogger(AverageAbsoluteDifferenceRecommenderEvaluator.class);
+  private RunningAverage average;
 
   @Override
-  double getEvaluation(FastByIDMap<PreferenceArray> testUserPrefs, Recommender recommender) throws TasteException {
-    RunningAverage average = new FullRunningAverage();
-    int count = 0;
-    for (Map.Entry<Long, PreferenceArray> entry : testUserPrefs.entrySet()) {
-      for (Preference realPref : entry.getValue()) {
-        long testUserID = entry.getKey();
-        try {
-          float estimatedPreference =
-              recommender.estimatePreference(testUserID, realPref.getItemID());
-          if (!Float.isNaN(estimatedPreference)) {
-            estimatedPreference = capEstimatedPreference(estimatedPreference);
-            average.addDatum(Math.abs(realPref.getValue() - estimatedPreference));
-          }
-        } catch (NoSuchUserException nsue) {
-          // It's possible that an item exists in the test data but not training data in which case
-          // NSEE will be thrown. Just ignore it and move on.
-          log.info("User exists in test data but not training data: {}", testUserID);
-        } catch (NoSuchItemException nsie) {
-          log.info("Item exists in test data but not training data: {}", realPref.getItemID());
-        }
-        if (++count % 1000 == 0) {
-          log.info("Finished evaluation for {} prefs", count);
-        }
-      }
-    }
+  void reset() {
+    average = new FullRunningAverage();
+  }
+
+  @Override
+  void processOneEstimate(float estimatedPreference, Preference realPref) {
+    average.addDatum(Math.abs(realPref.getValue() - estimatedPreference));
+  }
+
+  @Override
+  double computeFinalEvaluation() {
     return average.getAverage();
   }
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/RMSRecommenderEvaluator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/RMSRecommenderEvaluator.java?rev=833869&r1=833868&r2=833869&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/RMSRecommenderEvaluator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/RMSRecommenderEvaluator.java Sun Nov  8 13:14:11 2009
@@ -26,8 +26,6 @@
 import org.apache.mahout.cf.taste.model.Preference;
 import org.apache.mahout.cf.taste.model.PreferenceArray;
 import org.apache.mahout.cf.taste.recommender.Recommender;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import java.util.Map;
 
@@ -38,35 +36,21 @@
  */
 public final class RMSRecommenderEvaluator extends AbstractDifferenceRecommenderEvaluator {
 
-  private static final Logger log = LoggerFactory.getLogger(RMSRecommenderEvaluator.class);
+  private RunningAverage average;
 
   @Override
-  double getEvaluation(FastByIDMap<PreferenceArray> testUserPrefs, Recommender recommender) throws TasteException {
-    RunningAverage average = new FullRunningAverage();
-    int count = 0;
-    for (Map.Entry<Long, PreferenceArray> entry : testUserPrefs.entrySet()) {
-      for (Preference realPref : entry.getValue()) {
-        long testUserID = entry.getKey();
-        try {
-          float estimatedPreference =
-              recommender.estimatePreference(testUserID, realPref.getItemID());
-          if (!Float.isNaN(estimatedPreference)) {
-            estimatedPreference = capEstimatedPreference(estimatedPreference);            
-            double diff = realPref.getValue() - estimatedPreference;
-            average.addDatum(diff * diff);
-          }
-        } catch (NoSuchUserException nsee) {
-          // It's possible that an item exists in the test data but not training data in which case
-          // NSEE will be thrown. Just ignore it and move on.
-          log.info("User exists in test data but not training data: {}", testUserID);
-        } catch (NoSuchItemException nsie) {
-          log.info("Item exists in test data but not training data: {}", realPref.getItemID());
-        }
-        if (++count % 1000 == 0) {
-          log.info("Finished evaluation for {} prefs", count);
-        }
-      }
-    }
+  void reset() {
+    average = new FullRunningAverage();
+  }
+
+  @Override
+  void processOneEstimate(float estimatedPreference, Preference realPref) {
+    double diff = realPref.getValue() - estimatedPreference;
+    average.addDatum(diff * diff);
+  }
+
+  @Override
+  double computeFinalEvaluation() {
     return Math.sqrt(average.getAverage());
   }