You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2008/09/12 17:06:38 UTC

svn commit: r694706 - in /lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl: eval/GenericRecommenderIRStatsEvaluator.java similarity/PearsonCorrelationSimilarity.java

Author: srowen
Date: Fri Sep 12 08:06:38 2008
New Revision: 694706

URL: http://svn.apache.org/viewvc?rev=694706&view=rev
Log:
Add functionality to compute a reasonable relevance threshold in IRStatsEvaluator, and fix a typo in PearsonCorrelationSimilarity javadoc

Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java?rev=694706&r1=694705&r2=694706&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java Fri Sep 12 08:06:38 2008
@@ -24,6 +24,8 @@
 import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
 import org.apache.mahout.cf.taste.impl.common.RandomUtils;
 import org.apache.mahout.cf.taste.impl.common.RunningAverage;
+import org.apache.mahout.cf.taste.impl.common.RunningAverageAndStdDev;
+import org.apache.mahout.cf.taste.impl.common.FullRunningAverageAndStdDev;
 import org.apache.mahout.cf.taste.impl.model.GenericDataModel;
 import org.apache.mahout.cf.taste.impl.model.GenericUser;
 import org.apache.mahout.cf.taste.model.DataModel;
@@ -54,6 +56,13 @@
 
   private static final Logger log = LoggerFactory.getLogger(GenericRecommenderIRStatsEvaluator.class);
 
+  /**
+   * Pass as "relevanceThreshold" argument to
+   * {@link #evaluate(RecommenderBuilder, DataModel, Rescorer, int, double, double)} to have it attempt
+   * to compute a reasonable threshold. Note that this will impact performance.
+   */
+  public static final double CHOOSE_THRESHOLD = Double.NaN;
+
   private final Random random;
 
   public GenericRecommenderIRStatsEvaluator() {
@@ -92,9 +101,15 @@
         Object id = user.getID();
         Collection<Item> relevantItems = new HashSet<Item>(at);
         Preference[] prefs = user.getPreferencesAsArray();
+        double theRelevanceThreshold;
+        if (Double.isNaN(relevanceThreshold)) {
+          theRelevanceThreshold = computeThreshold(prefs);
+        } else {
+          theRelevanceThreshold = relevanceThreshold;
+        }
         for (int i = 0; i < prefs.length; i++) {
           Preference pref = prefs[i];
-          if (pref.getValue() >= relevanceThreshold) {
+          if (pref.getValue() >= theRelevanceThreshold) {
             relevantItems.add(pref.getItem());
           }
         }
@@ -167,4 +182,16 @@
     }
   }
 
+  private static double computeThreshold(Preference[] prefs) {
+    if (prefs.length < 2) {
+      // Not enough data points -- return a threshold that allows everything
+      return Double.NEGATIVE_INFINITY;
+    }
+    RunningAverageAndStdDev stdDev = new FullRunningAverageAndStdDev();
+    for (int i = 0; i < prefs.length; i++) {
+      stdDev.addDatum(prefs[i].getValue());
+    }
+    return stdDev.getAverage() + stdDev.getStandardDeviation();
+  }
+
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java?rev=694706&r1=694705&r2=694706&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java Fri Sep 12 08:06:38 2008
@@ -20,7 +20,6 @@
 import org.apache.mahout.cf.taste.common.TasteException;
 import org.apache.mahout.cf.taste.common.Weighting;
 import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.Item;
 import org.apache.mahout.cf.taste.model.User;
 
 /**
@@ -38,8 +37,6 @@
  *
  * <p><code>sumXY / sqrt(sumX2 * sumY2)</code></p>
  *
- * <p>where <code>size</code> is the number of {@link Item}s in the {@link DataModel}.</p>
- *
  * <p>Note that this correlation "centers" its data, shifts the user's preference values so that
  * each of their means is 0. This is necessary to achieve expected behavior on all data sets.</p>
  *