You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2008/09/12 17:06:38 UTC
svn commit: r694706 - in
/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl:
eval/GenericRecommenderIRStatsEvaluator.java
similarity/PearsonCorrelationSimilarity.java
Author: srowen
Date: Fri Sep 12 08:06:38 2008
New Revision: 694706
URL: http://svn.apache.org/viewvc?rev=694706&view=rev
Log:
Add functionality to compute a reasonable relevance threshold in IRStatsEvaluator, and fix a typo in the PearsonCorrelationSimilarity javadoc
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java?rev=694706&r1=694705&r2=694706&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java Fri Sep 12 08:06:38 2008
@@ -24,6 +24,8 @@
import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
import org.apache.mahout.cf.taste.impl.common.RandomUtils;
import org.apache.mahout.cf.taste.impl.common.RunningAverage;
+import org.apache.mahout.cf.taste.impl.common.RunningAverageAndStdDev;
+import org.apache.mahout.cf.taste.impl.common.FullRunningAverageAndStdDev;
import org.apache.mahout.cf.taste.impl.model.GenericDataModel;
import org.apache.mahout.cf.taste.impl.model.GenericUser;
import org.apache.mahout.cf.taste.model.DataModel;
@@ -54,6 +56,13 @@
private static final Logger log = LoggerFactory.getLogger(GenericRecommenderIRStatsEvaluator.class);
+ /**
+ * Pass as "relevanceThreshold" argument to
+ * {@link #evaluate(RecommenderBuilder, DataModel, Rescorer, int, double, double)} to have it attempt
+ * to compute a reasonable threshold. Note that this will impact performance.
+ */
+ public static final double CHOOSE_THRESHOLD = Double.NaN;
+
private final Random random;
public GenericRecommenderIRStatsEvaluator() {
@@ -92,9 +101,15 @@
Object id = user.getID();
Collection<Item> relevantItems = new HashSet<Item>(at);
Preference[] prefs = user.getPreferencesAsArray();
+ double theRelevanceThreshold;
+ if (Double.isNaN(relevanceThreshold)) {
+ theRelevanceThreshold = computeThreshold(prefs);
+ } else {
+ theRelevanceThreshold = relevanceThreshold;
+ }
for (int i = 0; i < prefs.length; i++) {
Preference pref = prefs[i];
- if (pref.getValue() >= relevanceThreshold) {
+ if (pref.getValue() >= theRelevanceThreshold) {
relevantItems.add(pref.getItem());
}
}
@@ -167,4 +182,16 @@
}
}
+ private static double computeThreshold(Preference[] prefs) {
+ if (prefs.length < 2) {
+ // Not enough data points -- return a threshold that allows everything
+ return Double.NEGATIVE_INFINITY;
+ }
+ RunningAverageAndStdDev stdDev = new FullRunningAverageAndStdDev();
+ for (int i = 0; i < prefs.length; i++) {
+ stdDev.addDatum(prefs[i].getValue());
+ }
+ return stdDev.getAverage() + stdDev.getStandardDeviation();
+ }
+
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java?rev=694706&r1=694705&r2=694706&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java Fri Sep 12 08:06:38 2008
@@ -20,7 +20,6 @@
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.common.Weighting;
import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.Item;
import org.apache.mahout.cf.taste.model.User;
/**
@@ -38,8 +37,6 @@
*
* <p><code>sumXY / sqrt(sumX2 * sumY2)</code></p>
*
- * <p>where <code>size</code> is the number of {@link Item}s in the {@link DataModel}.</p>
- *
* <p>Note that this correlation "centers" its data, shifts the user's preference values so that
* each of their means is 0. This is necessary to achieve expected behavior on all data sets.</p>
*