You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2008/08/21 07:16:06 UTC
svn commit: r687563 - in /lucene/mahout/trunk/core/src:
main/java/org/apache/mahout/cf/taste/eval/
main/java/org/apache/mahout/cf/taste/impl/eval/
test/java/org/apache/mahout/cf/taste/impl/eval/
Author: srowen
Date: Wed Aug 20 22:16:06 2008
New Revision: 687563
URL: http://svn.apache.org/viewvc?rev=687563&view=rev
Log:
Can now use a Rescorer with RecommenderIRStatsEvaluator. Added fall-out IR statistic.
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/IRStatistics.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderIRStatsEvaluator.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/IRStatisticsImpl.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluatorImplTest.java
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/IRStatistics.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/IRStatistics.java?rev=687563&r1=687562&r2=687563&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/IRStatistics.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/IRStatistics.java Wed Aug 20 22:16:06 2008
@@ -36,6 +36,11 @@
double getRecall();
/**
+ * <p>See <a href="http://en.wikipedia.org/wiki/Information_retrieval#Fall-Out">Fall-Out</a>.</p>
+ */
+ double getFallOut();
+
+ /**
* <p>See <a href="http://en.wikipedia.org/wiki/Information_retrieval#F-measure">F-measure</a>.</p>
*/
double getF1Measure();
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderIRStatsEvaluator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderIRStatsEvaluator.java?rev=687563&r1=687562&r2=687563&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderIRStatsEvaluator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderIRStatsEvaluator.java Wed Aug 20 22:16:06 2008
@@ -19,6 +19,8 @@
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.recommender.Rescorer;
/**
* <p>Implementations collect information retrieval-related statistics on a
@@ -30,17 +32,20 @@
public interface RecommenderIRStatsEvaluator {
/**
- * @param recommenderBuilder object that can build a {@link org.apache.mahout.cf.taste.recommender.Recommender} to test
+ * @param recommenderBuilder object that can build a
+ * {@link org.apache.mahout.cf.taste.recommender.Recommender} to test
* @param dataModel dataset to test on
+ * @param rescorer {@link Rescorer} to use when computing recommendations, if any
* @param at as in, "precision at 5". The number of recommendations to consider when evaluating
* precision, etc.
- * @param relevanceThreshold {@link org.apache.mahout.cf.taste.model.Item}s whose preference value is at least
+ * @param relevanceThreshold {@link Item}s whose preference value is at least
* this value are considered "relevant" for the purposes of computations
* @return {@link IRStatistics} with resulting precision, recall, etc.
* @throws TasteException if an error occurs while accessing the {@link DataModel}
*/
IRStatistics evaluate(RecommenderBuilder recommenderBuilder,
DataModel dataModel,
+ Rescorer<Item> rescorer,
int at,
double relevanceThreshold,
double evaluationPercentage) throws TasteException;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java?rev=687563&r1=687562&r2=687563&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java Wed Aug 20 22:16:06 2008
@@ -32,6 +32,7 @@
import org.apache.mahout.cf.taste.model.User;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
+import org.apache.mahout.cf.taste.recommender.Rescorer;
import java.util.ArrayList;
import java.util.Collection;
@@ -41,11 +42,11 @@
import java.util.List;
/**
- * <p>For each {@link org.apache.mahout.cf.taste.model.User}, these implementation determine the top <code>n</code> preferences,
+ * <p>For each {@link User}, this implementation determines the top <code>n</code> preferences,
* then evaluate the IR statistics based on a {@link DataModel} that does not have these values.
* This number <code>n</code> is the "at" value, as in "precision at 5". For example, this would mean precision
* evaluated by removing the top 5 preferences for a {@link User} and then finding the percentage of those 5
- * {@link org.apache.mahout.cf.taste.model.Item}s included in the top 5 recommendations for that user.</p>
+ * {@link Item}s included in the top 5 recommendations for that user.</p>
*/
public final class GenericRecommenderIRStatsEvaluator implements RecommenderIRStatsEvaluator {
@@ -57,6 +58,7 @@
public IRStatistics evaluate(RecommenderBuilder recommenderBuilder,
DataModel dataModel,
+ Rescorer<Item> rescorer,
int at,
double relevanceThreshold,
double evaluationPercentage) throws TasteException {
@@ -79,6 +81,7 @@
RunningAverage precision = new FullRunningAverage();
RunningAverage recall = new FullRunningAverage();
+ RunningAverage fallOut = new FullRunningAverage();
for (User user : dataModel.getUsers()) {
if (random.nextDouble() < evaluationPercentage) {
Object id = user.getID();
@@ -106,21 +109,27 @@
}
int intersectionSize = 0;
- for (RecommendedItem recommendedItem : recommender.recommend(id, at)) {
+ for (RecommendedItem recommendedItem : recommender.recommend(id, at, rescorer)) {
if (relevantItems.contains(recommendedItem.getItem())) {
intersectionSize++;
}
}
precision.addDatum((double) intersectionSize / (double) at);
recall.addDatum((double) intersectionSize / (double) numRelevantItems);
+ if (numRelevantItems < prefs.length) {
+ fallOut.addDatum((double) (at - intersectionSize) / (double) (prefs.length - numRelevantItems));
+ }
}
}
}
- return new IRStatisticsImpl(precision.getAverage(), recall.getAverage());
+ return new IRStatisticsImpl(precision.getAverage(), recall.getAverage(), fallOut.getAverage());
}
- private void processOtherUser(Object id, Collection<Item> relevantItems, List<User> trainingUsers, User user2) {
+ private void processOtherUser(Object id,
+ Collection<Item> relevantItems,
+ Collection<User> trainingUsers,
+ User user2) {
if (id.equals(user2.getID())) {
List<Preference> trainingPrefs = new ArrayList<Preference>();
Preference[] prefs2 = user2.getPreferencesAsArray();
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/IRStatisticsImpl.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/IRStatisticsImpl.java?rev=687563&r1=687562&r2=687563&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/IRStatisticsImpl.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/IRStatisticsImpl.java Wed Aug 20 22:16:06 2008
@@ -25,16 +25,21 @@
private final double precision;
private final double recall;
+ private final double fallOut;
- IRStatisticsImpl(double precision, double recall) {
+ IRStatisticsImpl(double precision, double recall, double fallOut) {
if (precision < 0.0 || precision > 1.0) {
throw new IllegalArgumentException("Illegal precision: " + precision);
}
if (recall < 0.0 || recall > 1.0) {
throw new IllegalArgumentException("Illegal recall: " + recall);
}
+ if (fallOut < 0.0 || fallOut > 1.0) {
+ throw new IllegalArgumentException("Illegal fallOut: " + fallOut);
+ }
this.precision = precision;
this.recall = recall;
+ this.fallOut = fallOut;
}
public double getPrecision() {
@@ -45,6 +50,10 @@
return recall;
}
+ public double getFallOut() {
+ return fallOut;
+ }
+
public double getF1Measure() {
return getFNMeasure(1.0);
}
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluatorImplTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluatorImplTest.java?rev=687563&r1=687562&r2=687563&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluatorImplTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluatorImplTest.java Wed Aug 20 22:16:06 2008
@@ -36,7 +36,7 @@
}
};
RecommenderIRStatsEvaluator evaluator = new GenericRecommenderIRStatsEvaluator();
- IRStatistics stats = evaluator.evaluate(builder, model, 5, 0.2, 1.0);
+ IRStatistics stats = evaluator.evaluate(builder, model, null, 5, 0.2, 1.0);
assertNotNull(stats);
assertEquals(0.2, stats.getPrecision(), EPSILON);
assertEquals(1.0, stats.getRecall(), EPSILON);