You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ss...@apache.org on 2010/11/26 22:21:14 UTC

svn commit: r1039562 - in /mahout/trunk/core/src: main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java

Author: ssc
Date: Fri Nov 26 21:21:14 2010
New Revision: 1039562

URL: http://svn.apache.org/viewvc?rev=1039562&view=rev
Log:
MAHOUT-553 Unify ranking of boolean recommendations in distributed and non-distributed recommenders

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
    mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java?rev=1039562&r1=1039561&r2=1039562&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java Fri Nov 26 21:21:14 2010
@@ -121,32 +121,16 @@ public final class AggregateAndRecommend
   private void reduceBooleanData(VarLongWritable userID,
                                  Iterable<PrefAndSimilarityColumnWritable> values,
                                  Context context) throws IOException, InterruptedException {
-
     /* having boolean data, each estimated preference can only be 1,
-     * so the computation is much simpler */
+     * however we can't use this to rank the recommended items,
+     * so we use the sum of similarities for that. */
     Vector predictionVector = null;
     for (PrefAndSimilarityColumnWritable prefAndSimilarityColumn : values) {
       predictionVector = predictionVector == null
           ? prefAndSimilarityColumn.getSimilarityColumn()
           : predictionVector.plus(prefAndSimilarityColumn.getSimilarityColumn());
     }
-
-    Iterator<Vector.Element> predictions = predictionVector.iterateNonZero();
-    List<RecommendedItem> recommendations = new ArrayList<RecommendedItem>();
-    while (predictions.hasNext() && recommendations.size() < recommendationsPerUser) {
-      Vector.Element prediction = predictions.next();
-      /* NaN means the user already knows this item */
-      if (!Double.isNaN(prediction.get())) {
-        long itemID = indexItemIDMap.get(prediction.index());
-        if (itemsToRecommendFor == null || itemsToRecommendFor.contains(itemID)) {
-          recommendations.add(new GenericRecommendedItem(itemID, BOOLEAN_PREF_VALUE));
-        }
-      }
-    }
-
-    if (!recommendations.isEmpty()) {
-      context.write(userID, new RecommendedItemsWritable(recommendations));
-    }
+    writeRecommendedItems(userID, predictionVector, context);
   }
 
   private void reduceNonBooleanData(VarLongWritable userID,
@@ -193,7 +177,20 @@ public final class AggregateAndRecommend
         recommendationVector.setQuick(itemIDIndex, prediction);
       }
     }
+    writeRecommendedItems(userID, recommendationVector, context);
+  }
 
+  /**
+   * find the top entries in recommendationVector, map them to the real itemIDs and write back the result
+   *
+   * @param userID
+   * @param recommendationVector
+   * @param context
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  private void writeRecommendedItems(VarLongWritable userID, Vector recommendationVector, Context context)
+      throws IOException, InterruptedException {
     Queue<RecommendedItem> topItems = new PriorityQueue<RecommendedItem>(recommendationsPerUser + 1,
     Collections.reverseOrder(ByValueRecommendedItemComparator.getInstance()));
 

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java?rev=1039562&r1=1039561&r2=1039562&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java Fri Nov 26 21:21:14 2010
@@ -796,10 +796,17 @@ public class RecommenderJobTest extends 
     List<RecommendedItem> recommendedToCow = recommendations.get(3L);
     assertEquals(2, recommendedToCow.size());
 
-    long itemID1 = recommendedToCow.get(0).getItemID();
-    long itemID2 = recommendedToCow.get(1).getItemID();
+    RecommendedItem item1 = recommendedToCow.get(0);
+    RecommendedItem item2 = recommendedToCow.get(1);
 
-    assertTrue((itemID1 == 1L && itemID2 == 3L) || (itemID1 == 3L && itemID2 == 1L));
+    assertEquals(1L, item1.getItemID());
+    assertEquals(3L, item2.getItemID());
+
+    /* predicted pref must be the sum of similarities:
+    *    item1: coocc(burger, hotdog) + coocc(burger, icecream) = 3 
+    *    item2: coocc(berries, hotdog) + coocc(berries, icecream) = 2 */
+    assertEquals(3, item1.getValue(), 0.05);
+    assertEquals(2, item2.getValue(), 0.05);
   }
 
   /**