You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ss...@apache.org on 2013/03/08 14:53:26 UTC

svn commit: r1454389 - in /mahout/trunk/core/src: main/java/org/apache/mahout/cf/taste/hadoop/ main/java/org/apache/mahout/cf/taste/hadoop/als/ test/java/org/apache/mahout/cf/taste/hadoop/als/

Author: ssc
Date: Fri Mar  8 13:53:26 2013
New Revision: 1454389

URL: http://svn.apache.org/r1454389
Log:
MAHOUT-1151 more optimizations

Added:
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/MutableRecommendedItem.java
      - copied, changed from r1453756, mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/CappableRecommendedItem.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueue.java
    mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueueTest.java
Removed:
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/CappableRecommendedItem.java
Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java?rev=1454389&r1=1454388&r2=1454389&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java Fri Mar  8 13:53:26 2013
@@ -60,7 +60,6 @@ public final class RecommendedItemsWrita
       Varint.writeSignedVarLong(item.getItemID(), out);
       out.writeFloat(item.getValue());
     }
-    
   }
   
   @Override

Copied: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/MutableRecommendedItem.java (from r1453756, mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/CappableRecommendedItem.java)
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/MutableRecommendedItem.java?p2=mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/MutableRecommendedItem.java&p1=mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/CappableRecommendedItem.java&r1=1453756&r2=1454389&rev=1454389&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/CappableRecommendedItem.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/MutableRecommendedItem.java Fri Mar  8 13:53:26 2013
@@ -17,28 +17,18 @@
 
 package org.apache.mahout.cf.taste.hadoop.als;
 
-import com.google.common.base.Preconditions;
+import org.apache.mahout.cf.taste.impl.recommender.GenericRecommendedItem;
 import org.apache.mahout.cf.taste.recommender.RecommendedItem;
 import org.apache.mahout.common.RandomUtils;
 
 /**
- * Mutable variant of {@link RecommendedItem} that allows to cap the preference to a max value
+ * Mutable variant of {@link RecommendedItem}
  */
-class CappableRecommendedItem implements RecommendedItem {
+class MutableRecommendedItem implements RecommendedItem {
 
-  private final long itemID;
+  private long itemID;
   private float value;
 
-  /**
-   * @throws IllegalArgumentException
-   *           if item is null or value is NaN
-   */
-  public CappableRecommendedItem(long itemID, float value) {
-    Preconditions.checkArgument(!Float.isNaN(value), "value is NaN");
-    this.itemID = itemID;
-    this.value = value;
-  }
-
   @Override
   public long getItemID() {
     return itemID;
@@ -49,15 +39,24 @@ class CappableRecommendedItem implements
     return value;
   }
 
+  public void set(long itemID, float value) {
+    this.itemID = itemID;
+    this.value = value;
+  }
+
   public void capToMaxValue(float maxValue) {
     if (value > maxValue) {
       value = maxValue;
     }
   }
 
+  public RecommendedItem copy() {
+    return new GenericRecommendedItem(itemID, value);
+  }
+
   @Override
   public String toString() {
-    return "CappableRecommendedItem[item:" + itemID + ", value:" + value + ']';
+    return "MutableRecommendedItem[item:" + itemID + ", value:" + value + ']';
   }
 
   @Override
@@ -67,7 +66,7 @@ class CappableRecommendedItem implements
 
   @Override
   public boolean equals(Object o) {
-    if (!(o instanceof CappableRecommendedItem)) {
+    if (!(o instanceof MutableRecommendedItem)) {
       return false;
     }
     RecommendedItem other = (RecommendedItem) o;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java?rev=1454389&r1=1454388&r2=1454389&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java Fri Mar  8 13:53:26 2013
@@ -17,8 +17,7 @@
 
 package org.apache.mahout.cf.taste.hadoop.als;
 
-import com.google.common.collect.Lists;
-import com.google.common.primitives.Floats;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.mapreduce.Job;
@@ -36,12 +35,9 @@ import org.apache.mahout.math.map.OpenIn
 import org.apache.mahout.math.set.OpenIntHashSet;
 
 import java.io.IOException;
-import java.util.Collections;
-import java.util.Comparator;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
-import java.util.PriorityQueue;
 
 /**
  * <p>Computes the top-N recommendations per user from a decomposition of the rating matrix</p>
@@ -53,7 +49,7 @@ import java.util.PriorityQueue;
  * <li>--output (path): path where output should go</li>
  * <li>--numRecommendations (int): maximum number of recommendations per user</li>
  * <li>--maxRating (double): maximum rating of an item</li>
- * <li>--numFeatures (int): number of features to use for decomposition </li>
+ * <li>--NUM_FEATURES (int): number of features to use for decomposition </li>
  * </ol>
  */
 public class RecommenderJob extends AbstractJob {
@@ -87,11 +83,13 @@ public class RecommenderJob extends Abst
 
     Job prediction = prepareJob(getInputPath(), getOutputPath(), SequenceFileInputFormat.class, PredictionMapper.class,
         IntWritable.class, RecommendedItemsWritable.class, TextOutputFormat.class);
-    prediction.getConfiguration().setInt(NUM_RECOMMENDATIONS,
-        Integer.parseInt(getOption("numRecommendations")));
-    prediction.getConfiguration().set(USER_FEATURES_PATH, getOption("userFeatures"));
-    prediction.getConfiguration().set(ITEM_FEATURES_PATH, getOption("itemFeatures"));
-    prediction.getConfiguration().set(MAX_RATING, getOption("maxRating"));
+    Configuration conf = prediction.getConfiguration();
+
+    conf.setInt(NUM_RECOMMENDATIONS, Integer.parseInt(getOption("numRecommendations")));
+    conf.set(USER_FEATURES_PATH, getOption("userFeatures"));
+    conf.set(ITEM_FEATURES_PATH, getOption("itemFeatures"));
+    conf.set(MAX_RATING, getOption("maxRating"));
+
     boolean succeeded = prediction.waitForCompletion(true);
     if (!succeeded) {
       return -1;
@@ -100,17 +98,6 @@ public class RecommenderJob extends Abst
     return 0;
   }
 
-  private static final Comparator<RecommendedItem> BY_PREFERENCE_VALUE =
-      new Comparator<RecommendedItem>() {
-        @Override
-        public int compare(RecommendedItem one, RecommendedItem two) {
-          return Floats.compare(one.getValue(), two.getValue());
-        }
-      };
-
-  private static final Comparator<RecommendedItem> DESCENDING_BY_PREFERENCE_VALUE =
-      Collections.reverseOrder(BY_PREFERENCE_VALUE);
-
   static class PredictionMapper
       extends Mapper<IntWritable,VectorWritable,IntWritable,RecommendedItemsWritable> {
 
@@ -120,14 +107,11 @@ public class RecommenderJob extends Abst
     private int recommendationsPerUser;
     private float maxRating;
 
-    private PriorityQueue<RecommendedItem> topKItems;
-
     private RecommendedItemsWritable recommendations = new RecommendedItemsWritable();
 
     @Override
     protected void setup(Context ctx) throws IOException, InterruptedException {
-      recommendationsPerUser = ctx.getConfiguration().getInt(NUM_RECOMMENDATIONS,
-          DEFAULT_NUM_RECOMMENDATIONS);
+      recommendationsPerUser = ctx.getConfiguration().getInt(NUM_RECOMMENDATIONS, DEFAULT_NUM_RECOMMENDATIONS);
 
       Path pathToU = new Path(ctx.getConfiguration().get(USER_FEATURES_PATH));
       Path pathToM = new Path(ctx.getConfiguration().get(ITEM_FEATURES_PATH));
@@ -136,8 +120,16 @@ public class RecommenderJob extends Abst
       M = ALSUtils.readMatrixByRows(pathToM, ctx.getConfiguration());
 
       maxRating = Float.parseFloat(ctx.getConfiguration().get(MAX_RATING));
+    }
 
-      topKItems = new PriorityQueue<RecommendedItem>(recommendationsPerUser + 1, BY_PREFERENCE_VALUE);
+    // we can use a simple dot product computation, as both vectors are dense
+    private double dot(Vector x, Vector y) {
+      int numFeatures = x.size();
+      double sum = 0;
+      for (int n = 0; n < numFeatures; n++) {
+        sum += x.getQuick(n) * y.getQuick(n);
+      }
+      return sum;
     }
 
     @Override
@@ -153,19 +145,19 @@ public class RecommenderJob extends Abst
         alreadyRatedItems.add(ratingsIterator.next().index());
       }
 
-      topKItems.clear();
+      final TopItemQueue topItemQueue = new TopItemQueue(recommendationsPerUser);
+      final Vector userFeatures = U.get(userID);
 
       M.forEachPair(new IntObjectProcedure<Vector>() {
         @Override
         public boolean apply(int itemID, Vector itemFeatures) {
           if (!alreadyRatedItems.contains(itemID)) {
-            double predictedRating = U.get(userID).dot(itemFeatures);
+            double predictedRating = dot(userFeatures, itemFeatures);
 
-            if (topKItems.size() < recommendationsPerUser) {
-              topKItems.add(new CappableRecommendedItem(itemID, (float) predictedRating));
-            } else if (predictedRating > topKItems.peek().getValue()) {
-              topKItems.add(new CappableRecommendedItem(itemID, (float) predictedRating));
-              topKItems.poll();
+            MutableRecommendedItem top = topItemQueue.top();
+            if (predictedRating > top.getValue()) {
+              top.set(itemID, (float) predictedRating);
+              topItemQueue.updateTop();
             }
 
           }
@@ -173,12 +165,13 @@ public class RecommenderJob extends Abst
         }
       });
 
-      if (!topKItems.isEmpty()) {
+      List<RecommendedItem> recommendedItems = topItemQueue.getTopItems();
+
+      if (!recommendedItems.isEmpty()) {
 
-        List<RecommendedItem> recommendedItems = Lists.newArrayList(topKItems);
-        Collections.sort(recommendedItems, DESCENDING_BY_PREFERENCE_VALUE);
+        // cap predictions to maxRating
         for (RecommendedItem topItem : recommendedItems) {
-          ((CappableRecommendedItem) topItem).capToMaxValue(maxRating);
+          ((MutableRecommendedItem) topItem).capToMaxValue(maxRating);
         }
 
         recommendations.set(recommendedItems);

Added: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueue.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueue.java?rev=1454389&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueue.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueue.java Fri Mar  8 13:53:26 2013
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.als;
+
+import com.google.common.collect.Lists;
+import org.apache.lucene.util.PriorityQueue;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+
+import java.util.Collections;
+import java.util.List;
+
+public class TopItemQueue extends PriorityQueue<MutableRecommendedItem> {
+
+  public static final long SENTINEL_ID = Long.MIN_VALUE;
+
+  private final int maxSize;
+
+  public TopItemQueue(int maxSize) {
+    super(maxSize);
+    this.maxSize = maxSize;
+  }
+
+  public List<RecommendedItem> getTopItems() {
+    List<RecommendedItem> recommendedItems = Lists.newArrayListWithCapacity(maxSize);
+    while (size() > 0) {
+      MutableRecommendedItem topItem = pop();
+      // filter out "sentinel" objects necessary for maintaining an efficient priority queue
+      if (topItem.getItemID() != TopItemQueue.SENTINEL_ID) {
+        recommendedItems.add(topItem);
+      }
+    }
+    Collections.reverse(recommendedItems);
+    return recommendedItems;
+  }
+
+  @Override
+  protected boolean lessThan(MutableRecommendedItem one, MutableRecommendedItem two) {
+    return one.getValue() < two.getValue();
+  }
+
+  @Override
+  protected MutableRecommendedItem getSentinelObject() {
+    MutableRecommendedItem sentinel =  new MutableRecommendedItem();
+    sentinel.set(SENTINEL_ID, Float.MIN_VALUE);
+    return sentinel;
+  }
+}

Added: mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueueTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueueTest.java?rev=1454389&view=auto
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueueTest.java (added)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueueTest.java Fri Mar  8 13:53:26 2013
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.als;
+
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.junit.Test;
+
+import java.util.List;
+
+public class TopItemQueueTest extends TasteTestCase {
+
+  @Test
+  public void topK() {
+
+    float[] ratings = { .5f, .6f, .7f, 2f, 0f };
+
+    List<RecommendedItem> topItems = findTop(ratings, 2);
+
+    assertEquals(2, topItems.size());
+    assertEquals(3l, topItems.get(0).getItemID());
+    assertEquals(2f, topItems.get(0).getValue(), TasteTestCase.EPSILON);
+    assertEquals(2l, topItems.get(1).getItemID());
+    assertEquals(.7f, topItems.get(1).getValue(), TasteTestCase.EPSILON);
+  }
+
+  @Test
+  public void topKInputSmallerThanK() {
+
+    float[] ratings = {.7f, 2f};
+
+    List<RecommendedItem> topItems = findTop(ratings, 3);
+
+    assertEquals(2, topItems.size());
+    assertEquals(1l, topItems.get(0).getItemID());
+    assertEquals(2f, topItems.get(0).getValue(), TasteTestCase.EPSILON);
+    assertEquals(0l, topItems.get(1).getItemID());
+    assertEquals(.7f, topItems.get(1).getValue(), TasteTestCase.EPSILON);
+  }
+
+
+  private List<RecommendedItem> findTop(float[] ratings, int k) {
+    TopItemQueue queue = new TopItemQueue(k);
+
+    for (int item = 0; item < ratings.length; item++) {
+      MutableRecommendedItem top = queue.top();
+      if (ratings[item] > top.getValue()) {
+        top.set(item, ratings[item]);
+        queue.updateTop();
+      }
+    }
+
+    return queue.getTopItems();
+  }
+
+}