You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ss...@apache.org on 2013/03/08 14:53:26 UTC
svn commit: r1454389 - in /mahout/trunk/core/src:
main/java/org/apache/mahout/cf/taste/hadoop/
main/java/org/apache/mahout/cf/taste/hadoop/als/
test/java/org/apache/mahout/cf/taste/hadoop/als/
Author: ssc
Date: Fri Mar 8 13:53:26 2013
New Revision: 1454389
URL: http://svn.apache.org/r1454389
Log:
MAHOUT-1151 more optimizations
Added:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/MutableRecommendedItem.java
- copied, changed from r1453756, mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/CappableRecommendedItem.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueue.java
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueueTest.java
Removed:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/CappableRecommendedItem.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java?rev=1454389&r1=1454388&r2=1454389&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java Fri Mar 8 13:53:26 2013
@@ -60,7 +60,6 @@ public final class RecommendedItemsWrita
Varint.writeSignedVarLong(item.getItemID(), out);
out.writeFloat(item.getValue());
}
-
}
@Override
Copied: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/MutableRecommendedItem.java (from r1453756, mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/CappableRecommendedItem.java)
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/MutableRecommendedItem.java?p2=mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/MutableRecommendedItem.java&p1=mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/CappableRecommendedItem.java&r1=1453756&r2=1454389&rev=1454389&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/CappableRecommendedItem.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/MutableRecommendedItem.java Fri Mar 8 13:53:26 2013
@@ -17,28 +17,18 @@
package org.apache.mahout.cf.taste.hadoop.als;
-import com.google.common.base.Preconditions;
+import org.apache.mahout.cf.taste.impl.recommender.GenericRecommendedItem;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.common.RandomUtils;
/**
- * Mutable variant of {@link RecommendedItem} that allows to cap the preference to a max value
+ * Mutable variant of {@link RecommendedItem}
*/
-class CappableRecommendedItem implements RecommendedItem {
+class MutableRecommendedItem implements RecommendedItem {
- private final long itemID;
+ private long itemID;
private float value;
- /**
- * @throws IllegalArgumentException
- * if item is null or value is NaN
- */
- public CappableRecommendedItem(long itemID, float value) {
- Preconditions.checkArgument(!Float.isNaN(value), "value is NaN");
- this.itemID = itemID;
- this.value = value;
- }
-
@Override
public long getItemID() {
return itemID;
@@ -49,15 +39,24 @@ class CappableRecommendedItem implements
return value;
}
+ public void set(long itemID, float value) {
+ this.itemID = itemID;
+ this.value = value;
+ }
+
public void capToMaxValue(float maxValue) {
if (value > maxValue) {
value = maxValue;
}
}
+ public RecommendedItem copy() {
+ return new GenericRecommendedItem(itemID, value);
+ }
+
@Override
public String toString() {
- return "CappableRecommendedItem[item:" + itemID + ", value:" + value + ']';
+ return "MutableRecommendedItem[item:" + itemID + ", value:" + value + ']';
}
@Override
@@ -67,7 +66,7 @@ class CappableRecommendedItem implements
@Override
public boolean equals(Object o) {
- if (!(o instanceof CappableRecommendedItem)) {
+ if (!(o instanceof MutableRecommendedItem)) {
return false;
}
RecommendedItem other = (RecommendedItem) o;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java?rev=1454389&r1=1454388&r2=1454389&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java Fri Mar 8 13:53:26 2013
@@ -17,8 +17,7 @@
package org.apache.mahout.cf.taste.hadoop.als;
-import com.google.common.collect.Lists;
-import com.google.common.primitives.Floats;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;
@@ -36,12 +35,9 @@ import org.apache.mahout.math.map.OpenIn
import org.apache.mahout.math.set.OpenIntHashSet;
import java.io.IOException;
-import java.util.Collections;
-import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
-import java.util.PriorityQueue;
/**
* <p>Computes the top-N recommendations per user from a decomposition of the rating matrix</p>
@@ -53,7 +49,7 @@ import java.util.PriorityQueue;
* <li>--output (path): path where output should go</li>
* <li>--numRecommendations (int): maximum number of recommendations per user</li>
* <li>--maxRating (double): maximum rating of an item</li>
- * <li>--numFeatures (int): number of features to use for decomposition </li>
+ * <li>--NUM_FEATURES (int): number of features to use for decomposition </li>
* </ol>
*/
public class RecommenderJob extends AbstractJob {
@@ -87,11 +83,13 @@ public class RecommenderJob extends Abst
Job prediction = prepareJob(getInputPath(), getOutputPath(), SequenceFileInputFormat.class, PredictionMapper.class,
IntWritable.class, RecommendedItemsWritable.class, TextOutputFormat.class);
- prediction.getConfiguration().setInt(NUM_RECOMMENDATIONS,
- Integer.parseInt(getOption("numRecommendations")));
- prediction.getConfiguration().set(USER_FEATURES_PATH, getOption("userFeatures"));
- prediction.getConfiguration().set(ITEM_FEATURES_PATH, getOption("itemFeatures"));
- prediction.getConfiguration().set(MAX_RATING, getOption("maxRating"));
+ Configuration conf = prediction.getConfiguration();
+
+ conf.setInt(NUM_RECOMMENDATIONS, Integer.parseInt(getOption("numRecommendations")));
+ conf.set(USER_FEATURES_PATH, getOption("userFeatures"));
+ conf.set(ITEM_FEATURES_PATH, getOption("itemFeatures"));
+ conf.set(MAX_RATING, getOption("maxRating"));
+
boolean succeeded = prediction.waitForCompletion(true);
if (!succeeded) {
return -1;
@@ -100,17 +98,6 @@ public class RecommenderJob extends Abst
return 0;
}
- private static final Comparator<RecommendedItem> BY_PREFERENCE_VALUE =
- new Comparator<RecommendedItem>() {
- @Override
- public int compare(RecommendedItem one, RecommendedItem two) {
- return Floats.compare(one.getValue(), two.getValue());
- }
- };
-
- private static final Comparator<RecommendedItem> DESCENDING_BY_PREFERENCE_VALUE =
- Collections.reverseOrder(BY_PREFERENCE_VALUE);
-
static class PredictionMapper
extends Mapper<IntWritable,VectorWritable,IntWritable,RecommendedItemsWritable> {
@@ -120,14 +107,11 @@ public class RecommenderJob extends Abst
private int recommendationsPerUser;
private float maxRating;
- private PriorityQueue<RecommendedItem> topKItems;
-
private RecommendedItemsWritable recommendations = new RecommendedItemsWritable();
@Override
protected void setup(Context ctx) throws IOException, InterruptedException {
- recommendationsPerUser = ctx.getConfiguration().getInt(NUM_RECOMMENDATIONS,
- DEFAULT_NUM_RECOMMENDATIONS);
+ recommendationsPerUser = ctx.getConfiguration().getInt(NUM_RECOMMENDATIONS, DEFAULT_NUM_RECOMMENDATIONS);
Path pathToU = new Path(ctx.getConfiguration().get(USER_FEATURES_PATH));
Path pathToM = new Path(ctx.getConfiguration().get(ITEM_FEATURES_PATH));
@@ -136,8 +120,16 @@ public class RecommenderJob extends Abst
M = ALSUtils.readMatrixByRows(pathToM, ctx.getConfiguration());
maxRating = Float.parseFloat(ctx.getConfiguration().get(MAX_RATING));
+ }
- topKItems = new PriorityQueue<RecommendedItem>(recommendationsPerUser + 1, BY_PREFERENCE_VALUE);
+ // we can use a simple dot product computation, as both vectors are dense
+ private double dot(Vector x, Vector y) {
+ int numFeatures = x.size();
+ double sum = 0;
+ for (int n = 0; n < numFeatures; n++) {
+ sum += x.getQuick(n) * y.getQuick(n);
+ }
+ return sum;
}
@Override
@@ -153,19 +145,19 @@ public class RecommenderJob extends Abst
alreadyRatedItems.add(ratingsIterator.next().index());
}
- topKItems.clear();
+ final TopItemQueue topItemQueue = new TopItemQueue(recommendationsPerUser);
+ final Vector userFeatures = U.get(userID);
M.forEachPair(new IntObjectProcedure<Vector>() {
@Override
public boolean apply(int itemID, Vector itemFeatures) {
if (!alreadyRatedItems.contains(itemID)) {
- double predictedRating = U.get(userID).dot(itemFeatures);
+ double predictedRating = dot(userFeatures, itemFeatures);
- if (topKItems.size() < recommendationsPerUser) {
- topKItems.add(new CappableRecommendedItem(itemID, (float) predictedRating));
- } else if (predictedRating > topKItems.peek().getValue()) {
- topKItems.add(new CappableRecommendedItem(itemID, (float) predictedRating));
- topKItems.poll();
+ MutableRecommendedItem top = topItemQueue.top();
+ if (predictedRating > top.getValue()) {
+ top.set(itemID, (float) predictedRating);
+ topItemQueue.updateTop();
}
}
@@ -173,12 +165,13 @@ public class RecommenderJob extends Abst
}
});
- if (!topKItems.isEmpty()) {
+ List<RecommendedItem> recommendedItems = topItemQueue.getTopItems();
+
+ if (!recommendedItems.isEmpty()) {
- List<RecommendedItem> recommendedItems = Lists.newArrayList(topKItems);
- Collections.sort(recommendedItems, DESCENDING_BY_PREFERENCE_VALUE);
+ // cap predictions to maxRating
for (RecommendedItem topItem : recommendedItems) {
- ((CappableRecommendedItem) topItem).capToMaxValue(maxRating);
+ ((MutableRecommendedItem) topItem).capToMaxValue(maxRating);
}
recommendations.set(recommendedItems);
Added: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueue.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueue.java?rev=1454389&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueue.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueue.java Fri Mar 8 13:53:26 2013
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.als;
+
+import com.google.common.collect.Lists;
+import org.apache.lucene.util.PriorityQueue;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+
+import java.util.Collections;
+import java.util.List;
+
+public class TopItemQueue extends PriorityQueue<MutableRecommendedItem> {
+
+ public static final long SENTINEL_ID = Long.MIN_VALUE;
+
+ private final int maxSize;
+
+ public TopItemQueue(int maxSize) {
+ super(maxSize);
+ this.maxSize = maxSize;
+ }
+
+ public List<RecommendedItem> getTopItems() {
+ List<RecommendedItem> recommendedItems = Lists.newArrayListWithCapacity(maxSize);
+ while (size() > 0) {
+ MutableRecommendedItem topItem = pop();
+ // filter out "sentinel" objects necessary for maintaining an efficient priority queue
+ if (topItem.getItemID() != TopItemQueue.SENTINEL_ID) {
+ recommendedItems.add(topItem);
+ }
+ }
+ Collections.reverse(recommendedItems);
+ return recommendedItems;
+ }
+
+ @Override
+ protected boolean lessThan(MutableRecommendedItem one, MutableRecommendedItem two) {
+ return one.getValue() < two.getValue();
+ }
+
+ @Override
+ protected MutableRecommendedItem getSentinelObject() {
+ MutableRecommendedItem sentinel = new MutableRecommendedItem();
+ sentinel.set(SENTINEL_ID, Float.MIN_VALUE);
+ return sentinel;
+ }
+}
Added: mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueueTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueueTest.java?rev=1454389&view=auto
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueueTest.java (added)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/als/TopItemQueueTest.java Fri Mar 8 13:53:26 2013
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.als;
+
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.junit.Test;
+
+import java.util.List;
+
+public class TopItemQueueTest extends TasteTestCase {
+
+ @Test
+ public void topK() {
+
+ float[] ratings = { .5f, .6f, .7f, 2f, 0f };
+
+ List<RecommendedItem> topItems = findTop(ratings, 2);
+
+ assertEquals(2, topItems.size());
+ assertEquals(3l, topItems.get(0).getItemID());
+ assertEquals(2f, topItems.get(0).getValue(), TasteTestCase.EPSILON);
+ assertEquals(2l, topItems.get(1).getItemID());
+ assertEquals(.7f, topItems.get(1).getValue(), TasteTestCase.EPSILON);
+ }
+
+ @Test
+ public void topKInputSmallerThanK() {
+
+ float[] ratings = {.7f, 2f};
+
+ List<RecommendedItem> topItems = findTop(ratings, 3);
+
+ assertEquals(2, topItems.size());
+ assertEquals(1l, topItems.get(0).getItemID());
+ assertEquals(2f, topItems.get(0).getValue(), TasteTestCase.EPSILON);
+ assertEquals(0l, topItems.get(1).getItemID());
+ assertEquals(.7f, topItems.get(1).getValue(), TasteTestCase.EPSILON);
+ }
+
+
+ private List<RecommendedItem> findTop(float[] ratings, int k) {
+ TopItemQueue queue = new TopItemQueue(k);
+
+ for (int item = 0; item < ratings.length; item++) {
+ MutableRecommendedItem top = queue.top();
+ if (ratings[item] > top.getValue()) {
+ top.set(item, ratings[item]);
+ queue.updateTop();
+ }
+ }
+
+ return queue.getTopItems();
+ }
+
+}