You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/05/11 15:01:43 UTC

svn commit: r943109 - in /lucene/mahout/trunk: core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ math/src/main/java/org/apache/mahout/math/

Author: srowen
Date: Tue May 11 13:01:42 2010
New Revision: 943109

URL: http://svn.apache.org/viewvc?rev=943109&view=rev
Log:
More fixes and improvements to item-based distributed recommender

Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/IndexIndexWritable.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PartialMultiplyReducer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java
    lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java
    lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java?rev=943109&r1=943108&r2=943109&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java Tue May 11 13:01:42 2010
@@ -106,11 +106,13 @@ public final class AggregateAndRecommend
       Vector.Element element = recommendationVectorIterator.next();
       int index = element.index();
       float value = (float) element.get();
-      if (topItems.size() < recommendationsPerUser && !Float.isNaN(value)) {
-        topItems.add(new GenericRecommendedItem(indexItemIDMap.get(index), value));
-      } else if (value > topItems.peek().getValue()) {
-        topItems.add(new GenericRecommendedItem(indexItemIDMap.get(index), value));
-        topItems.poll();
+      if (!Float.isNaN(value)) {
+        if (topItems.size() < recommendationsPerUser) {
+          topItems.add(new GenericRecommendedItem(indexItemIDMap.get(index), value));
+        } else if (value > topItems.peek().getValue()) {
+          topItems.add(new GenericRecommendedItem(indexItemIDMap.get(index), value));
+          topItems.poll();
+        }
       }
     }
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/IndexIndexWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/IndexIndexWritable.java?rev=943109&r1=943108&r2=943109&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/IndexIndexWritable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/IndexIndexWritable.java Tue May 11 13:01:42 2010
@@ -22,6 +22,7 @@ import java.io.DataOutput;
 import java.io.IOException;
 
 import org.apache.hadoop.io.WritableComparable;
+import org.apache.mahout.math.Varint;
 
 /** A {@link WritableComparable} encapsulating two item indices. */
 public final class IndexIndexWritable
@@ -54,14 +55,14 @@ public final class IndexIndexWritable
 
   @Override
   public void write(DataOutput out) throws IOException {
-    out.writeInt(aID);
-    out.writeInt(bID);
+    Varint.writeUnsignedVarInt(aID, out);
+    Varint.writeUnsignedVarInt(bID, out);
   }
 
   @Override
   public void readFields(DataInput in) throws IOException {
-    aID = in.readInt();
-    bID = in.readInt();
+    aID = Varint.readUnsignedVarInt(in);
+    bID = Varint.readUnsignedVarInt(in);
   }
 
   @Override
@@ -83,7 +84,7 @@ public final class IndexIndexWritable
   public boolean equals(Object o) {
     if (o instanceof IndexIndexWritable) {
       IndexIndexWritable that = (IndexIndexWritable) o;
-      return (aID == that.getAID()) && (bID == that.getBID());
+      return aID == that.getAID() && bID == that.getBID();
     }
     return false;
   }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PartialMultiplyReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PartialMultiplyReducer.java?rev=943109&r1=943108&r2=943109&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PartialMultiplyReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PartialMultiplyReducer.java Tue May 11 13:01:42 2010
@@ -41,7 +41,7 @@ public final class PartialMultiplyReduce
 
   private static final Logger log = LoggerFactory.getLogger(PartialMultiplyReducer.class);
 
-  private static final int MAX_PRODUCTS_PER_ITEM = 1000;
+  private static final int MAX_PRODUCTS_PER_ITEM = 100;
 
   private enum Counters {
     PRODUCTS_OUTPUT,
@@ -72,11 +72,6 @@ public final class PartialMultiplyReduce
       }
     }
 
-    if (cooccurrenceColumn == null) {
-      log.info("Column vector missing for {}; continuing", itemIndex);
-      return;
-    }
-
     final VLongWritable userIDWritable = new VLongWritable();
 
     // These single-element vectors ensure that each user will not be recommended
@@ -98,6 +93,11 @@ public final class PartialMultiplyReduce
       }
     });
 
+    if (cooccurrenceColumn == null) {
+      log.info("Column vector missing for {}; continuing", itemIndex);
+      return;
+    }    
+
     final float smallestLargeValue = findSmallestLargeValue(savedValues);
 
     final VectorWritable vectorWritable = new VectorWritable();

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java?rev=943109&r1=943108&r2=943109&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java Tue May 11 13:01:42 2010
@@ -35,7 +35,7 @@ import org.apache.mahout.math.map.OpenIn
 public final class UserVectorToCooccurrenceMapper extends MapReduceBase implements
     Mapper<VLongWritable,VectorWritable,IndexIndexWritable,IntWritable> {
 
-  private static final int MAX_PREFS_CONSIDERED = 50;
+  private static final int MAX_PREFS_CONSIDERED = 100;
 
   private boolean outputGuardValue = true;
   private final OpenIntIntHashMap indexCounts = new OpenIntIntHashMap();
@@ -45,8 +45,9 @@ public final class UserVectorToCooccurre
                   VectorWritable userVectorWritable,
                   OutputCollector<IndexIndexWritable,IntWritable> output,
                   Reporter reporter) throws IOException {
-    Vector userVector = maybePruneUserVector(userVectorWritable.get());
+    Vector userVector = userVectorWritable.get();
     countSeen(userVector);
+    userVector = maybePruneUserVector(userVector);    
     Iterator<Vector.Element> it = userVector.iterateNonZero();
     IndexIndexWritable entityEntity = new IndexIndexWritable();
     IntWritable one = new IntWritable(1);
@@ -55,10 +56,8 @@ public final class UserVectorToCooccurre
       Iterator<Vector.Element> it2 = userVector.iterateNonZero();
       while (it2.hasNext()) {
         int index2 = it2.next().index();
-        if (index1 != index2) {
-          entityEntity.set(index1, index2);
-          output.collect(entityEntity, one);
-        }
+        entityEntity.set(index1, index2);
+        output.collect(entityEntity, one);
       }
     }
     // Guard value, output once, sorts after everything; will be dropped by combiner

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java?rev=943109&r1=943108&r2=943109&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java Tue May 11 13:01:42 2010
@@ -45,6 +45,7 @@ public final class UserVectorToCooccurre
 
     int item1ID = entityEntity.getAID();
     int item2ID = entityEntity.getBID();
+    int sum = CooccurrenceCombiner.sum(counts);
 
     if (item1ID < lastItem1ID) {
       throw new IllegalStateException();
@@ -54,17 +55,20 @@ public final class UserVectorToCooccurre
         throw new IllegalStateException();
       }
       if (item2ID == lastItem2ID) {
-        count += CooccurrenceCombiner.sum(counts);
+        count += sum;
       } else {
         if (cooccurrenceRow == null) {
-          cooccurrenceRow = new RandomAccessSparseVector(Integer.MAX_VALUE);
+          cooccurrenceRow = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
         }
-        cooccurrenceRow.set(item2ID, count);
+        cooccurrenceRow.set(lastItem2ID, count);
         lastItem2ID = item2ID;
-        count = CooccurrenceCombiner.sum(counts);
+        count = sum;
       }
     } else {
       if (cooccurrenceRow != null) {
+        if (count > 0) {
+          cooccurrenceRow.set(lastItem2ID, count);
+        }
         VectorWritable vw = new VectorWritable(cooccurrenceRow);
         vw.setWritesLaxPrecision(true);
         output.collect(new IntWritable(lastItem1ID), vw);
@@ -72,7 +76,7 @@ public final class UserVectorToCooccurre
       lastItem1ID = item1ID;
       lastItem2ID = item2ID;
       cooccurrenceRow = null;
-      count = CooccurrenceCombiner.sum(counts);
+      count = sum;
     }
   }
   

Modified: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java?rev=943109&r1=943108&r2=943109&view=diff
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java (original)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java Tue May 11 13:01:42 2010
@@ -77,6 +77,24 @@ public class RandomAccessSparseVector ex
   }
 
   @Override
+  public String toString() {
+    StringBuilder result = new StringBuilder();
+    result.append('{');
+    Iterator<Element> it = iterateNonZero();
+    while (it.hasNext()) {
+      Element e = it.next();
+      result.append(e.index());
+      result.append(':');
+      result.append(e.get());
+      result.append(',');
+    }
+    if (result.length() > 1) {
+      result.setCharAt(result.length() - 1, '}');
+    }
+    return result.toString();
+  }
+
+  @Override
   public Vector assign(Vector other) {
     if (size() != other.size()) {
       throw new CardinalityException(size(), other.size());

Modified: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java?rev=943109&r1=943108&r2=943109&view=diff
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java (original)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java Tue May 11 13:01:42 2010
@@ -96,6 +96,24 @@ public class SequentialAccessSparseVecto
     return new SequentialAccessSparseVector(size(), values.clone());
   }
 
+  @Override
+  public String toString() {
+    StringBuilder result = new StringBuilder();
+    result.append('{');
+    Iterator<Element> it = iterateNonZero();
+    while (it.hasNext()) {
+      Element e = it.next();
+      result.append(e.index());
+      result.append(':');
+      result.append(e.get());
+      result.append(',');
+    }
+    if (result.length() > 1) {
+      result.setCharAt(result.length() - 1, '}');
+    }
+    return result.toString();
+  }
+
   /**
    * @return false
    */