You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/04/05 18:05:41 UTC

svn commit: r930890 - in /lucene/mahout/trunk/core/src: main/java/org/apache/mahout/cf/taste/hadoop/ main/java/org/apache/mahout/cf/taste/hadoop/item/ main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ main/java/org/apache/mahout/cf/taste/had...

Author: srowen
Date: Mon Apr  5 16:05:41 2010
New Revision: 930890

URL: http://svn.apache.org/viewvc?rev=930890&view=rev
Log:
MAHOUT-362 last refactorings for now

Added:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java
      - copied, changed from r930805, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToUserPrefsMapper.java
      - copied, changed from r930805, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPairWritable.java
      - copied, changed from r930806, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPairWritable.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthArrayWritable.java
      - copied, changed from r930806, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthArrayWritable.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthWritable.java
      - copied, changed from r930806, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthWritable.java
Removed:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityWritable.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/UserPrefsPerItemMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/
Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CosineSimilarityReducer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java?rev=930890&r1=930889&r2=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java Mon Apr  5 16:05:41 2010
@@ -21,11 +21,12 @@ import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
 
+import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Writable;
 import org.apache.mahout.common.RandomUtils;
 
 /** A {@link Writable} encapsulating an item ID and a preference value. */
-public final class EntityPrefWritable extends EntityWritable {
+public final class EntityPrefWritable extends LongWritable {
   
   private float prefValue;
   
@@ -39,7 +40,11 @@ public final class EntityPrefWritable ex
   }
   
   public EntityPrefWritable(EntityPrefWritable other) {
-    this(other.getID(), other.getPrefValue());
+    this(other.get(), other.getPrefValue());
+  }
+
+  public long getID() {
+    return get();
   }
 
   public float getPrefValue() {
@@ -75,12 +80,12 @@ public final class EntityPrefWritable ex
       return false;
     }
     EntityPrefWritable other = (EntityPrefWritable) o;
-    return getID() == other.getID() && prefValue == other.getPrefValue();
+    return get() == other.get() && prefValue == other.getPrefValue();
   }
 
   @Override
   public EntityPrefWritable clone() {
-    return new EntityPrefWritable(getID(), prefValue);
+    return new EntityPrefWritable(get(), prefValue);
   }
   
 }
\ No newline at end of file

Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java (from r930805, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java&r1=930805&r2=930890&rev=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java Mon Apr  5 16:05:41 2010
@@ -17,9 +17,6 @@
 
 package org.apache.mahout.cf.taste.hadoop;
 
-import java.io.IOException;
-import java.util.regex.Pattern;
-
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.JobConf;
@@ -29,52 +26,45 @@ import org.apache.hadoop.mapred.OutputCo
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.mahout.cf.taste.hadoop.item.RecommenderJob;
 
-/**
- * <h1>Input</h1>
- * 
- * <p>
- * Intended for use with {@link org.apache.hadoop.mapred.TextInputFormat}; accepts line number / line pairs as
- * {@link LongWritable}/{@link Text} pairs.
- * </p>
- * 
- * <p>
- * Each line is assumed to be of the form <code>userID,itemID,preference</code>, or
- * <code>userID,itemID</code>.</p>
- * </p>
- * 
- * <h1>Output</h1>
- * 
- * <p>
- * Outputs the user ID as a {@link LongWritable} mapped to the item ID and preference as a
- * {@link EntityPrefWritable}.
- * </p>
- */
-public final class ToItemPrefsMapper extends MapReduceBase implements
-    Mapper<LongWritable,Text,LongWritable, EntityWritable> {
-  
+import java.io.IOException;
+import java.util.regex.Pattern;
+
+abstract class ToEntityPrefsMapper extends MapReduceBase implements
+    Mapper<LongWritable,Text,LongWritable,LongWritable> {
+
   private static final Pattern COMMA = Pattern.compile(",");
 
   private boolean booleanData;
+  private final boolean itemKey;
+
+  ToEntityPrefsMapper(boolean itemKey) {
+    this.itemKey = itemKey;
+  }
 
   @Override
   public void configure(JobConf jobConf) {
     booleanData = jobConf.getBoolean(RecommenderJob.BOOLEAN_DATA, false);
   }
-  
+
   @Override
   public void map(LongWritable key,
                   Text value,
-                  OutputCollector<LongWritable, EntityWritable> output,
+                  OutputCollector<LongWritable,LongWritable> output,
                   Reporter reporter) throws IOException {
-    String[] tokens = ToItemPrefsMapper.COMMA.split(value.toString());
+    String[] tokens = ToEntityPrefsMapper.COMMA.split(value.toString());
     long userID = Long.parseLong(tokens[0]);
     long itemID = Long.parseLong(tokens[1]);
+    if (itemKey) {
+      long temp = userID;
+      userID = itemID;
+      itemID = temp;
+    }
     if (booleanData) {
-      output.collect(new LongWritable(userID), new EntityWritable(itemID));
+      output.collect(new LongWritable(userID), new LongWritable(itemID));
     } else {
       float prefValue = tokens.length > 2 ? Float.parseFloat(tokens[2]) : 1.0f;
       output.collect(new LongWritable(userID), new EntityPrefWritable(itemID, prefValue));
     }
   }
-  
+
 }
\ No newline at end of file

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java?rev=930890&r1=930889&r2=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java Mon Apr  5 16:05:41 2010
@@ -17,17 +17,8 @@
 
 package org.apache.mahout.cf.taste.hadoop;
 
-import java.io.IOException;
-import java.util.regex.Pattern;
-
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.mahout.cf.taste.hadoop.item.RecommenderJob;
 
 /**
  * <h1>Input</h1>
@@ -49,32 +40,10 @@ import org.apache.mahout.cf.taste.hadoop
  * {@link EntityPrefWritable}.
  * </p>
  */
-public final class ToItemPrefsMapper extends MapReduceBase implements
-    Mapper<LongWritable,Text,LongWritable, EntityWritable> {
-  
-  private static final Pattern COMMA = Pattern.compile(",");
-
-  private boolean booleanData;
+public final class ToItemPrefsMapper extends ToEntityPrefsMapper {
 
-  @Override
-  public void configure(JobConf jobConf) {
-    booleanData = jobConf.getBoolean(RecommenderJob.BOOLEAN_DATA, false);
-  }
-  
-  @Override
-  public void map(LongWritable key,
-                  Text value,
-                  OutputCollector<LongWritable, EntityWritable> output,
-                  Reporter reporter) throws IOException {
-    String[] tokens = ToItemPrefsMapper.COMMA.split(value.toString());
-    long userID = Long.parseLong(tokens[0]);
-    long itemID = Long.parseLong(tokens[1]);
-    if (booleanData) {
-      output.collect(new LongWritable(userID), new EntityWritable(itemID));
-    } else {
-      float prefValue = tokens.length > 2 ? Float.parseFloat(tokens[2]) : 1.0f;
-      output.collect(new LongWritable(userID), new EntityPrefWritable(itemID, prefValue));
-    }
+  public ToItemPrefsMapper() {
+    super(false);
   }
   
 }
\ No newline at end of file

Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToUserPrefsMapper.java (from r930805, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToUserPrefsMapper.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToUserPrefsMapper.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java&r1=930805&r2=930890&rev=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToUserPrefsMapper.java Mon Apr  5 16:05:41 2010
@@ -17,64 +17,13 @@
 
 package org.apache.mahout.cf.taste.hadoop;
 
-import java.io.IOException;
-import java.util.regex.Pattern;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.mahout.cf.taste.hadoop.item.RecommenderJob;
-
 /**
- * <h1>Input</h1>
- * 
- * <p>
- * Intended for use with {@link org.apache.hadoop.mapred.TextInputFormat}; accepts line number / line pairs as
- * {@link LongWritable}/{@link Text} pairs.
- * </p>
- * 
- * <p>
- * Each line is assumed to be of the form <code>userID,itemID,preference</code>, or
- * <code>userID,itemID</code>.</p>
- * </p>
- * 
- * <h1>Output</h1>
- * 
- * <p>
- * Outputs the user ID as a {@link LongWritable} mapped to the item ID and preference as a
- * {@link EntityPrefWritable}.
- * </p>
+ * The 'reverse' of {@link ToItemPrefsMapper}; outputs item IDs mapped to user-pref data.
  */
-public final class ToItemPrefsMapper extends MapReduceBase implements
-    Mapper<LongWritable,Text,LongWritable, EntityWritable> {
-  
-  private static final Pattern COMMA = Pattern.compile(",");
-
-  private boolean booleanData;
+public final class ToUserPrefsMapper extends ToEntityPrefsMapper {
 
-  @Override
-  public void configure(JobConf jobConf) {
-    booleanData = jobConf.getBoolean(RecommenderJob.BOOLEAN_DATA, false);
+  public ToUserPrefsMapper() {
+    super(true);
   }
-  
-  @Override
-  public void map(LongWritable key,
-                  Text value,
-                  OutputCollector<LongWritable, EntityWritable> output,
-                  Reporter reporter) throws IOException {
-    String[] tokens = ToItemPrefsMapper.COMMA.split(value.toString());
-    long userID = Long.parseLong(tokens[0]);
-    long itemID = Long.parseLong(tokens[1]);
-    if (booleanData) {
-      output.collect(new LongWritable(userID), new EntityWritable(itemID));
-    } else {
-      float prefValue = tokens.length > 2 ? Float.parseFloat(tokens[2]) : 1.0f;
-      output.collect(new LongWritable(userID), new EntityPrefWritable(itemID, prefValue));
-    }
-  }
-  
+
 }
\ No newline at end of file

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java?rev=930890&r1=930889&r2=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java Mon Apr  5 16:05:41 2010
@@ -36,7 +36,6 @@ import org.apache.hadoop.mapred.TextOutp
 import org.apache.hadoop.mapred.lib.IdentityReducer;
 import org.apache.hadoop.util.ToolRunner;
 import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
-import org.apache.mahout.cf.taste.hadoop.EntityWritable;
 import org.apache.mahout.common.AbstractJob;
 import org.apache.mahout.cf.taste.hadoop.RecommendedItemsWritable;
 import org.apache.mahout.cf.taste.hadoop.ToItemPrefsMapper;
@@ -100,7 +99,7 @@ public final class RecommenderJob extend
     
     JobConf toUserVectorConf = prepareJobConf(inputPath, userVectorPath,
       TextInputFormat.class, ToItemPrefsMapper.class, LongWritable.class,
-      booleanData ? EntityWritable.class : EntityPrefWritable.class,
+      booleanData ? LongWritable.class : EntityPrefWritable.class,
       ToUserVectorReducer.class, LongWritable.class, VectorWritable.class, SequenceFileOutputFormat.class);
     toUserVectorConf.setBoolean(BOOLEAN_DATA, booleanData);
     JobClient.runJob(toUserVectorConf);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java?rev=930890&r1=930889&r2=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java Mon Apr  5 16:05:41 2010
@@ -29,7 +29,6 @@ import org.apache.hadoop.mapred.OutputCo
 import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
-import org.apache.mahout.cf.taste.hadoop.EntityWritable;
 import org.apache.mahout.math.RandomAccessSparseVector;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
@@ -58,7 +57,7 @@ import org.apache.mahout.math.VectorWrit
  * 
  */
 public final class ToUserVectorReducer extends MapReduceBase implements
-    Reducer<LongWritable, EntityWritable,LongWritable,VectorWritable> {
+    Reducer<LongWritable,LongWritable,LongWritable,VectorWritable> {
   
   public static final int MAX_PREFS_CONSIDERED = 20;
   
@@ -72,14 +71,14 @@ public final class ToUserVectorReducer e
   
   @Override
   public void reduce(LongWritable userID,
-                     Iterator<EntityWritable> itemPrefs,
+                     Iterator<LongWritable> itemPrefs,
                      OutputCollector<LongWritable,VectorWritable> output,
                      Reporter reporter) throws IOException {
     if (itemPrefs.hasNext()) {
       RandomAccessSparseVector userVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
       while (itemPrefs.hasNext()) {
-        EntityWritable itemPref = itemPrefs.next();
-        int index = ItemIDIndexMapper.idToIndex(itemPref.getID());
+        LongWritable itemPref = itemPrefs.next();
+        int index = ItemIDIndexMapper.idToIndex(itemPref.get());
         float value;
         if (itemPref instanceof EntityPrefWritable) {
           value = ((EntityPrefWritable) itemPref).getPrefValue();

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java?rev=930890&r1=930889&r2=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java Mon Apr  5 16:05:41 2010
@@ -20,22 +20,25 @@ package org.apache.mahout.cf.taste.hadoo
 import java.io.IOException;
 
 import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.mahout.cf.taste.hadoop.EntityWritable;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPairWritable;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthArrayWritable;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
 
 /**
  * map out each pair of items that appears in the same user-vector together with the multiplied vector lengths
  * of the associated item vectors
  */
-public final  class CopreferredItemsMapper
-    extends Mapper<EntityWritable,ItemPrefWithLengthArrayWritable,ItemPairWritable,FloatWritable> {
+public final class CopreferredItemsMapper extends MapReduceBase
+    implements Mapper<LongWritable,ItemPrefWithLengthArrayWritable,ItemPairWritable,FloatWritable> {
 
   @Override
-  protected void map(EntityWritable user, ItemPrefWithLengthArrayWritable itemPrefsArray, Context context)
-      throws IOException, InterruptedException {
+  public void map(LongWritable user,
+                  ItemPrefWithLengthArrayWritable itemPrefsArray,
+                  OutputCollector<ItemPairWritable,FloatWritable> output,
+                  Reporter reporter)
+      throws IOException {
 
     ItemPrefWithLengthWritable[] itemPrefs = itemPrefsArray.getItemPrefs();
 
@@ -49,7 +52,7 @@ public final  class CopreferredItemsMapp
         long itemAID = Math.min(itemNID, itemM.getItemID());
         long itemBID = Math.max(itemNID, itemM.getItemID());
         ItemPairWritable pair = new ItemPairWritable(itemAID, itemBID, itemNLength * itemM.getLength());
-        context.write(pair, new FloatWritable(itemNValue * itemM.getPrefValue()));
+        output.collect(pair, new FloatWritable(itemNValue * itemM.getPrefValue()));
       }
     }
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CosineSimilarityReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CosineSimilarityReducer.java?rev=930890&r1=930889&r2=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CosineSimilarityReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CosineSimilarityReducer.java Mon Apr  5 16:05:41 2010
@@ -18,34 +18,36 @@
 package org.apache.mahout.cf.taste.hadoop.similarity.item;
 
 import java.io.IOException;
+import java.util.Iterator;
 
 import org.apache.hadoop.io.DoubleWritable;
 import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
 import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPairWritable;
 
 /**
  * Finally compute the cosine for each item-pair
  */
-public final class CosineSimilarityReducer
-    extends Reducer<ItemPairWritable,FloatWritable, EntityEntityWritable,DoubleWritable> {
+public final class CosineSimilarityReducer extends MapReduceBase
+    implements Reducer<ItemPairWritable,FloatWritable,EntityEntityWritable,DoubleWritable> {
 
   @Override
-  protected void reduce(ItemPairWritable pair, Iterable<FloatWritable> numeratorSummands, Context context)
-      throws IOException, InterruptedException {
+  public void reduce(ItemPairWritable pair,
+                     Iterator<FloatWritable> numeratorSummands,
+                     OutputCollector<EntityEntityWritable,DoubleWritable> output,
+                     Reporter reporter)
+      throws IOException {
 
     double numerator = 0.0;
-
-    for (FloatWritable nummeratorSummand : numeratorSummands) {
-      numerator += nummeratorSummand.get();
+    while (numeratorSummands.hasNext()) {
+      numerator += numeratorSummands.next().get();
     }
-
     double denominator = pair.getMultipliedLength();
-
     double cosine = numerator / denominator;
-
-    context.write(pair.getItemItemWritable(), new DoubleWritable(cosine));
+    output.collect(pair.getItemItemWritable(), new DoubleWritable(cosine));
   }
 
 }

Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPairWritable.java (from r930806, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPairWritable.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPairWritable.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPairWritable.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPairWritable.java&r1=930806&r2=930890&rev=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPairWritable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPairWritable.java Mon Apr  5 16:05:41 2010
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.mahout.cf.taste.hadoop.similarity.item.writables;
+package org.apache.mahout.cf.taste.hadoop.similarity.item;
 
 import java.io.DataInput;
 import java.io.DataOutput;

Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthArrayWritable.java (from r930806, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthArrayWritable.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthArrayWritable.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthArrayWritable.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthArrayWritable.java&r1=930806&r2=930890&rev=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthArrayWritable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthArrayWritable.java Mon Apr  5 16:05:41 2010
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.mahout.cf.taste.hadoop.similarity.item.writables;
+package org.apache.mahout.cf.taste.hadoop.similarity.item;
 
 import org.apache.hadoop.io.ArrayWritable;
 

Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthWritable.java (from r930806, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthWritable.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthWritable.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthWritable.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthWritable.java&r1=930806&r2=930890&rev=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthWritable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthWritable.java Mon Apr  5 16:05:41 2010
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.mahout.cf.taste.hadoop.similarity.item.writables;
+package org.apache.mahout.cf.taste.hadoop.similarity.item;
 
 import java.io.DataInput;
 import java.io.DataOutput;

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java?rev=930890&r1=930889&r2=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java Mon Apr  5 16:05:41 2010
@@ -17,32 +17,24 @@
 
 package org.apache.mahout.cf.taste.hadoop.similarity.item;
 
-import java.io.IOException;
 import java.util.Map;
 
+import org.apache.commons.cli2.Option;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.DoubleWritable;
 import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.mapred.TextOutputFormat;
 import org.apache.hadoop.util.ToolRunner;
+import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
 import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
 import org.apache.mahout.cf.taste.hadoop.EntityPrefWritableArrayWritable;
-import org.apache.mahout.cf.taste.hadoop.EntityWritable;
-import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPairWritable;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthArrayWritable;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthWritable;
+import org.apache.mahout.cf.taste.hadoop.ToUserPrefsMapper;
 import org.apache.mahout.common.AbstractJob;
 
 /**
@@ -108,7 +100,6 @@ public final class ItemSimilarityJob ext
   public int run(String[] args) throws Exception {
 
     Map<String,String> parsedArgs = AbstractJob.parseArguments(args);
-
     if (parsedArgs == null) {
       return -1;
     }
@@ -121,84 +112,47 @@ public final class ItemSimilarityJob ext
     String itemVectorsPath = tempDirPath + "/itemVectors";
     String userVectorsPath = tempDirPath + "/userVectors";
 
-    Job itemVectors = createJob(originalConf, "itemVectors", inputPath, itemVectorsPath, UserPrefsPerItemMapper.class,
-        EntityWritable.class, EntityPrefWritable.class, ToItemVectorReducer.class, EntityWritable.class,
-        EntityPrefWritableArrayWritable.class, TextInputFormat.class, SequenceFileOutputFormat.class, true);
-
-    itemVectors.waitForCompletion(true);
-
-    Job userVectors = createJob(originalConf, "userVectors", itemVectorsPath, userVectorsPath,
-        PreferredItemsPerUserMapper.class, EntityWritable.class, ItemPrefWithLengthWritable.class,
-        PreferredItemsPerUserReducer.class, EntityWritable.class, ItemPrefWithLengthArrayWritable.class);
-
-    userVectors.waitForCompletion(true);
-
-    Job similarity = createJob(originalConf, "similarity", userVectorsPath, outputPath,
-        CopreferredItemsMapper.class, ItemPairWritable.class, FloatWritable.class, CosineSimilarityReducer.class,
-        EntityEntityWritable.class, DoubleWritable.class, SequenceFileInputFormat.class, TextOutputFormat.class, false);
-
-    similarity.waitForCompletion(true);
+    JobConf itemVectors = prepareJobConf(inputPath,
+                                         itemVectorsPath,
+                                         TextInputFormat.class,
+                                         ToUserPrefsMapper.class,
+                                         LongWritable.class,
+                                         EntityPrefWritable.class,
+                                         ToItemVectorReducer.class,
+                                         LongWritable.class,
+                                         EntityPrefWritableArrayWritable.class,
+                                         SequenceFileOutputFormat.class);
+    JobClient.runJob(itemVectors);
+
+    JobConf userVectors = prepareJobConf(itemVectorsPath,
+                                         userVectorsPath,
+                                         SequenceFileInputFormat.class,
+                                         PreferredItemsPerUserMapper.class,
+                                         LongWritable.class,
+                                         ItemPrefWithLengthWritable.class,
+                                         PreferredItemsPerUserReducer.class,
+                                         LongWritable.class,
+                                         ItemPrefWithLengthArrayWritable.class,
+                                         SequenceFileOutputFormat.class);
+    JobClient.runJob(userVectors);
+
+    JobConf similarity = prepareJobConf(userVectorsPath,
+                                        outputPath,
+                                        SequenceFileInputFormat.class,
+                                        CopreferredItemsMapper.class,
+                                        ItemPairWritable.class,
+                                        FloatWritable.class,
+                                        CosineSimilarityReducer.class,
+                                        EntityEntityWritable.class,
+                                        DoubleWritable.class,
+                                        TextOutputFormat.class);
+    JobClient.runJob(similarity);
 
     return 0;
   }
 
   public static void main(String[] args) throws Exception {
-    ToolRunner.run(new Configuration(), new ItemSimilarityJob(), args);
-  }
-
-  protected static Job createJob(Configuration conf,
-                                 String jobName,
-                                 String inputPath,
-                                 String outputPath,
-                                 Class<? extends Mapper> mapperClass,
-                                 Class<? extends Writable> mapKeyOutClass,
-                                 Class<? extends Writable> mapValueOutClass,
-                                 Class<? extends Reducer> reducerClass,
-                                 Class<? extends Writable> keyOutClass,
-                                 Class<? extends Writable> valueOutClass) throws IOException {
-    return createJob(conf, jobName, inputPath, outputPath, mapperClass, mapKeyOutClass,
-        mapValueOutClass, reducerClass, keyOutClass, valueOutClass, SequenceFileInputFormat.class,
-        SequenceFileOutputFormat.class, true);
-  }
-
-  protected static Job createJob(Configuration conf,
-                                 String jobName,
-                                 String inputPath,
-                                 String outputPath,
-                                 Class<? extends Mapper> mapperClass,
-                                 Class<? extends Writable> mapKeyOutClass,
-                                 Class<? extends Writable> mapValueOutClass,
-                                 Class<? extends Reducer> reducerClass,
-                                 Class<? extends Writable> keyOutClass,
-                                 Class<? extends Writable> valueOutClass,
-                                 Class<? extends FileInputFormat> fileInputFormatClass,
-                                 Class<? extends FileOutputFormat> fileOutputFormatClass,
-                                 boolean compress) throws IOException {
-
-    Job job = new Job(conf, jobName);
-
-    FileSystem fs = FileSystem.get(conf);
-
-    Path inputPathPath = new Path(inputPath).makeQualified(fs);
-    Path outputPathPath = new Path(outputPath).makeQualified(fs);
-
-    FileInputFormat.setInputPaths(job, inputPathPath);
-    job.setInputFormatClass(fileInputFormatClass);
-
-    job.setMapperClass(mapperClass);
-    job.setMapOutputKeyClass(mapKeyOutClass);
-    job.setMapOutputValueClass(mapValueOutClass);
-
-    job.setReducerClass(reducerClass);
-    job.setOutputKeyClass(keyOutClass);
-    job.setOutputValueClass(valueOutClass);
-
-
-    FileOutputFormat.setOutputPath(job, outputPathPath);
-    FileOutputFormat.setCompressOutput(job, compress);
-    job.setOutputFormatClass(fileOutputFormatClass);
-
-    return job;
+    ToolRunner.run(new ItemSimilarityJob(), args);
   }
 
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java?rev=930890&r1=930889&r2=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java Mon Apr  5 16:05:41 2010
@@ -19,22 +19,26 @@ package org.apache.mahout.cf.taste.hadoo
 
 import java.io.IOException;
 
-import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
 import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
 import org.apache.mahout.cf.taste.hadoop.EntityPrefWritableArrayWritable;
-import org.apache.mahout.cf.taste.hadoop.EntityWritable;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthWritable;
 
 /**
  * For each item vector, computes its length and emits every entry keyed by user ID,
  * so that the reducer can assemble the user vectors.
  */
-public final class PreferredItemsPerUserMapper
-    extends Mapper<EntityWritable, EntityPrefWritableArrayWritable,EntityWritable,ItemPrefWithLengthWritable> {
+public final class PreferredItemsPerUserMapper extends MapReduceBase
+    implements Mapper<LongWritable,EntityPrefWritableArrayWritable,LongWritable,ItemPrefWithLengthWritable> {
 
   @Override
-  protected void map(EntityWritable item, EntityPrefWritableArrayWritable userPrefsArray, Context context)
-      throws IOException, InterruptedException {
+  public void map(LongWritable item,
+                  EntityPrefWritableArrayWritable userPrefsArray,
+                  OutputCollector<LongWritable,ItemPrefWithLengthWritable> output,
+                  Reporter reporter) throws IOException {
 
     EntityPrefWritable[] userPrefs = userPrefsArray.getPrefs();
 
@@ -47,8 +51,8 @@ public final class PreferredItemsPerUser
     length = Math.sqrt(length);
 
     for (EntityPrefWritable userPref : userPrefs) {
-      context.write(new EntityWritable(userPref.getID()),
-          new ItemPrefWithLengthWritable(item.getID(), length, userPref.getPrefValue()));
+      output.collect(new LongWritable(userPref.getID()),
+          new ItemPrefWithLengthWritable(item.get(), length, userPref.getPrefValue()));
     }
 
   }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java?rev=930890&r1=930889&r2=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java Mon Apr  5 16:05:41 2010
@@ -19,27 +19,32 @@ package org.apache.mahout.cf.taste.hadoo
 
 import java.io.IOException;
 import java.util.HashSet;
+import java.util.Iterator;
 import java.util.Set;
 
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.mahout.cf.taste.hadoop.EntityWritable;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthArrayWritable;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
 
-public final class PreferredItemsPerUserReducer
-    extends Reducer<EntityWritable,ItemPrefWithLengthWritable, EntityWritable,ItemPrefWithLengthArrayWritable> {
+public final class PreferredItemsPerUserReducer extends MapReduceBase
+    implements Reducer<LongWritable,ItemPrefWithLengthWritable, LongWritable,ItemPrefWithLengthArrayWritable> {
 
   @Override
-  protected void reduce(EntityWritable user, Iterable<ItemPrefWithLengthWritable> itemPrefs, Context context)
-      throws IOException, InterruptedException {
+  public void reduce(LongWritable user,
+                     Iterator<ItemPrefWithLengthWritable> itemPrefs,
+                     OutputCollector<LongWritable,ItemPrefWithLengthArrayWritable> output,
+                     Reporter reporter)
+      throws IOException {
 
     Set<ItemPrefWithLengthWritable> itemPrefsWithLength = new HashSet<ItemPrefWithLengthWritable>();
 
-    for (ItemPrefWithLengthWritable itemPrefWithLength : itemPrefs) {
-      itemPrefsWithLength.add(itemPrefWithLength.deepCopy());
+    while (itemPrefs.hasNext()) {
+      itemPrefsWithLength.add(itemPrefs.next().clone());
     }
 
-    context.write(user, new ItemPrefWithLengthArrayWritable(
+    output.collect(user, new ItemPrefWithLengthArrayWritable(
         itemPrefsWithLength.toArray(new ItemPrefWithLengthWritable[itemPrefsWithLength.size()])));
   }
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java?rev=930890&r1=930889&r2=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java Mon Apr  5 16:05:41 2010
@@ -19,31 +19,39 @@ package org.apache.mahout.cf.taste.hadoo
 
 import java.io.IOException;
 import java.util.HashSet;
+import java.util.Iterator;
 import java.util.Set;
 
-import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
 import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
 import org.apache.mahout.cf.taste.hadoop.EntityPrefWritableArrayWritable;
-import org.apache.mahout.cf.taste.hadoop.EntityWritable;
 
 /**
  * For each single item, collect all users with their preferences
  * (thereby building the item vectors of the user-item-matrix)
  */
 public final class ToItemVectorReducer
-    extends Reducer<EntityWritable, EntityPrefWritable, EntityWritable, EntityPrefWritableArrayWritable> {
+    extends MapReduceBase implements
+    Reducer<LongWritable,EntityPrefWritable,LongWritable,EntityPrefWritableArrayWritable> {
 
   @Override
-  protected void reduce(EntityWritable item, Iterable<EntityPrefWritable> userPrefs, Context context)
-      throws IOException, InterruptedException {
+  public void reduce(LongWritable item,
+                     Iterator<EntityPrefWritable> userPrefs,
+                     OutputCollector<LongWritable,EntityPrefWritableArrayWritable> output,
+                     Reporter reporter)
+      throws IOException {
 
     Set<EntityPrefWritable> collectedUserPrefs = new HashSet<EntityPrefWritable>();
 
-    for (EntityPrefWritable userPref : userPrefs) {
-      collectedUserPrefs.add(userPref.clone());
+    while (userPrefs.hasNext()) {
+      collectedUserPrefs.add(userPrefs.next().clone());
     }
 
-    context.write(item, new EntityPrefWritableArrayWritable(
+    output.collect(item, new EntityPrefWritableArrayWritable(
         collectedUserPrefs.toArray(new EntityPrefWritable[collectedUserPrefs.size()])));
   }
 

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java?rev=930890&r1=930889&r2=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java Mon Apr  5 16:05:41 2010
@@ -17,12 +17,6 @@
 
 package org.apache.mahout.cf.taste.hadoop.similarity.item;
 
-import static org.easymock.EasyMock.eq;
-import static org.easymock.EasyMock.expect;
-import static org.easymock.classextension.EasyMock.createMock;
-import static org.easymock.classextension.EasyMock.replay;
-import static org.easymock.classextension.EasyMock.verify;
-
 import java.io.BufferedReader;
 import java.io.BufferedWriter;
 import java.io.File;
@@ -39,18 +33,15 @@ import org.apache.hadoop.io.DoubleWritab
 import org.apache.hadoop.io.FloatWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.easymock.classextension.EasyMock;
+import org.easymock.IArgumentMatcher;
+
 import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
 import org.apache.mahout.cf.taste.hadoop.EntityPrefWritableArrayWritable;
-import org.apache.mahout.cf.taste.hadoop.EntityWritable;
 import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPairWritable;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthArrayWritable;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthWritable;
+import org.apache.mahout.cf.taste.hadoop.ToUserPrefsMapper;
 import org.apache.mahout.common.MahoutTestCase;
-import org.easymock.IArgumentMatcher;
-import org.easymock.classextension.EasyMock;
 
 /**
  * Unit tests for the mappers and reducers in org.apache.mahout.cf.taste.hadoop.similarity.item
@@ -61,36 +52,41 @@ public class ItemSimilarityTest extends 
 
 
   public void testUserPrefsPerItemMapper() throws Exception {
-    Mapper.Context ctx = createMock(Mapper.Context.class);
-    ctx.write(new EntityWritable(34L), new EntityPrefWritable(12L, 2.3f));
-    replay(ctx);
+    OutputCollector<LongWritable,LongWritable> output =
+        EasyMock.createMock(OutputCollector.class);
+    output.collect(new LongWritable(34L), new EntityPrefWritable(12L, 2.3f));
+    EasyMock.replay(output);
 
-    new UserPrefsPerItemMapper().map(new LongWritable(), new Text("12,34,2.3"), ctx);
+    new ToUserPrefsMapper().map(new LongWritable(), new Text("12,34,2.3"), output, null);
 
-    verify(ctx);
+    EasyMock.verify(output);
   }
 
   public void testToItemVectorReducer() throws Exception {
 
-    List<EntityPrefWritable> userPrefs = Arrays.asList(new EntityPrefWritable(34L, 1.0f), new EntityPrefWritable(56L, 2.0f));
+    List<EntityPrefWritable> userPrefs = Arrays.asList(
+        new EntityPrefWritable(34L, 1.0f), new EntityPrefWritable(56L, 2.0f));
 
-    Reducer.Context ctx = createMock(Reducer.Context.class);
+    OutputCollector<LongWritable,EntityPrefWritableArrayWritable> output =
+        EasyMock.createMock(OutputCollector.class);
 
-    ctx.write(eq(new EntityWritable(12L)), equalToUserPrefs(userPrefs));
+    output.collect(EasyMock.eq(new LongWritable(12L)), equalToUserPrefs(userPrefs));
 
-    replay(ctx);
+    EasyMock.replay(output);
 
-    new ToItemVectorReducer().reduce(new EntityWritable(12L), userPrefs, ctx);
+    new ToItemVectorReducer().reduce(new LongWritable(12L), userPrefs.iterator(), output, null);
 
-    verify(ctx);
+    EasyMock.verify(output);
   }
 
-  static EntityPrefWritableArrayWritable equalToUserPrefs(final Collection<EntityPrefWritable> prefsToCheck) {
+  static EntityPrefWritableArrayWritable equalToUserPrefs(
+      final Collection<EntityPrefWritable> prefsToCheck) {
     EasyMock.reportMatcher(new IArgumentMatcher() {
       @Override
       public boolean matches(Object argument) {
         if (argument instanceof EntityPrefWritableArrayWritable) {
-          EntityPrefWritableArrayWritable userPrefArray = (EntityPrefWritableArrayWritable) argument;
+          EntityPrefWritableArrayWritable userPrefArray =
+              (EntityPrefWritableArrayWritable) argument;
           Set<EntityPrefWritable> set = new HashSet<EntityPrefWritable>();
           set.addAll(Arrays.asList(userPrefArray.getPrefs()));
 
@@ -116,50 +112,56 @@ public class ItemSimilarityTest extends 
   }
 
   public void testPreferredItemsPerUserMapper() throws Exception {
-    Mapper.Context ctx = createMock(Mapper.Context.class);
-    EntityPrefWritableArrayWritable userPrefs = createMock(EntityPrefWritableArrayWritable.class);
-
-    expect(userPrefs.getPrefs())
-        .andReturn(new EntityPrefWritable[] { new EntityPrefWritable(12L, 2.0f), new EntityPrefWritable(56L, 3.0f) });
+    OutputCollector<LongWritable,ItemPrefWithLengthWritable> output =
+        EasyMock.createMock(OutputCollector.class);
+    EntityPrefWritableArrayWritable userPrefs =
+        EasyMock.createMock(EntityPrefWritableArrayWritable.class);
+
+    EasyMock.expect(userPrefs.getPrefs()).andReturn(
+        new EntityPrefWritable[] {
+            new EntityPrefWritable(12L, 2.0f),
+            new EntityPrefWritable(56L, 3.0f) });
 
     double length = Math.sqrt(Math.pow(2.0f, 2) + Math.pow(3.0f, 2));
 
-    ctx.write(new EntityWritable(12L), new ItemPrefWithLengthWritable(34L, length, 2.0f));
-    ctx.write(new EntityWritable(56L), new ItemPrefWithLengthWritable(34L, length, 3.0f));
+    output.collect(new LongWritable(12L), new ItemPrefWithLengthWritable(34L, length, 2.0f));
+    output.collect(new LongWritable(56L), new ItemPrefWithLengthWritable(34L, length, 3.0f));
 
-    replay(ctx, userPrefs);
+    EasyMock.replay(output, userPrefs);
 
-    new PreferredItemsPerUserMapper().map(new EntityWritable(34L), userPrefs, ctx);
+    new PreferredItemsPerUserMapper().map(new LongWritable(34L), userPrefs, output, null);
 
-    verify(ctx, userPrefs);
+    EasyMock.verify(output, userPrefs);
   }
 
   public void testPreferredItemsPerUserReducer() throws Exception {
 
     List<ItemPrefWithLengthWritable> itemPrefs =
-        Arrays.asList(new ItemPrefWithLengthWritable(34L, 5.0, 1.0f), new ItemPrefWithLengthWritable(56L, 7.0, 2.0f));
+        Arrays.asList(new ItemPrefWithLengthWritable(34L, 5.0, 1.0f),
+                      new ItemPrefWithLengthWritable(56L, 7.0, 2.0f));
 
-    Reducer.Context ctx = createMock(Reducer.Context.class);
+    OutputCollector<LongWritable,ItemPrefWithLengthArrayWritable> output =
+        EasyMock.createMock(OutputCollector.class);
 
-    ctx.write(eq(new EntityWritable(12L)), equalToItemPrefs(itemPrefs));
+    output.collect(EasyMock.eq(new LongWritable(12L)), equalToItemPrefs(itemPrefs));
 
-    replay(ctx);
+    EasyMock.replay(output);
 
-    new PreferredItemsPerUserReducer().reduce(new EntityWritable(12L), itemPrefs, ctx);
+    new PreferredItemsPerUserReducer().reduce(
+        new LongWritable(12L), itemPrefs.iterator(), output, null);
 
-    verify(ctx);
+    EasyMock.verify(output);
   }
 
-  static ItemPrefWithLengthArrayWritable equalToItemPrefs(final Collection<ItemPrefWithLengthWritable> prefsToCheck) {
+  static ItemPrefWithLengthArrayWritable equalToItemPrefs(
+      final Collection<ItemPrefWithLengthWritable> prefsToCheck) {
     EasyMock.reportMatcher(new IArgumentMatcher() {
       @Override
       public boolean matches(Object argument) {
         if (argument instanceof ItemPrefWithLengthArrayWritable) {
           ItemPrefWithLengthArrayWritable itemPrefArray = (ItemPrefWithLengthArrayWritable) argument;
-          Set<ItemPrefWithLengthWritable> set = new HashSet<ItemPrefWithLengthWritable>();
-          for (ItemPrefWithLengthWritable itemPref : itemPrefArray.getItemPrefs()) {
-            set.add(itemPref);
-          }
+          Collection<ItemPrefWithLengthWritable> set = new HashSet<ItemPrefWithLengthWritable>();
+          set.addAll(Arrays.asList(itemPrefArray.getItemPrefs()));
 
           if (set.size() != prefsToCheck.size()) {
             return false;
@@ -183,40 +185,45 @@ public class ItemSimilarityTest extends 
   }
 
   public void testCopreferredItemsMapper() throws Exception {
-    Mapper.Context ctx = createMock(Mapper.Context.class);
-    ItemPrefWithLengthArrayWritable itemPrefs = createMock(ItemPrefWithLengthArrayWritable.class);
+    OutputCollector<ItemPairWritable,FloatWritable> output =
+        EasyMock.createMock(OutputCollector.class);
+    ItemPrefWithLengthArrayWritable itemPrefs =
+        EasyMock.createMock(ItemPrefWithLengthArrayWritable.class);
 
-    expect(itemPrefs.getItemPrefs()).andReturn(new ItemPrefWithLengthWritable[] {
+    EasyMock.expect(itemPrefs.getItemPrefs()).andReturn(new ItemPrefWithLengthWritable[] {
         new ItemPrefWithLengthWritable(34L, 2.0, 1.0f), new ItemPrefWithLengthWritable(56L, 3.0, 2.0f),
         new ItemPrefWithLengthWritable(78L, 4.0, 3.0f) });
 
-    ctx.write(new ItemPairWritable(34L, 56L, 6.0), new FloatWritable(2.0f));
-    ctx.write(new ItemPairWritable(34L, 78L, 8.0), new FloatWritable(3.0f));
-    ctx.write(new ItemPairWritable(56L, 78L, 12.0), new FloatWritable(6.0f));
+    output.collect(new ItemPairWritable(34L, 56L, 6.0), new FloatWritable(2.0f));
+    output.collect(new ItemPairWritable(34L, 78L, 8.0), new FloatWritable(3.0f));
+    output.collect(new ItemPairWritable(56L, 78L, 12.0), new FloatWritable(6.0f));
 
-    replay(ctx, itemPrefs);
+    EasyMock.replay(output, itemPrefs);
 
-    new CopreferredItemsMapper().map(new EntityWritable(), itemPrefs, ctx);
+    new CopreferredItemsMapper().map(new LongWritable(), itemPrefs, output, null);
 
-    verify(ctx, itemPrefs);
+    EasyMock.verify(output, itemPrefs);
   }
 
   public void testCosineSimilarityReducer() throws Exception {
-    Reducer.Context ctx = createMock(Reducer.Context.class);
+    OutputCollector<EntityEntityWritable,DoubleWritable> output =
+        EasyMock.createMock(OutputCollector.class);
 
-    ctx.write(new EntityEntityWritable(12L, 34L), new DoubleWritable(0.5d));
+    output.collect(new EntityEntityWritable(12L, 34L), new DoubleWritable(0.5d));
 
-    replay(ctx);
+    EasyMock.replay(output);
 
     new CosineSimilarityReducer().reduce(new ItemPairWritable(12L, 34L, 20.0),
-        Arrays.asList(new FloatWritable(5.0f), new FloatWritable(5.0f)), ctx);
+        Arrays.asList(new FloatWritable(5.0f),
+                      new FloatWritable(5.0f)).iterator(), output, null);
 
-    verify(ctx);
+    EasyMock.verify(output);
   }
 
   public void testCompleteJob() throws Exception {
 
-    String tmpDirPath = System.getProperty("java.io.tmpdir")+ '/' +ItemSimilarityTest.class.getCanonicalName();
+    String tmpDirPath = System.getProperty("java.io.tmpdir") +
+          ItemSimilarityTest.class.getCanonicalName();
     File tmpDir = new File(tmpDirPath);
 
     try {
@@ -250,14 +257,16 @@ public class ItemSimilarityTest extends 
       Configuration conf = new Configuration();
       conf.set("mapred.input.dir", tmpDirPath+"/prefs.txt");
       conf.set("mapred.output.dir", tmpDirPath+"/output");
+      conf.set("mapred.output.compress", Boolean.FALSE.toString());
 
       similarityJob.setConf(conf);
 
       similarityJob.run(new String[] { "--tempDir", tmpDirPath+"/tmp"});
 
-      BufferedReader reader = new BufferedReader(new FileReader(tmpDirPath+"/output/part-r-00000"));
+      String filePath = tmpDirPath+"/output/part-00000";
+      BufferedReader reader = new BufferedReader(new FileReader(filePath));
 
-      String line = null;
+      String line;
       int currentLine = 1;
       while ( (line = reader.readLine()) != null) {