You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/05/09 10:44:30 UTC

svn commit: r942513 - in /lucene/mahout/trunk/core/src: main/java/org/apache/mahout/cf/taste/hadoop/ main/java/org/apache/mahout/cf/taste/hadoop/item/ main/java/org/apache/mahout/cf/taste/hadoop/pseudo/ main/java/org/apache/mahout/cf/taste/hadoop/simil...

Author: srowen
Date: Sun May  9 08:44:29 2010
New Revision: 942513

URL: http://svn.apache.org/viewvc?rev=942513&view=rev
Log:
More possible improvements -- using VLongWritable and adjusting combiner settings, fixed 'phase' arguments

Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityCountWritable.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityEntityWritable.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateCombiner.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexReducer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PartialMultiplyReducer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/VectorOrPrefWritable.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderReducer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/UserIDsMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOnePrefsToDiffsReducer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityCountWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityCountWritable.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityCountWritable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityCountWritable.java Sun May  9 08:44:29 2010
@@ -21,12 +21,12 @@ import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
 
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableUtils;
 
 /** A {@link Writable} encapsulating an item ID and a count . */
-public final class EntityCountWritable extends LongWritable implements Cloneable {
+public final class EntityCountWritable extends VLongWritable implements Cloneable {
 
   private int count;
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityEntityWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityEntityWritable.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityEntityWritable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityEntityWritable.java Sun May  9 08:44:29 2010
@@ -22,6 +22,7 @@ import java.io.DataOutput;
 import java.io.IOException;
 
 import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableUtils;
 import org.apache.mahout.common.RandomUtils;
 
 /** A {@link WritableComparable} encapsulating two items. */
@@ -55,14 +56,14 @@ public final class EntityEntityWritable
   
   @Override
   public void write(DataOutput out) throws IOException {
-    out.writeLong(aID);
-    out.writeLong(bID);
+    WritableUtils.writeVLong(out, aID);
+    WritableUtils.writeVLong(out, bID);
   }
   
   @Override
   public void readFields(DataInput in) throws IOException {
-    aID = in.readLong();
-    bID = in.readLong();
+    aID = WritableUtils.readVLong(in);
+    bID = WritableUtils.readVLong(in);
   }
   
   @Override

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java Sun May  9 08:44:29 2010
@@ -21,12 +21,12 @@ import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
 
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.io.Writable;
 import org.apache.mahout.common.RandomUtils;
 
 /** A {@link Writable} encapsulating an item ID and a preference value. */
-public final class EntityPrefWritable extends LongWritable implements Cloneable {
+public final class EntityPrefWritable extends VLongWritable implements Cloneable {
   
   private float prefValue;
   

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java Sun May  9 08:44:29 2010
@@ -24,6 +24,7 @@ import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableUtils;
 import org.apache.mahout.cf.taste.impl.recommender.GenericRecommendedItem;
 import org.apache.mahout.cf.taste.recommender.RecommendedItem;
 
@@ -56,7 +57,7 @@ public final class RecommendedItemsWrita
   public void write(DataOutput out) throws IOException {
     out.writeInt(recommended.size());
     for (RecommendedItem item : recommended) {
-      out.writeLong(item.getItemID());
+      WritableUtils.writeVLong(out, item.getItemID());
       out.writeFloat(item.getValue());
     }
     
@@ -67,7 +68,7 @@ public final class RecommendedItemsWrita
     int size = in.readInt();
     recommended = new ArrayList<RecommendedItem>(size);
     for (int i = 0; i < size; i++) {
-      long itemID = in.readLong();
+      long itemID = WritableUtils.readVLong(in);
       float value = in.readFloat();
       RecommendedItem recommendedItem = new GenericRecommendedItem(itemID, value);
       recommended.add(recommendedItem);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java Sun May  9 08:44:29 2010
@@ -19,6 +19,7 @@ package org.apache.mahout.cf.taste.hadoo
 
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.Mapper;
@@ -30,7 +31,7 @@ import java.io.IOException;
 import java.util.regex.Pattern;
 
 abstract class ToEntityPrefsMapper extends MapReduceBase implements
-    Mapper<LongWritable,Text,LongWritable,LongWritable> {
+    Mapper<LongWritable,Text,VLongWritable,VLongWritable> {
 
   static final String TRANSPOSE_USER_ITEM = "transposeUserItem";
 
@@ -53,7 +54,7 @@ abstract class ToEntityPrefsMapper exten
   @Override
   public void map(LongWritable key,
                   Text value,
-                  OutputCollector<LongWritable,LongWritable> output,
+                  OutputCollector<VLongWritable,VLongWritable> output,
                   Reporter reporter) throws IOException {
     String[] tokens = ToEntityPrefsMapper.DELIMITER.split(value.toString());
     long userID = Long.parseLong(tokens[0]);
@@ -67,10 +68,10 @@ abstract class ToEntityPrefsMapper exten
       itemID = temp;
     }
     if (booleanData) {
-      output.collect(new LongWritable(userID), new LongWritable(itemID));
+      output.collect(new VLongWritable(userID), new VLongWritable(itemID));
     } else {
       float prefValue = tokens.length > 2 ? Float.parseFloat(tokens[2]) : 1.0f;
-      output.collect(new LongWritable(userID), new EntityPrefWritable(itemID, prefValue));
+      output.collect(new VLongWritable(userID), new EntityPrefWritable(itemID, prefValue));
     }
   }
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java Sun May  9 08:44:29 2010
@@ -17,7 +17,6 @@
 
 package org.apache.mahout.cf.taste.hadoop;
 
-import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 
 /**
@@ -25,7 +24,7 @@ import org.apache.hadoop.io.Text;
  * 
  * <p>
  * Intended for use with {@link org.apache.hadoop.mapred.TextInputFormat}; accepts line number / line pairs as
- * {@link LongWritable}/{@link Text} pairs.
+ * {@link org.apache.hadoop.io.VLongWritable}/{@link Text} pairs.
  * </p>
  * 
  * <p>
@@ -36,7 +35,7 @@ import org.apache.hadoop.io.Text;
  * <h1>Output</h1>
  * 
  * <p>
- * Outputs the user ID as a {@link LongWritable} mapped to the item ID and preference as a
+ * Outputs the user ID as a {@link org.apache.hadoop.io.VLongWritable} mapped to the item ID and preference as a
  * {@link EntityPrefWritable}.
  * </p>
  */

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java Sun May  9 08:44:29 2010
@@ -30,8 +30,8 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.OutputCollector;
@@ -46,7 +46,7 @@ import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.map.OpenIntLongHashMap;
 
 public final class AggregateAndRecommendReducer extends MapReduceBase implements
-    Reducer<LongWritable,VectorWritable,LongWritable,RecommendedItemsWritable> {
+    Reducer<VLongWritable,VectorWritable,VLongWritable,RecommendedItemsWritable> {
 
   static final String ITEMID_INDEX_PATH = "itemIDIndexPath";
   static final String RECOMMENDATIONS_PER_USER = "recommendationsPerUser";
@@ -69,7 +69,7 @@ public final class AggregateAndRecommend
       Path itemIDIndexPath = new Path(jobConf.get(ITEMID_INDEX_PATH)).makeQualified(fs);
       indexItemIDMap = new OpenIntLongHashMap();
       IntWritable index = new IntWritable();
-      LongWritable id = new LongWritable();
+      VLongWritable id = new VLongWritable();
       for (FileStatus status : fs.listStatus(itemIDIndexPath, PARTS_FILTER)) {
         String path = status.getPath().toString();
         SequenceFile.Reader reader =
@@ -85,9 +85,9 @@ public final class AggregateAndRecommend
   }
 
   @Override
-  public void reduce(LongWritable key,
+  public void reduce(VLongWritable key,
                      Iterator<VectorWritable> values,
-                     OutputCollector<LongWritable, RecommendedItemsWritable> output,
+                     OutputCollector<VLongWritable,RecommendedItemsWritable> output,
                      Reporter reporter) throws IOException {
     if (!values.hasNext()) {
       return;

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateCombiner.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateCombiner.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateCombiner.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateCombiner.java Sun May  9 08:44:29 2010
@@ -20,7 +20,7 @@ package org.apache.mahout.cf.taste.hadoo
 import java.io.IOException;
 import java.util.Iterator;
 
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reducer;
@@ -29,21 +29,22 @@ import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 
 public final class AggregateCombiner extends MapReduceBase implements
-    Reducer<LongWritable,VectorWritable,LongWritable,VectorWritable> {
+    Reducer<VLongWritable,VectorWritable,VLongWritable,VectorWritable> {
 
   @Override
-  public void reduce(LongWritable key,
+  public void reduce(VLongWritable key,
                      Iterator<VectorWritable> values,
-                     OutputCollector<LongWritable, VectorWritable> output,
+                     OutputCollector<VLongWritable,VectorWritable> output,
                      Reporter reporter) throws IOException {
-    if (!values.hasNext()) {
-      return;
+    if (values.hasNext()) {
+      Vector partial = values.next().get();
+      while (values.hasNext()) {
+        partial = partial.plus(values.next().get());
+      }
+      VectorWritable vw = new VectorWritable(partial);
+      vw.setWritesLaxPrecision(true);
+      output.collect(key, vw);
     }
-    Vector partial = values.next().get();
-    while (values.hasNext()) {
-      partial = partial.plus(values.next().get());
-    }
-    output.collect(key, new VectorWritable(partial));
   }
 
 }
\ No newline at end of file

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexMapper.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexMapper.java Sun May  9 08:44:29 2010
@@ -23,25 +23,26 @@ import java.util.regex.Pattern;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.Mapper;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reporter;
 
 public final class ItemIDIndexMapper extends MapReduceBase implements
-    Mapper<LongWritable,Text,IntWritable,LongWritable> {
+    Mapper<LongWritable,Text,IntWritable,VLongWritable> {
   
   private static final Pattern COMMA = Pattern.compile(",");
   
   @Override
   public void map(LongWritable key,
                   Text value,
-                  OutputCollector<IntWritable,LongWritable> output,
+                  OutputCollector<IntWritable,VLongWritable> output,
                   Reporter reporter) throws IOException {
     String[] tokens = ItemIDIndexMapper.COMMA.split(value.toString());
     long itemID = Long.parseLong(tokens[1]);
     int index = idToIndex(itemID);
-    output.collect(new IntWritable(index), new LongWritable(itemID));
+    output.collect(new IntWritable(index), new VLongWritable(itemID));
   }
   
   static int idToIndex(long itemID) {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexReducer.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexReducer.java Sun May  9 08:44:29 2010
@@ -21,19 +21,19 @@ import java.io.IOException;
 import java.util.Iterator;
 
 import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.Reporter;
 
 public final class ItemIDIndexReducer extends MapReduceBase implements
-    Reducer<IntWritable,LongWritable,IntWritable,LongWritable> {
+    Reducer<IntWritable,VLongWritable,IntWritable,VLongWritable> {
   
   @Override
   public void reduce(IntWritable index,
-                     Iterator<LongWritable> possibleItemIDs,
-                     OutputCollector<IntWritable,LongWritable> output,
+                     Iterator<VLongWritable> possibleItemIDs,
+                     OutputCollector<IntWritable,VLongWritable> output,
                      Reporter reporter) throws IOException {
     if (possibleItemIDs.hasNext()) {
       long minimumItemID = Long.MAX_VALUE;
@@ -43,7 +43,7 @@ public final class ItemIDIndexReducer ex
           minimumItemID = itemID;
         }
       }
-      output.collect(index, new LongWritable(minimumItemID));
+      output.collect(index, new VLongWritable(minimumItemID));
     }
   }
   

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PartialMultiplyReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PartialMultiplyReducer.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PartialMultiplyReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PartialMultiplyReducer.java Sun May  9 08:44:29 2010
@@ -21,7 +21,7 @@ import java.io.IOException;
 import java.util.Iterator;
 
 import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reducer;
@@ -32,20 +32,21 @@ import org.apache.mahout.math.function.L
 import org.apache.mahout.math.map.OpenLongFloatHashMap;
 
 public final class PartialMultiplyReducer extends MapReduceBase implements
-    Reducer<IntWritable,VectorOrPrefWritable,LongWritable,VectorWritable> {
+    Reducer<IntWritable,VectorOrPrefWritable,VLongWritable,VectorWritable> {
 
   @Override
   public void reduce(IntWritable key,
                      Iterator<VectorOrPrefWritable> values,
-                     final OutputCollector<LongWritable,VectorWritable> output,
+                     final OutputCollector<VLongWritable,VectorWritable> output,
                      Reporter reporter) throws IOException {
 
     OpenLongFloatHashMap savedValues = new OpenLongFloatHashMap();
-    Vector cooccurrenceColumn = null;
     final int itemIndex = key.get();
-    final LongWritable userIDWritable = new LongWritable();
+    final VLongWritable userIDWritable = new VLongWritable();
     final VectorWritable vectorWritable = new VectorWritable();
+    vectorWritable.setWritesLaxPrecision(true);
 
+    Vector cooccurrenceColumn = null;
     while (values.hasNext()) {
 
       VectorOrPrefWritable value = values.next();
@@ -60,7 +61,8 @@ public final class PartialMultiplyReduce
           savedValues.put(userID, preferenceValue);
         } else {
           // Have seen it
-          Vector partialProduct = cooccurrenceColumn.times(preferenceValue);
+          Vector partialProduct = preferenceValue == 1.0f ?
+              cooccurrenceColumn : cooccurrenceColumn.times(preferenceValue);
           // This makes sure this item isn't recommended for this user:
           partialProduct.set(itemIndex, Double.NEGATIVE_INFINITY);
           userIDWritable.set(userID);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java Sun May  9 08:44:29 2010
@@ -28,7 +28,7 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.mapred.JobClient;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.Partitioner;
@@ -102,8 +102,8 @@ public final class RecommenderJob extend
 
     JobConf itemIDIndexConf = prepareJobConf(
       inputPath, itemIDIndexPath, TextInputFormat.class,
-      ItemIDIndexMapper.class, IntWritable.class, LongWritable.class,
-      ItemIDIndexReducer.class, IntWritable.class, LongWritable.class,
+      ItemIDIndexMapper.class, IntWritable.class, VLongWritable.class,
+      ItemIDIndexReducer.class, IntWritable.class, VLongWritable.class,
       SequenceFileOutputFormat.class);
     itemIDIndexConf.setClass("mapred.combiner.class", ItemIDIndexReducer.class, Reducer.class);    
     if (shouldRunNextPhase(parsedArgs, currentPhase)) {
@@ -112,8 +112,8 @@ public final class RecommenderJob extend
     
     JobConf toUserVectorConf = prepareJobConf(
       inputPath, userVectorPath, TextInputFormat.class,
-      ToItemPrefsMapper.class, LongWritable.class, booleanData ? LongWritable.class : EntityPrefWritable.class,
-      ToUserVectorReducer.class, LongWritable.class, VectorWritable.class,
+      ToItemPrefsMapper.class, VLongWritable.class, booleanData ? VLongWritable.class : EntityPrefWritable.class,
+      ToUserVectorReducer.class, VLongWritable.class, VectorWritable.class,
       SequenceFileOutputFormat.class);
     toUserVectorConf.setBoolean(BOOLEAN_DATA, booleanData);
     if (shouldRunNextPhase(parsedArgs, currentPhase)) {
@@ -135,7 +135,7 @@ public final class RecommenderJob extend
     JobConf partialMultiplyConf = prepareJobConf(
       cooccurrencePath, parialMultiplyPath, SequenceFileInputFormat.class,
       CooccurrenceColumnWrapperMapper.class, IntWritable.class, VectorOrPrefWritable.class,
-      PartialMultiplyReducer.class, LongWritable.class, VectorWritable.class,
+      PartialMultiplyReducer.class, VLongWritable.class, VectorWritable.class,
       SequenceFileOutputFormat.class);
     MultipleInputs.addInputPath(
         partialMultiplyConf,
@@ -154,8 +154,8 @@ public final class RecommenderJob extend
 
     JobConf aggregateAndRecommendConf = prepareJobConf(
         parialMultiplyPath, outputPath, SequenceFileInputFormat.class,
-        IdentityMapper.class, LongWritable.class, VectorWritable.class,
-        AggregateAndRecommendReducer.class, LongWritable.class, RecommendedItemsWritable.class,
+        IdentityMapper.class, VLongWritable.class, VectorWritable.class,
+        AggregateAndRecommendReducer.class, VLongWritable.class, RecommendedItemsWritable.class,
         TextOutputFormat.class);
     setIOSort(aggregateAndRecommendConf);
     aggregateAndRecommendConf.setClass("mapred.combiner.class", AggregateCombiner.class, Reducer.class);
@@ -170,19 +170,22 @@ public final class RecommenderJob extend
 
   private static void setIOSort(JobConf conf) {
     conf.setInt("io.sort.factor", 100);
-    conf.setInt("io.sort.mb", 200);
+    int assumedHeapSize = 512;
     String javaOpts = conf.get("mapred.child.java.opts");
     if (javaOpts != null) {
       Matcher m = Pattern.compile("-Xmx([0-9]+)([mMgG])").matcher(javaOpts);
       if (m.find()) {
-        int heapMB = Integer.parseInt(m.group(1));
+        assumedHeapSize = Integer.parseInt(m.group(1));
         String megabyteOrGigabyte = m.group(2);
         if ("g".equalsIgnoreCase(megabyteOrGigabyte)) {
-          heapMB *= 1024;
+          assumedHeapSize *= 1024;
         }
-        conf.setInt("io.sort.mb", heapMB / 2);
       }
     }
+    conf.setInt("io.sort.mb", assumedHeapSize / 2);
+    // For some reason the Merger doesn't report status for a long time; increase
+    // timeout when running these jobs
+    conf.setInt("mapred.task.timeout", 60*60*1000);
   }
   
   public static void main(String[] args) throws Exception {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java Sun May  9 08:44:29 2010
@@ -20,7 +20,7 @@ package org.apache.mahout.cf.taste.hadoo
 import java.io.IOException;
 import java.util.Iterator;
 
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reducer;
@@ -34,7 +34,7 @@ import org.apache.mahout.math.VectorWrit
  * <h1>Input</h1>
  * 
  * <p>
- * Takes user IDs as {@link LongWritable} mapped to all associated item IDs and preference values, as
+ * Takes user IDs as {@link VLongWritable} mapped to all associated item IDs and preference values, as
  * {@link EntityPrefWritable}s.
  * </p>
  * 
@@ -48,19 +48,19 @@ import org.apache.mahout.math.VectorWrit
  * </p>
  */
 public final class ToUserVectorReducer extends MapReduceBase implements
-    Reducer<LongWritable,LongWritable,LongWritable, VectorWritable> {
+    Reducer<VLongWritable,VLongWritable,VLongWritable,VectorWritable> {
   
   @Override
-  public void reduce(LongWritable userID,
-                     Iterator<LongWritable> itemPrefs,
-                     OutputCollector<LongWritable,VectorWritable> output,
+  public void reduce(VLongWritable userID,
+                     Iterator<VLongWritable> itemPrefs,
+                     OutputCollector<VLongWritable,VectorWritable> output,
                      Reporter reporter) throws IOException {
     if (!itemPrefs.hasNext()) {
       return;
     }
     Vector userVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
     while (itemPrefs.hasNext()) {
-      LongWritable itemPref = itemPrefs.next();
+      VLongWritable itemPref = itemPrefs.next();
       int index = ItemIDIndexMapper.idToIndex(itemPref.get());
       float value;
       if (itemPref instanceof EntityPrefWritable) {
@@ -71,7 +71,9 @@ public final class ToUserVectorReducer e
       userVector.set(index, value);
     }
 
-    output.collect(userID, new VectorWritable(userVector));
+    VectorWritable vw = new VectorWritable(userVector);
+    vw.setWritesLaxPrecision(true);
+    output.collect(userID, vw);
   }
   
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java Sun May  9 08:44:29 2010
@@ -24,7 +24,7 @@ import org.apache.hadoop.fs.FSDataInputS
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.Mapper;
@@ -36,7 +36,7 @@ import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 
 public final class UserVectorSplitterMapper extends MapReduceBase implements
-    Mapper<LongWritable, VectorWritable,IntWritable,VectorOrPrefWritable> {
+    Mapper<VLongWritable,VectorWritable,IntWritable,VectorOrPrefWritable> {
 
   static final String USERS_FILE = "usersFile";
   
@@ -63,7 +63,7 @@ public final class UserVectorSplitterMap
   }
 
   @Override
-  public void map(LongWritable key,
+  public void map(VLongWritable key,
                   VectorWritable value,
                   OutputCollector<IntWritable,VectorOrPrefWritable> output,
                   Reporter reporter) throws IOException {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java Sun May  9 08:44:29 2010
@@ -22,7 +22,7 @@ import java.util.Arrays;
 import java.util.Iterator;
 
 import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.Mapper;
 import org.apache.hadoop.mapred.OutputCollector;
@@ -33,7 +33,7 @@ import org.apache.mahout.math.list.IntAr
 import org.apache.mahout.math.map.OpenIntIntHashMap;
 
 public final class UserVectorToCooccurrenceMapper extends MapReduceBase implements
-    Mapper<LongWritable, VectorWritable,IndexIndexWritable,IntWritable> {
+    Mapper<VLongWritable,VectorWritable,IndexIndexWritable,IntWritable> {
 
   private static final int MAX_PREFS_CONSIDERED = 50;
 
@@ -41,7 +41,7 @@ public final class UserVectorToCooccurre
   private final OpenIntIntHashMap indexCounts = new OpenIntIntHashMap();
 
   @Override
-  public void map(LongWritable userID,
+  public void map(VLongWritable userID,
                   VectorWritable userVectorWritable,
                   OutputCollector<IndexIndexWritable,IntWritable> output,
                   Reporter reporter) throws IOException {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java Sun May  9 08:44:29 2010
@@ -65,7 +65,9 @@ public final class UserVectorToCooccurre
       }
     } else {
       if (cooccurrenceRow != null) {
-        output.collect(new IntWritable(lastItem1ID), new VectorWritable(cooccurrenceRow));
+        VectorWritable vw = new VectorWritable(cooccurrenceRow);
+        vw.setWritesLaxPrecision(true);
+        output.collect(new IntWritable(lastItem1ID), vw);
       }
       lastItem1ID = item1ID;
       lastItem2ID = item2ID;

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/VectorOrPrefWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/VectorOrPrefWritable.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/VectorOrPrefWritable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/VectorOrPrefWritable.java Sun May  9 08:44:29 2010
@@ -23,6 +23,7 @@ import java.io.DataOutput;
 import java.io.IOException;
 
 import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableUtils;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 
@@ -72,11 +73,13 @@ public final class VectorOrPrefWritable 
   public void write(DataOutput out) throws IOException {
     if (vector == null) {
       out.writeBoolean(false);
-      out.writeLong(userID);
+      WritableUtils.writeVLong(out, userID);
       out.writeFloat(value);
     } else {
       out.writeBoolean(true);
-      new VectorWritable(vector).write(out);
+      VectorWritable vw = new VectorWritable(vector);
+      vw.setWritesLaxPrecision(true);
+      vw.write(out);
     }
   }
 
@@ -88,7 +91,7 @@ public final class VectorOrPrefWritable 
       writable.readFields(in);
       set(writable.get());
     } else {
-      long theUserID = in.readLong();
+      long theUserID = WritableUtils.readVLong(in);
       float theValue = in.readFloat();
       set(theUserID, theValue);
     }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java Sun May  9 08:44:29 2010
@@ -22,8 +22,8 @@ import java.util.Map;
 
 import org.apache.commons.cli2.Option;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.io.compress.CompressionCodec;
 import org.apache.hadoop.io.compress.GzipCodec;
 import org.apache.hadoop.mapred.JobClient;
@@ -130,8 +130,8 @@ public final class RecommenderJob extend
     int recommendationsPerUser = Integer.parseInt(parsedArgs.get("--numRecommendations"));
     
     JobConf jobConf = prepareJobConf(usersFile, outputPath, TextInputFormat.class,
-      UserIDsMapper.class, LongWritable.class, NullWritable.class, RecommenderReducer.class,
-      LongWritable.class, RecommendedItemsWritable.class, TextOutputFormat.class);
+      UserIDsMapper.class, VLongWritable.class, NullWritable.class, RecommenderReducer.class,
+      VLongWritable.class, RecommendedItemsWritable.class, TextOutputFormat.class);
     
     jobConf.set(RecommenderReducer.RECOMMENDER_CLASS_NAME, recommendClassName);
     jobConf.setInt(RecommenderReducer.RECOMMENDATIONS_PER_USER, recommendationsPerUser);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderReducer.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderReducer.java Sun May  9 08:44:29 2010
@@ -26,8 +26,8 @@ import java.util.List;
 
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.OutputCollector;
@@ -50,7 +50,7 @@ import org.apache.mahout.cf.taste.recomm
  * @see RecommenderJob
  */
 public final class RecommenderReducer extends MapReduceBase implements
-    Reducer<LongWritable,NullWritable,LongWritable,RecommendedItemsWritable> {
+    Reducer<VLongWritable,NullWritable,VLongWritable,RecommendedItemsWritable> {
   
   static final String RECOMMENDER_CLASS_NAME = "recommenderClassName";
   static final String RECOMMENDATIONS_PER_USER = "recommendationsPerUser";
@@ -94,9 +94,9 @@ public final class RecommenderReducer ex
   }
   
   @Override
-  public void reduce(LongWritable key,
+  public void reduce(VLongWritable key,
                      Iterator<NullWritable> values,
-                     OutputCollector<LongWritable,RecommendedItemsWritable> output,
+                     OutputCollector<VLongWritable,RecommendedItemsWritable> output,
                      Reporter reporter) throws IOException {
     long userID = key.get();
     List<RecommendedItem> recommendedItems;
@@ -112,7 +112,7 @@ public final class RecommenderReducer ex
       }
     }
     RecommendedItemsWritable writable = new RecommendedItemsWritable(recommendedItems);
-    output.collect(new LongWritable(userID), writable);
+    output.collect(key, writable);
     reporter.getCounter(ReducerMetrics.USERS_PROCESSED).increment(1L);
     reporter.getCounter(ReducerMetrics.RECOMMENDATIONS_MADE).increment(recommendedItems.size());
   }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/UserIDsMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/UserIDsMapper.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/UserIDsMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/UserIDsMapper.java Sun May  9 08:44:29 2010
@@ -22,6 +22,7 @@ import java.io.IOException;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.Mapper;
 import org.apache.hadoop.mapred.OutputCollector;
@@ -31,17 +32,17 @@ import org.apache.hadoop.mapred.Reporter
  * Extracts and emits all user IDs from the users file, or input file.
  */
 public final class UserIDsMapper extends MapReduceBase implements
-    Mapper<LongWritable,Text,LongWritable,NullWritable> {
+    Mapper<LongWritable,Text,VLongWritable,NullWritable> {
   
   @Override
   public void map(LongWritable key,
                   Text value,
-                  OutputCollector<LongWritable,NullWritable> output,
+                  OutputCollector<VLongWritable,NullWritable> output,
                   Reporter reporter) throws IOException {
     String line = value.toString();
     int comma = line.indexOf(',');
     long userID = comma >= 0 ? Long.parseLong(line.substring(0, comma)) : Long.parseLong(line);
-    output.collect(new LongWritable(userID), NullWritable.get());
+    output.collect(new VLongWritable(userID), NullWritable.get());
   }
   
 }
\ No newline at end of file

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java Sun May  9 08:44:29 2010
@@ -19,7 +19,7 @@ package org.apache.mahout.cf.taste.hadoo
 
 import java.io.IOException;
 
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.Mapper;
 import org.apache.hadoop.mapred.OutputCollector;
@@ -31,10 +31,10 @@ import org.apache.mahout.cf.taste.hadoop
  * of the associated item vectors
  */
 public final class CopreferredItemsMapper extends MapReduceBase
-    implements Mapper<LongWritable,ItemPrefWithItemVectorWeightArrayWritable,ItemPairWritable,CoRating> {
+    implements Mapper<VLongWritable,ItemPrefWithItemVectorWeightArrayWritable,ItemPairWritable,CoRating> {
 
   @Override
-  public void map(LongWritable user,
+  public void map(VLongWritable user,
                   ItemPrefWithItemVectorWeightArrayWritable itemPrefsArray,
                   OutputCollector<ItemPairWritable, CoRating> output,
                   Reporter reporter)

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java Sun May  9 08:44:29 2010
@@ -23,7 +23,7 @@ import java.util.Map;
 import org.apache.commons.cli2.Option;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.DoubleWritable;
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.mapred.JobClient;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.SequenceFileInputFormat;
@@ -129,10 +129,10 @@ public final class ItemSimilarityJob ext
                                          itemVectorsPath,
                                          TextInputFormat.class,
                                          ToUserPrefsMapper.class,
-                                         LongWritable.class,
+                                         VLongWritable.class,
                                          EntityPrefWritable.class,
                                          ToItemVectorReducer.class,
-                                         LongWritable.class,
+                                         VLongWritable.class,
                                          EntityPrefWritableArrayWritable.class,
                                          SequenceFileOutputFormat.class);
     JobClient.runJob(itemVectors);
@@ -141,10 +141,10 @@ public final class ItemSimilarityJob ext
                                          userVectorsPath,
                                          SequenceFileInputFormat.class,
                                          PreferredItemsPerUserMapper.class,
-                                         LongWritable.class,
+                                         VLongWritable.class,
                                          ItemPrefWithItemVectorWeightWritable.class,
                                          PreferredItemsPerUserReducer.class,
-                                         LongWritable.class,
+                                         VLongWritable.class,
                                          ItemPrefWithItemVectorWeightArrayWritable.class,
                                          SequenceFileOutputFormat.class);
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java Sun May  9 08:44:29 2010
@@ -21,7 +21,7 @@ import java.io.IOException;
 import java.util.Iterator;
 import java.util.NoSuchElementException;
 
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.Mapper;
@@ -36,7 +36,7 @@ import org.apache.mahout.cf.taste.hadoop
  * so we can create the user-vectors in the reducer
  */
 public final class PreferredItemsPerUserMapper extends MapReduceBase
-    implements Mapper<LongWritable,EntityPrefWritableArrayWritable,LongWritable,ItemPrefWithItemVectorWeightWritable> {
+    implements Mapper<VLongWritable,EntityPrefWritableArrayWritable,VLongWritable,ItemPrefWithItemVectorWeightWritable> {
 
   private DistributedSimilarity distributedSimilarity;
 
@@ -48,9 +48,9 @@ public final class PreferredItemsPerUser
   }
 
   @Override
-  public void map(LongWritable item,
+  public void map(VLongWritable item,
                   EntityPrefWritableArrayWritable userPrefsArray,
-                  OutputCollector<LongWritable,ItemPrefWithItemVectorWeightWritable> output,
+                  OutputCollector<VLongWritable,ItemPrefWithItemVectorWeightWritable> output,
                   Reporter reporter) throws IOException {
 
     EntityPrefWritable[] userPrefs = userPrefsArray.getPrefs();
@@ -58,7 +58,7 @@ public final class PreferredItemsPerUser
     double weight = distributedSimilarity.weightOfItemVector(new UserPrefsIterator(userPrefs));
 
     for (EntityPrefWritable userPref : userPrefs) {
-      output.collect(new LongWritable(userPref.getID()),
+      output.collect(new VLongWritable(userPref.getID()),
           new ItemPrefWithItemVectorWeightWritable(item.get(), weight, userPref.getPrefValue()));
     }
   }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java Sun May  9 08:44:29 2010
@@ -22,19 +22,19 @@ import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Set;
 
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.Reporter;
 
 public final class PreferredItemsPerUserReducer extends MapReduceBase
-    implements Reducer<LongWritable,ItemPrefWithItemVectorWeightWritable, LongWritable,ItemPrefWithItemVectorWeightArrayWritable> {
+    implements Reducer<VLongWritable,ItemPrefWithItemVectorWeightWritable,VLongWritable,ItemPrefWithItemVectorWeightArrayWritable> {
 
   @Override
-  public void reduce(LongWritable user,
+  public void reduce(VLongWritable user,
                      Iterator<ItemPrefWithItemVectorWeightWritable> itemPrefs,
-                     OutputCollector<LongWritable,ItemPrefWithItemVectorWeightArrayWritable> output,
+                     OutputCollector<VLongWritable,ItemPrefWithItemVectorWeightArrayWritable> output,
                      Reporter reporter)
       throws IOException {
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java Sun May  9 08:44:29 2010
@@ -22,7 +22,7 @@ import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Set;
 
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reducer;
@@ -36,12 +36,12 @@ import org.apache.mahout.cf.taste.hadoop
  */
 public final class ToItemVectorReducer
     extends MapReduceBase implements
-    Reducer<LongWritable,EntityPrefWritable,LongWritable,EntityPrefWritableArrayWritable> {
+    Reducer<VLongWritable,EntityPrefWritable,VLongWritable,EntityPrefWritableArrayWritable> {
 
   @Override
-  public void reduce(LongWritable item,
+  public void reduce(VLongWritable item,
                      Iterator<EntityPrefWritable> userPrefs,
-                     OutputCollector<LongWritable,EntityPrefWritableArrayWritable> output,
+                     OutputCollector<VLongWritable,EntityPrefWritableArrayWritable> output,
                      Reporter reporter)
       throws IOException {
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java Sun May  9 08:44:29 2010
@@ -22,7 +22,7 @@ import java.util.Map;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.io.compress.CompressionCodec;
 import org.apache.hadoop.io.compress.GzipCodec;
 import org.apache.hadoop.mapred.JobClient;
@@ -54,7 +54,7 @@ public final class SlopeOneAverageDiffsJ
     String averagesOutputPath = parsedArgs.get("--tempDir");
     
     JobConf prefsToDiffsJobConf = prepareJobConf(prefsFile, averagesOutputPath,
-      TextInputFormat.class, ToItemPrefsMapper.class, LongWritable.class, EntityPrefWritable.class,
+      TextInputFormat.class, ToItemPrefsMapper.class, VLongWritable.class, EntityPrefWritable.class,
       SlopeOnePrefsToDiffsReducer.class, EntityEntityWritable.class, FloatWritable.class,
       SequenceFileOutputFormat.class);
     JobClient.runJob(prefsToDiffsJobConf);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOnePrefsToDiffsReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOnePrefsToDiffsReducer.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOnePrefsToDiffsReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOnePrefsToDiffsReducer.java Sun May  9 08:44:29 2010
@@ -24,7 +24,7 @@ import java.util.Iterator;
 import java.util.List;
 
 import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reducer;
@@ -33,10 +33,10 @@ import org.apache.mahout.cf.taste.hadoop
 import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
 
 public final class SlopeOnePrefsToDiffsReducer extends MapReduceBase implements
-    Reducer<LongWritable, EntityPrefWritable, EntityEntityWritable,FloatWritable> {
+    Reducer<VLongWritable,EntityPrefWritable,EntityEntityWritable,FloatWritable> {
   
   @Override
-  public void reduce(LongWritable key,
+  public void reduce(VLongWritable key,
                      Iterator<EntityPrefWritable> values,
                      OutputCollector<EntityEntityWritable,FloatWritable> output,
                      Reporter reporter) throws IOException {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java Sun May  9 08:44:29 2010
@@ -18,8 +18,8 @@
 package org.apache.mahout.common;
 
 import java.io.IOException;
-import java.util.HashMap;
 import java.util.Map;
+import java.util.TreeMap;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.commons.cli2.Argument;
@@ -136,10 +136,11 @@ public abstract class AbstractJob extend
       return null;
     }
     
-    Map<String,String> result = new HashMap<String,String>();
-    maybePut(result, cmdLine, tempDirOpt, helpOpt);
+    Map<String,String> result = new TreeMap<String,String>();
+    maybePut(result, cmdLine, tempDirOpt, helpOpt, startPhase, endPhase);
     maybePut(result, cmdLine, extraOpts);
-    
+
+    log.info("Command line arguments: {}", result);
     return result;
   }
   
@@ -156,8 +157,13 @@ public abstract class AbstractJob extend
     int phase = currentPhase.getAndIncrement();
     String startPhase = args.get("--startPhase");
     String endPhase = args.get("--endPhase");
-    return !((startPhase != null && phase < Integer.parseInt(startPhase)) ||
-             (endPhase != null && phase > Integer.parseInt(endPhase)));
+    boolean phaseSkipped =
+        (startPhase != null && phase < Integer.parseInt(startPhase)) ||
+        (endPhase != null && phase > Integer.parseInt(endPhase));
+    if (phaseSkipped) {
+      log.info("Skipping phase {}", phase);
+    }
+    return !phaseSkipped;
   }
   
   protected JobConf prepareJobConf(String inputPath,

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java Sun May  9 08:44:29 2010
@@ -32,6 +32,7 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.io.DoubleWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
@@ -52,9 +53,9 @@ import org.easymock.classextension.EasyM
 public final class ItemSimilarityTest extends MahoutTestCase {
 
   public void testUserPrefsPerItemMapper() throws Exception {
-    OutputCollector<LongWritable,LongWritable> output =
+    OutputCollector<VLongWritable,VLongWritable> output =
         EasyMock.createMock(OutputCollector.class);
-    output.collect(new LongWritable(34L), new EntityPrefWritable(12L, 2.3f));
+    output.collect(new VLongWritable(34L), new EntityPrefWritable(12L, 2.3f));
     EasyMock.replay(output);
 
     new ToUserPrefsMapper().map(new LongWritable(), new Text("12,34,2.3"), output, null);
@@ -67,14 +68,14 @@ public final class ItemSimilarityTest ex
     List<EntityPrefWritable> userPrefs = Arrays.asList(
         new EntityPrefWritable(34L, 1.0f), new EntityPrefWritable(56L, 2.0f));
 
-    OutputCollector<LongWritable,EntityPrefWritableArrayWritable> output =
+    OutputCollector<VLongWritable,EntityPrefWritableArrayWritable> output =
         EasyMock.createMock(OutputCollector.class);
 
-    output.collect(EasyMock.eq(new LongWritable(12L)), equalToUserPrefs(userPrefs));
+    output.collect(EasyMock.eq(new VLongWritable(12L)), equalToUserPrefs(userPrefs));
 
     EasyMock.replay(output);
 
-    new ToItemVectorReducer().reduce(new LongWritable(12L), userPrefs.iterator(), output, null);
+    new ToItemVectorReducer().reduce(new VLongWritable(12L), userPrefs.iterator(), output, null);
 
     EasyMock.verify(output);
   }
@@ -112,7 +113,7 @@ public final class ItemSimilarityTest ex
   }
 
   public void testPreferredItemsPerUserMapper() throws Exception {
-    OutputCollector<LongWritable,ItemPrefWithItemVectorWeightWritable> output =
+    OutputCollector<VLongWritable,ItemPrefWithItemVectorWeightWritable> output =
         EasyMock.createMock(OutputCollector.class);
     EntityPrefWritableArrayWritable userPrefs = new EntityPrefWritableArrayWritable(
         new EntityPrefWritable[] {
@@ -122,8 +123,8 @@ public final class ItemSimilarityTest ex
     double weight =
       new DistributedUncenteredZeroAssumingCosineSimilarity().weightOfItemVector(Arrays.asList(2.0f, 3.0f).iterator());
 
-    output.collect(new LongWritable(12L), new ItemPrefWithItemVectorWeightWritable(34L, weight, 2.0f));
-    output.collect(new LongWritable(56L), new ItemPrefWithItemVectorWeightWritable(34L, weight, 3.0f));
+    output.collect(new VLongWritable(12L), new ItemPrefWithItemVectorWeightWritable(34L, weight, 2.0f));
+    output.collect(new VLongWritable(56L), new ItemPrefWithItemVectorWeightWritable(34L, weight, 3.0f));
 
     JobConf conf = new JobConf();
     conf.set(ItemSimilarityJob.DISTRIBUTED_SIMILARITY_CLASSNAME,
@@ -133,7 +134,7 @@ public final class ItemSimilarityTest ex
 
     PreferredItemsPerUserMapper mapper = new PreferredItemsPerUserMapper();
     mapper.configure(conf);
-    mapper.map(new LongWritable(34L), userPrefs, output, null);
+    mapper.map(new VLongWritable(34L), userPrefs, output, null);
 
     EasyMock.verify(output);
   }
@@ -144,15 +145,15 @@ public final class ItemSimilarityTest ex
         Arrays.asList(new ItemPrefWithItemVectorWeightWritable(34L, 5.0, 1.0f),
                       new ItemPrefWithItemVectorWeightWritable(56L, 7.0, 2.0f));
 
-    OutputCollector<LongWritable,ItemPrefWithItemVectorWeightArrayWritable> output =
+    OutputCollector<VLongWritable,ItemPrefWithItemVectorWeightArrayWritable> output =
         EasyMock.createMock(OutputCollector.class);
 
-    output.collect(EasyMock.eq(new LongWritable(12L)), equalToItemPrefs(itemPrefs));
+    output.collect(EasyMock.eq(new VLongWritable(12L)), equalToItemPrefs(itemPrefs));
 
     EasyMock.replay(output);
 
     new PreferredItemsPerUserReducer().reduce(
-        new LongWritable(12L), itemPrefs.iterator(), output, null);
+        new VLongWritable(12L), itemPrefs.iterator(), output, null);
 
     EasyMock.verify(output);
   }
@@ -204,7 +205,7 @@ public final class ItemSimilarityTest ex
 
     EasyMock.replay(output, itemPrefs);
 
-    new CopreferredItemsMapper().map(new LongWritable(), itemPrefs, output, null);
+    new CopreferredItemsMapper().map(new VLongWritable(), itemPrefs, output, null);
 
     EasyMock.verify(output, itemPrefs);
   }