You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/05/09 10:44:30 UTC
svn commit: r942513 - in /lucene/mahout/trunk/core/src:
main/java/org/apache/mahout/cf/taste/hadoop/
main/java/org/apache/mahout/cf/taste/hadoop/item/
main/java/org/apache/mahout/cf/taste/hadoop/pseudo/
main/java/org/apache/mahout/cf/taste/hadoop/simil...
Author: srowen
Date: Sun May 9 08:44:29 2010
New Revision: 942513
URL: http://svn.apache.org/viewvc?rev=942513&view=rev
Log:
More possible improvements -- using VLongWritable and adjusting combiner settings, fixed 'phase' arguments
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityCountWritable.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityEntityWritable.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateCombiner.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PartialMultiplyReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/VectorOrPrefWritable.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/UserIDsMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOnePrefsToDiffsReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityCountWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityCountWritable.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityCountWritable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityCountWritable.java Sun May 9 08:44:29 2010
@@ -21,12 +21,12 @@ import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
/** A {@link Writable} encapsulating an item ID and a count. */
-public final class EntityCountWritable extends LongWritable implements Cloneable {
+public final class EntityCountWritable extends VLongWritable implements Cloneable {
private int count;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityEntityWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityEntityWritable.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityEntityWritable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityEntityWritable.java Sun May 9 08:44:29 2010
@@ -22,6 +22,7 @@ import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableUtils;
import org.apache.mahout.common.RandomUtils;
/** A {@link WritableComparable} encapsulating two items. */
@@ -55,14 +56,14 @@ public final class EntityEntityWritable
@Override
public void write(DataOutput out) throws IOException {
- out.writeLong(aID);
- out.writeLong(bID);
+ WritableUtils.writeVLong(out, aID);
+ WritableUtils.writeVLong(out, bID);
}
@Override
public void readFields(DataInput in) throws IOException {
- aID = in.readLong();
- bID = in.readLong();
+ aID = WritableUtils.readVLong(in);
+ bID = WritableUtils.readVLong(in);
}
@Override
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java Sun May 9 08:44:29 2010
@@ -21,12 +21,12 @@ import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.io.Writable;
import org.apache.mahout.common.RandomUtils;
/** A {@link Writable} encapsulating an item ID and a preference value. */
-public final class EntityPrefWritable extends LongWritable implements Cloneable {
+public final class EntityPrefWritable extends VLongWritable implements Cloneable {
private float prefValue;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java Sun May 9 08:44:29 2010
@@ -24,6 +24,7 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableUtils;
import org.apache.mahout.cf.taste.impl.recommender.GenericRecommendedItem;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
@@ -56,7 +57,7 @@ public final class RecommendedItemsWrita
public void write(DataOutput out) throws IOException {
out.writeInt(recommended.size());
for (RecommendedItem item : recommended) {
- out.writeLong(item.getItemID());
+ WritableUtils.writeVLong(out, item.getItemID());
out.writeFloat(item.getValue());
}
@@ -67,7 +68,7 @@ public final class RecommendedItemsWrita
int size = in.readInt();
recommended = new ArrayList<RecommendedItem>(size);
for (int i = 0; i < size; i++) {
- long itemID = in.readLong();
+ long itemID = WritableUtils.readVLong(in);
float value = in.readFloat();
RecommendedItem recommendedItem = new GenericRecommendedItem(itemID, value);
recommended.add(recommendedItem);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java Sun May 9 08:44:29 2010
@@ -19,6 +19,7 @@ package org.apache.mahout.cf.taste.hadoo
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
@@ -30,7 +31,7 @@ import java.io.IOException;
import java.util.regex.Pattern;
abstract class ToEntityPrefsMapper extends MapReduceBase implements
- Mapper<LongWritable,Text,LongWritable,LongWritable> {
+ Mapper<LongWritable,Text,VLongWritable,VLongWritable> {
static final String TRANSPOSE_USER_ITEM = "transposeUserItem";
@@ -53,7 +54,7 @@ abstract class ToEntityPrefsMapper exten
@Override
public void map(LongWritable key,
Text value,
- OutputCollector<LongWritable,LongWritable> output,
+ OutputCollector<VLongWritable,VLongWritable> output,
Reporter reporter) throws IOException {
String[] tokens = ToEntityPrefsMapper.DELIMITER.split(value.toString());
long userID = Long.parseLong(tokens[0]);
@@ -67,10 +68,10 @@ abstract class ToEntityPrefsMapper exten
itemID = temp;
}
if (booleanData) {
- output.collect(new LongWritable(userID), new LongWritable(itemID));
+ output.collect(new VLongWritable(userID), new VLongWritable(itemID));
} else {
float prefValue = tokens.length > 2 ? Float.parseFloat(tokens[2]) : 1.0f;
- output.collect(new LongWritable(userID), new EntityPrefWritable(itemID, prefValue));
+ output.collect(new VLongWritable(userID), new EntityPrefWritable(itemID, prefValue));
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java Sun May 9 08:44:29 2010
@@ -17,7 +17,6 @@
package org.apache.mahout.cf.taste.hadoop;
-import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
/**
@@ -25,7 +24,7 @@ import org.apache.hadoop.io.Text;
*
* <p>
* Intended for use with {@link org.apache.hadoop.mapred.TextInputFormat}; accepts line number / line pairs as
- * {@link LongWritable}/{@link Text} pairs.
+ * {@link org.apache.hadoop.io.VLongWritable}/{@link Text} pairs.
* </p>
*
* <p>
@@ -36,7 +35,7 @@ import org.apache.hadoop.io.Text;
* <h1>Output</h1>
*
* <p>
- * Outputs the user ID as a {@link LongWritable} mapped to the item ID and preference as a
+ * Outputs the user ID as a {@link org.apache.hadoop.io.VLongWritable} mapped to the item ID and preference as a
* {@link EntityPrefWritable}.
* </p>
*/
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java Sun May 9 08:44:29 2010
@@ -30,8 +30,8 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
@@ -46,7 +46,7 @@ import org.apache.mahout.math.Vector;
import org.apache.mahout.math.map.OpenIntLongHashMap;
public final class AggregateAndRecommendReducer extends MapReduceBase implements
- Reducer<LongWritable,VectorWritable,LongWritable,RecommendedItemsWritable> {
+ Reducer<VLongWritable,VectorWritable,VLongWritable,RecommendedItemsWritable> {
static final String ITEMID_INDEX_PATH = "itemIDIndexPath";
static final String RECOMMENDATIONS_PER_USER = "recommendationsPerUser";
@@ -69,7 +69,7 @@ public final class AggregateAndRecommend
Path itemIDIndexPath = new Path(jobConf.get(ITEMID_INDEX_PATH)).makeQualified(fs);
indexItemIDMap = new OpenIntLongHashMap();
IntWritable index = new IntWritable();
- LongWritable id = new LongWritable();
+ VLongWritable id = new VLongWritable();
for (FileStatus status : fs.listStatus(itemIDIndexPath, PARTS_FILTER)) {
String path = status.getPath().toString();
SequenceFile.Reader reader =
@@ -85,9 +85,9 @@ public final class AggregateAndRecommend
}
@Override
- public void reduce(LongWritable key,
+ public void reduce(VLongWritable key,
Iterator<VectorWritable> values,
- OutputCollector<LongWritable, RecommendedItemsWritable> output,
+ OutputCollector<VLongWritable,RecommendedItemsWritable> output,
Reporter reporter) throws IOException {
if (!values.hasNext()) {
return;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateCombiner.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateCombiner.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateCombiner.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateCombiner.java Sun May 9 08:44:29 2010
@@ -20,7 +20,7 @@ package org.apache.mahout.cf.taste.hadoo
import java.io.IOException;
import java.util.Iterator;
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
@@ -29,21 +29,22 @@ import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
public final class AggregateCombiner extends MapReduceBase implements
- Reducer<LongWritable,VectorWritable,LongWritable,VectorWritable> {
+ Reducer<VLongWritable,VectorWritable,VLongWritable,VectorWritable> {
@Override
- public void reduce(LongWritable key,
+ public void reduce(VLongWritable key,
Iterator<VectorWritable> values,
- OutputCollector<LongWritable, VectorWritable> output,
+ OutputCollector<VLongWritable,VectorWritable> output,
Reporter reporter) throws IOException {
- if (!values.hasNext()) {
- return;
+ if (values.hasNext()) {
+ Vector partial = values.next().get();
+ while (values.hasNext()) {
+ partial = partial.plus(values.next().get());
+ }
+ VectorWritable vw = new VectorWritable(partial);
+ vw.setWritesLaxPrecision(true);
+ output.collect(key, vw);
}
- Vector partial = values.next().get();
- while (values.hasNext()) {
- partial = partial.plus(values.next().get());
- }
- output.collect(key, new VectorWritable(partial));
}
}
\ No newline at end of file
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexMapper.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexMapper.java Sun May 9 08:44:29 2010
@@ -23,25 +23,26 @@ import java.util.regex.Pattern;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
public final class ItemIDIndexMapper extends MapReduceBase implements
- Mapper<LongWritable,Text,IntWritable,LongWritable> {
+ Mapper<LongWritable,Text,IntWritable,VLongWritable> {
private static final Pattern COMMA = Pattern.compile(",");
@Override
public void map(LongWritable key,
Text value,
- OutputCollector<IntWritable,LongWritable> output,
+ OutputCollector<IntWritable,VLongWritable> output,
Reporter reporter) throws IOException {
String[] tokens = ItemIDIndexMapper.COMMA.split(value.toString());
long itemID = Long.parseLong(tokens[1]);
int index = idToIndex(itemID);
- output.collect(new IntWritable(index), new LongWritable(itemID));
+ output.collect(new IntWritable(index), new VLongWritable(itemID));
}
static int idToIndex(long itemID) {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexReducer.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexReducer.java Sun May 9 08:44:29 2010
@@ -21,19 +21,19 @@ import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
public final class ItemIDIndexReducer extends MapReduceBase implements
- Reducer<IntWritable,LongWritable,IntWritable,LongWritable> {
+ Reducer<IntWritable,VLongWritable,IntWritable,VLongWritable> {
@Override
public void reduce(IntWritable index,
- Iterator<LongWritable> possibleItemIDs,
- OutputCollector<IntWritable,LongWritable> output,
+ Iterator<VLongWritable> possibleItemIDs,
+ OutputCollector<IntWritable,VLongWritable> output,
Reporter reporter) throws IOException {
if (possibleItemIDs.hasNext()) {
long minimumItemID = Long.MAX_VALUE;
@@ -43,7 +43,7 @@ public final class ItemIDIndexReducer ex
minimumItemID = itemID;
}
}
- output.collect(index, new LongWritable(minimumItemID));
+ output.collect(index, new VLongWritable(minimumItemID));
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PartialMultiplyReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PartialMultiplyReducer.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PartialMultiplyReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PartialMultiplyReducer.java Sun May 9 08:44:29 2010
@@ -21,7 +21,7 @@ import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
@@ -32,20 +32,21 @@ import org.apache.mahout.math.function.L
import org.apache.mahout.math.map.OpenLongFloatHashMap;
public final class PartialMultiplyReducer extends MapReduceBase implements
- Reducer<IntWritable,VectorOrPrefWritable,LongWritable,VectorWritable> {
+ Reducer<IntWritable,VectorOrPrefWritable,VLongWritable,VectorWritable> {
@Override
public void reduce(IntWritable key,
Iterator<VectorOrPrefWritable> values,
- final OutputCollector<LongWritable,VectorWritable> output,
+ final OutputCollector<VLongWritable,VectorWritable> output,
Reporter reporter) throws IOException {
OpenLongFloatHashMap savedValues = new OpenLongFloatHashMap();
- Vector cooccurrenceColumn = null;
final int itemIndex = key.get();
- final LongWritable userIDWritable = new LongWritable();
+ final VLongWritable userIDWritable = new VLongWritable();
final VectorWritable vectorWritable = new VectorWritable();
+ vectorWritable.setWritesLaxPrecision(true);
+ Vector cooccurrenceColumn = null;
while (values.hasNext()) {
VectorOrPrefWritable value = values.next();
@@ -60,7 +61,8 @@ public final class PartialMultiplyReduce
savedValues.put(userID, preferenceValue);
} else {
// Have seen it
- Vector partialProduct = cooccurrenceColumn.times(preferenceValue);
+ Vector partialProduct = preferenceValue == 1.0f ?
+ cooccurrenceColumn : cooccurrenceColumn.times(preferenceValue);
// This makes sure this item isn't recommended for this user:
partialProduct.set(itemIndex, Double.NEGATIVE_INFINITY);
userIDWritable.set(userID);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java Sun May 9 08:44:29 2010
@@ -28,7 +28,7 @@ import org.apache.hadoop.conf.Configurat
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Partitioner;
@@ -102,8 +102,8 @@ public final class RecommenderJob extend
JobConf itemIDIndexConf = prepareJobConf(
inputPath, itemIDIndexPath, TextInputFormat.class,
- ItemIDIndexMapper.class, IntWritable.class, LongWritable.class,
- ItemIDIndexReducer.class, IntWritable.class, LongWritable.class,
+ ItemIDIndexMapper.class, IntWritable.class, VLongWritable.class,
+ ItemIDIndexReducer.class, IntWritable.class, VLongWritable.class,
SequenceFileOutputFormat.class);
itemIDIndexConf.setClass("mapred.combiner.class", ItemIDIndexReducer.class, Reducer.class);
if (shouldRunNextPhase(parsedArgs, currentPhase)) {
@@ -112,8 +112,8 @@ public final class RecommenderJob extend
JobConf toUserVectorConf = prepareJobConf(
inputPath, userVectorPath, TextInputFormat.class,
- ToItemPrefsMapper.class, LongWritable.class, booleanData ? LongWritable.class : EntityPrefWritable.class,
- ToUserVectorReducer.class, LongWritable.class, VectorWritable.class,
+ ToItemPrefsMapper.class, VLongWritable.class, booleanData ? VLongWritable.class : EntityPrefWritable.class,
+ ToUserVectorReducer.class, VLongWritable.class, VectorWritable.class,
SequenceFileOutputFormat.class);
toUserVectorConf.setBoolean(BOOLEAN_DATA, booleanData);
if (shouldRunNextPhase(parsedArgs, currentPhase)) {
@@ -135,7 +135,7 @@ public final class RecommenderJob extend
JobConf partialMultiplyConf = prepareJobConf(
cooccurrencePath, parialMultiplyPath, SequenceFileInputFormat.class,
CooccurrenceColumnWrapperMapper.class, IntWritable.class, VectorOrPrefWritable.class,
- PartialMultiplyReducer.class, LongWritable.class, VectorWritable.class,
+ PartialMultiplyReducer.class, VLongWritable.class, VectorWritable.class,
SequenceFileOutputFormat.class);
MultipleInputs.addInputPath(
partialMultiplyConf,
@@ -154,8 +154,8 @@ public final class RecommenderJob extend
JobConf aggregateAndRecommendConf = prepareJobConf(
parialMultiplyPath, outputPath, SequenceFileInputFormat.class,
- IdentityMapper.class, LongWritable.class, VectorWritable.class,
- AggregateAndRecommendReducer.class, LongWritable.class, RecommendedItemsWritable.class,
+ IdentityMapper.class, VLongWritable.class, VectorWritable.class,
+ AggregateAndRecommendReducer.class, VLongWritable.class, RecommendedItemsWritable.class,
TextOutputFormat.class);
setIOSort(aggregateAndRecommendConf);
aggregateAndRecommendConf.setClass("mapred.combiner.class", AggregateCombiner.class, Reducer.class);
@@ -170,19 +170,22 @@ public final class RecommenderJob extend
private static void setIOSort(JobConf conf) {
conf.setInt("io.sort.factor", 100);
- conf.setInt("io.sort.mb", 200);
+ int assumedHeapSize = 512;
String javaOpts = conf.get("mapred.child.java.opts");
if (javaOpts != null) {
Matcher m = Pattern.compile("-Xmx([0-9]+)([mMgG])").matcher(javaOpts);
if (m.find()) {
- int heapMB = Integer.parseInt(m.group(1));
+ assumedHeapSize = Integer.parseInt(m.group(1));
String megabyteOrGigabyte = m.group(2);
if ("g".equalsIgnoreCase(megabyteOrGigabyte)) {
- heapMB *= 1024;
+ assumedHeapSize *= 1024;
}
- conf.setInt("io.sort.mb", heapMB / 2);
}
}
+ conf.setInt("io.sort.mb", assumedHeapSize / 2);
+ // For some reason the Merger doesn't report status for a long time; increase
+ // timeout when running these jobs
+ conf.setInt("mapred.task.timeout", 60*60*1000);
}
public static void main(String[] args) throws Exception {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java Sun May 9 08:44:29 2010
@@ -20,7 +20,7 @@ package org.apache.mahout.cf.taste.hadoo
import java.io.IOException;
import java.util.Iterator;
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
@@ -34,7 +34,7 @@ import org.apache.mahout.math.VectorWrit
* <h1>Input</h1>
*
* <p>
- * Takes user IDs as {@link LongWritable} mapped to all associated item IDs and preference values, as
+ * Takes user IDs as {@link VLongWritable} mapped to all associated item IDs and preference values, as
* {@link EntityPrefWritable}s.
* </p>
*
@@ -48,19 +48,19 @@ import org.apache.mahout.math.VectorWrit
* </p>
*/
public final class ToUserVectorReducer extends MapReduceBase implements
- Reducer<LongWritable,LongWritable,LongWritable, VectorWritable> {
+ Reducer<VLongWritable,VLongWritable,VLongWritable,VectorWritable> {
@Override
- public void reduce(LongWritable userID,
- Iterator<LongWritable> itemPrefs,
- OutputCollector<LongWritable,VectorWritable> output,
+ public void reduce(VLongWritable userID,
+ Iterator<VLongWritable> itemPrefs,
+ OutputCollector<VLongWritable,VectorWritable> output,
Reporter reporter) throws IOException {
if (!itemPrefs.hasNext()) {
return;
}
Vector userVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
while (itemPrefs.hasNext()) {
- LongWritable itemPref = itemPrefs.next();
+ VLongWritable itemPref = itemPrefs.next();
int index = ItemIDIndexMapper.idToIndex(itemPref.get());
float value;
if (itemPref instanceof EntityPrefWritable) {
@@ -71,7 +71,9 @@ public final class ToUserVectorReducer e
userVector.set(index, value);
}
- output.collect(userID, new VectorWritable(userVector));
+ VectorWritable vw = new VectorWritable(userVector);
+ vw.setWritesLaxPrecision(true);
+ output.collect(userID, vw);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java Sun May 9 08:44:29 2010
@@ -24,7 +24,7 @@ import org.apache.hadoop.fs.FSDataInputS
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
@@ -36,7 +36,7 @@ import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
public final class UserVectorSplitterMapper extends MapReduceBase implements
- Mapper<LongWritable, VectorWritable,IntWritable,VectorOrPrefWritable> {
+ Mapper<VLongWritable,VectorWritable,IntWritable,VectorOrPrefWritable> {
static final String USERS_FILE = "usersFile";
@@ -63,7 +63,7 @@ public final class UserVectorSplitterMap
}
@Override
- public void map(LongWritable key,
+ public void map(VLongWritable key,
VectorWritable value,
OutputCollector<IntWritable,VectorOrPrefWritable> output,
Reporter reporter) throws IOException {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java Sun May 9 08:44:29 2010
@@ -22,7 +22,7 @@ import java.util.Arrays;
import java.util.Iterator;
import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
@@ -33,7 +33,7 @@ import org.apache.mahout.math.list.IntAr
import org.apache.mahout.math.map.OpenIntIntHashMap;
public final class UserVectorToCooccurrenceMapper extends MapReduceBase implements
- Mapper<LongWritable, VectorWritable,IndexIndexWritable,IntWritable> {
+ Mapper<VLongWritable,VectorWritable,IndexIndexWritable,IntWritable> {
private static final int MAX_PREFS_CONSIDERED = 50;
@@ -41,7 +41,7 @@ public final class UserVectorToCooccurre
private final OpenIntIntHashMap indexCounts = new OpenIntIntHashMap();
@Override
- public void map(LongWritable userID,
+ public void map(VLongWritable userID,
VectorWritable userVectorWritable,
OutputCollector<IndexIndexWritable,IntWritable> output,
Reporter reporter) throws IOException {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java Sun May 9 08:44:29 2010
@@ -65,7 +65,9 @@ public final class UserVectorToCooccurre
}
} else {
if (cooccurrenceRow != null) {
- output.collect(new IntWritable(lastItem1ID), new VectorWritable(cooccurrenceRow));
+ VectorWritable vw = new VectorWritable(cooccurrenceRow);
+ vw.setWritesLaxPrecision(true);
+ output.collect(new IntWritable(lastItem1ID), vw);
}
lastItem1ID = item1ID;
lastItem2ID = item2ID;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/VectorOrPrefWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/VectorOrPrefWritable.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/VectorOrPrefWritable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/VectorOrPrefWritable.java Sun May 9 08:44:29 2010
@@ -23,6 +23,7 @@ import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableUtils;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
@@ -72,11 +73,13 @@ public final class VectorOrPrefWritable
public void write(DataOutput out) throws IOException {
if (vector == null) {
out.writeBoolean(false);
- out.writeLong(userID);
+ WritableUtils.writeVLong(out, userID);
out.writeFloat(value);
} else {
out.writeBoolean(true);
- new VectorWritable(vector).write(out);
+ VectorWritable vw = new VectorWritable(vector);
+ vw.setWritesLaxPrecision(true);
+ vw.write(out);
}
}
@@ -88,7 +91,7 @@ public final class VectorOrPrefWritable
writable.readFields(in);
set(writable.get());
} else {
- long theUserID = in.readLong();
+ long theUserID = WritableUtils.readVLong(in);
float theValue = in.readFloat();
set(theUserID, theValue);
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java Sun May 9 08:44:29 2010
@@ -22,8 +22,8 @@ import java.util.Map;
import org.apache.commons.cli2.Option;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.JobClient;
@@ -130,8 +130,8 @@ public final class RecommenderJob extend
int recommendationsPerUser = Integer.parseInt(parsedArgs.get("--numRecommendations"));
JobConf jobConf = prepareJobConf(usersFile, outputPath, TextInputFormat.class,
- UserIDsMapper.class, LongWritable.class, NullWritable.class, RecommenderReducer.class,
- LongWritable.class, RecommendedItemsWritable.class, TextOutputFormat.class);
+ UserIDsMapper.class, VLongWritable.class, NullWritable.class, RecommenderReducer.class,
+ VLongWritable.class, RecommendedItemsWritable.class, TextOutputFormat.class);
jobConf.set(RecommenderReducer.RECOMMENDER_CLASS_NAME, recommendClassName);
jobConf.setInt(RecommenderReducer.RECOMMENDATIONS_PER_USER, recommendationsPerUser);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderReducer.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderReducer.java Sun May 9 08:44:29 2010
@@ -26,8 +26,8 @@ import java.util.List;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
@@ -50,7 +50,7 @@ import org.apache.mahout.cf.taste.recomm
* @see RecommenderJob
*/
public final class RecommenderReducer extends MapReduceBase implements
- Reducer<LongWritable,NullWritable,LongWritable,RecommendedItemsWritable> {
+ Reducer<VLongWritable,NullWritable,VLongWritable,RecommendedItemsWritable> {
static final String RECOMMENDER_CLASS_NAME = "recommenderClassName";
static final String RECOMMENDATIONS_PER_USER = "recommendationsPerUser";
@@ -94,9 +94,9 @@ public final class RecommenderReducer ex
}
@Override
- public void reduce(LongWritable key,
+ public void reduce(VLongWritable key,
Iterator<NullWritable> values,
- OutputCollector<LongWritable,RecommendedItemsWritable> output,
+ OutputCollector<VLongWritable,RecommendedItemsWritable> output,
Reporter reporter) throws IOException {
long userID = key.get();
List<RecommendedItem> recommendedItems;
@@ -112,7 +112,7 @@ public final class RecommenderReducer ex
}
}
RecommendedItemsWritable writable = new RecommendedItemsWritable(recommendedItems);
- output.collect(new LongWritable(userID), writable);
+ output.collect(key, writable);
reporter.getCounter(ReducerMetrics.USERS_PROCESSED).increment(1L);
reporter.getCounter(ReducerMetrics.RECOMMENDATIONS_MADE).increment(recommendedItems.size());
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/UserIDsMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/UserIDsMapper.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/UserIDsMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/UserIDsMapper.java Sun May 9 08:44:29 2010
@@ -22,6 +22,7 @@ import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
@@ -31,17 +32,17 @@ import org.apache.hadoop.mapred.Reporter
* Extracts and emits all user IDs from the users file, or input file.
*/
public final class UserIDsMapper extends MapReduceBase implements
- Mapper<LongWritable,Text,LongWritable,NullWritable> {
+ Mapper<LongWritable,Text,VLongWritable,NullWritable> {
@Override
public void map(LongWritable key,
Text value,
- OutputCollector<LongWritable,NullWritable> output,
+ OutputCollector<VLongWritable,NullWritable> output,
Reporter reporter) throws IOException {
String line = value.toString();
int comma = line.indexOf(',');
long userID = comma >= 0 ? Long.parseLong(line.substring(0, comma)) : Long.parseLong(line);
- output.collect(new LongWritable(userID), NullWritable.get());
+ output.collect(new VLongWritable(userID), NullWritable.get());
}
}
\ No newline at end of file
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java Sun May 9 08:44:29 2010
@@ -19,7 +19,7 @@ package org.apache.mahout.cf.taste.hadoo
import java.io.IOException;
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
@@ -31,10 +31,10 @@ import org.apache.mahout.cf.taste.hadoop
* of the associated item vectors
*/
public final class CopreferredItemsMapper extends MapReduceBase
- implements Mapper<LongWritable,ItemPrefWithItemVectorWeightArrayWritable,ItemPairWritable,CoRating> {
+ implements Mapper<VLongWritable,ItemPrefWithItemVectorWeightArrayWritable,ItemPairWritable,CoRating> {
@Override
- public void map(LongWritable user,
+ public void map(VLongWritable user,
ItemPrefWithItemVectorWeightArrayWritable itemPrefsArray,
OutputCollector<ItemPairWritable, CoRating> output,
Reporter reporter)
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java Sun May 9 08:44:29 2010
@@ -23,7 +23,7 @@ import java.util.Map;
import org.apache.commons.cli2.Option;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DoubleWritable;
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
@@ -129,10 +129,10 @@ public final class ItemSimilarityJob ext
itemVectorsPath,
TextInputFormat.class,
ToUserPrefsMapper.class,
- LongWritable.class,
+ VLongWritable.class,
EntityPrefWritable.class,
ToItemVectorReducer.class,
- LongWritable.class,
+ VLongWritable.class,
EntityPrefWritableArrayWritable.class,
SequenceFileOutputFormat.class);
JobClient.runJob(itemVectors);
@@ -141,10 +141,10 @@ public final class ItemSimilarityJob ext
userVectorsPath,
SequenceFileInputFormat.class,
PreferredItemsPerUserMapper.class,
- LongWritable.class,
+ VLongWritable.class,
ItemPrefWithItemVectorWeightWritable.class,
PreferredItemsPerUserReducer.class,
- LongWritable.class,
+ VLongWritable.class,
ItemPrefWithItemVectorWeightArrayWritable.class,
SequenceFileOutputFormat.class);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java Sun May 9 08:44:29 2010
@@ -21,7 +21,7 @@ import java.io.IOException;
import java.util.Iterator;
import java.util.NoSuchElementException;
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
@@ -36,7 +36,7 @@ import org.apache.mahout.cf.taste.hadoop
* so we can create the user-vectors in the reducer
*/
public final class PreferredItemsPerUserMapper extends MapReduceBase
- implements Mapper<LongWritable,EntityPrefWritableArrayWritable,LongWritable,ItemPrefWithItemVectorWeightWritable> {
+ implements Mapper<VLongWritable,EntityPrefWritableArrayWritable,VLongWritable,ItemPrefWithItemVectorWeightWritable> {
private DistributedSimilarity distributedSimilarity;
@@ -48,9 +48,9 @@ public final class PreferredItemsPerUser
}
@Override
- public void map(LongWritable item,
+ public void map(VLongWritable item,
EntityPrefWritableArrayWritable userPrefsArray,
- OutputCollector<LongWritable,ItemPrefWithItemVectorWeightWritable> output,
+ OutputCollector<VLongWritable,ItemPrefWithItemVectorWeightWritable> output,
Reporter reporter) throws IOException {
EntityPrefWritable[] userPrefs = userPrefsArray.getPrefs();
@@ -58,7 +58,7 @@ public final class PreferredItemsPerUser
double weight = distributedSimilarity.weightOfItemVector(new UserPrefsIterator(userPrefs));
for (EntityPrefWritable userPref : userPrefs) {
- output.collect(new LongWritable(userPref.getID()),
+ output.collect(new VLongWritable(userPref.getID()),
new ItemPrefWithItemVectorWeightWritable(item.get(), weight, userPref.getPrefValue()));
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java Sun May 9 08:44:29 2010
@@ -22,19 +22,19 @@ import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
public final class PreferredItemsPerUserReducer extends MapReduceBase
- implements Reducer<LongWritable,ItemPrefWithItemVectorWeightWritable, LongWritable,ItemPrefWithItemVectorWeightArrayWritable> {
+ implements Reducer<VLongWritable,ItemPrefWithItemVectorWeightWritable,VLongWritable,ItemPrefWithItemVectorWeightArrayWritable> {
@Override
- public void reduce(LongWritable user,
+ public void reduce(VLongWritable user,
Iterator<ItemPrefWithItemVectorWeightWritable> itemPrefs,
- OutputCollector<LongWritable,ItemPrefWithItemVectorWeightArrayWritable> output,
+ OutputCollector<VLongWritable,ItemPrefWithItemVectorWeightArrayWritable> output,
Reporter reporter)
throws IOException {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java Sun May 9 08:44:29 2010
@@ -22,7 +22,7 @@ import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
@@ -36,12 +36,12 @@ import org.apache.mahout.cf.taste.hadoop
*/
public final class ToItemVectorReducer
extends MapReduceBase implements
- Reducer<LongWritable,EntityPrefWritable,LongWritable,EntityPrefWritableArrayWritable> {
+ Reducer<VLongWritable,EntityPrefWritable,VLongWritable,EntityPrefWritableArrayWritable> {
@Override
- public void reduce(LongWritable item,
+ public void reduce(VLongWritable item,
Iterator<EntityPrefWritable> userPrefs,
- OutputCollector<LongWritable,EntityPrefWritableArrayWritable> output,
+ OutputCollector<VLongWritable,EntityPrefWritableArrayWritable> output,
Reporter reporter)
throws IOException {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java Sun May 9 08:44:29 2010
@@ -22,7 +22,7 @@ import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.JobClient;
@@ -54,7 +54,7 @@ public final class SlopeOneAverageDiffsJ
String averagesOutputPath = parsedArgs.get("--tempDir");
JobConf prefsToDiffsJobConf = prepareJobConf(prefsFile, averagesOutputPath,
- TextInputFormat.class, ToItemPrefsMapper.class, LongWritable.class, EntityPrefWritable.class,
+ TextInputFormat.class, ToItemPrefsMapper.class, VLongWritable.class, EntityPrefWritable.class,
SlopeOnePrefsToDiffsReducer.class, EntityEntityWritable.class, FloatWritable.class,
SequenceFileOutputFormat.class);
JobClient.runJob(prefsToDiffsJobConf);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOnePrefsToDiffsReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOnePrefsToDiffsReducer.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOnePrefsToDiffsReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOnePrefsToDiffsReducer.java Sun May 9 08:44:29 2010
@@ -24,7 +24,7 @@ import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
@@ -33,10 +33,10 @@ import org.apache.mahout.cf.taste.hadoop
import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
public final class SlopeOnePrefsToDiffsReducer extends MapReduceBase implements
- Reducer<LongWritable, EntityPrefWritable, EntityEntityWritable,FloatWritable> {
+ Reducer<VLongWritable,EntityPrefWritable,EntityEntityWritable,FloatWritable> {
@Override
- public void reduce(LongWritable key,
+ public void reduce(VLongWritable key,
Iterator<EntityPrefWritable> values,
OutputCollector<EntityEntityWritable,FloatWritable> output,
Reporter reporter) throws IOException {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java Sun May 9 08:44:29 2010
@@ -18,8 +18,8 @@
package org.apache.mahout.common;
import java.io.IOException;
-import java.util.HashMap;
import java.util.Map;
+import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.cli2.Argument;
@@ -136,10 +136,11 @@ public abstract class AbstractJob extend
return null;
}
- Map<String,String> result = new HashMap<String,String>();
- maybePut(result, cmdLine, tempDirOpt, helpOpt);
+ Map<String,String> result = new TreeMap<String,String>();
+ maybePut(result, cmdLine, tempDirOpt, helpOpt, startPhase, endPhase);
maybePut(result, cmdLine, extraOpts);
-
+
+ log.info("Command line arguments: {}", result);
return result;
}
@@ -156,8 +157,13 @@ public abstract class AbstractJob extend
int phase = currentPhase.getAndIncrement();
String startPhase = args.get("--startPhase");
String endPhase = args.get("--endPhase");
- return !((startPhase != null && phase < Integer.parseInt(startPhase)) ||
- (endPhase != null && phase > Integer.parseInt(endPhase)));
+ boolean phaseSkipped =
+ (startPhase != null && phase < Integer.parseInt(startPhase)) ||
+ (endPhase != null && phase > Integer.parseInt(endPhase));
+ if (phaseSkipped) {
+ log.info("Skipping phase {}", phase);
+ }
+ return !phaseSkipped;
}
protected JobConf prepareJobConf(String inputPath,
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java?rev=942513&r1=942512&r2=942513&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java Sun May 9 08:44:29 2010
@@ -32,6 +32,7 @@ import org.apache.hadoop.conf.Configurat
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
@@ -52,9 +53,9 @@ import org.easymock.classextension.EasyM
public final class ItemSimilarityTest extends MahoutTestCase {
public void testUserPrefsPerItemMapper() throws Exception {
- OutputCollector<LongWritable,LongWritable> output =
+ OutputCollector<VLongWritable,VLongWritable> output =
EasyMock.createMock(OutputCollector.class);
- output.collect(new LongWritable(34L), new EntityPrefWritable(12L, 2.3f));
+ output.collect(new VLongWritable(34L), new EntityPrefWritable(12L, 2.3f));
EasyMock.replay(output);
new ToUserPrefsMapper().map(new LongWritable(), new Text("12,34,2.3"), output, null);
@@ -67,14 +68,14 @@ public final class ItemSimilarityTest ex
List<EntityPrefWritable> userPrefs = Arrays.asList(
new EntityPrefWritable(34L, 1.0f), new EntityPrefWritable(56L, 2.0f));
- OutputCollector<LongWritable,EntityPrefWritableArrayWritable> output =
+ OutputCollector<VLongWritable,EntityPrefWritableArrayWritable> output =
EasyMock.createMock(OutputCollector.class);
- output.collect(EasyMock.eq(new LongWritable(12L)), equalToUserPrefs(userPrefs));
+ output.collect(EasyMock.eq(new VLongWritable(12L)), equalToUserPrefs(userPrefs));
EasyMock.replay(output);
- new ToItemVectorReducer().reduce(new LongWritable(12L), userPrefs.iterator(), output, null);
+ new ToItemVectorReducer().reduce(new VLongWritable(12L), userPrefs.iterator(), output, null);
EasyMock.verify(output);
}
@@ -112,7 +113,7 @@ public final class ItemSimilarityTest ex
}
public void testPreferredItemsPerUserMapper() throws Exception {
- OutputCollector<LongWritable,ItemPrefWithItemVectorWeightWritable> output =
+ OutputCollector<VLongWritable,ItemPrefWithItemVectorWeightWritable> output =
EasyMock.createMock(OutputCollector.class);
EntityPrefWritableArrayWritable userPrefs = new EntityPrefWritableArrayWritable(
new EntityPrefWritable[] {
@@ -122,8 +123,8 @@ public final class ItemSimilarityTest ex
double weight =
new DistributedUncenteredZeroAssumingCosineSimilarity().weightOfItemVector(Arrays.asList(2.0f, 3.0f).iterator());
- output.collect(new LongWritable(12L), new ItemPrefWithItemVectorWeightWritable(34L, weight, 2.0f));
- output.collect(new LongWritable(56L), new ItemPrefWithItemVectorWeightWritable(34L, weight, 3.0f));
+ output.collect(new VLongWritable(12L), new ItemPrefWithItemVectorWeightWritable(34L, weight, 2.0f));
+ output.collect(new VLongWritable(56L), new ItemPrefWithItemVectorWeightWritable(34L, weight, 3.0f));
JobConf conf = new JobConf();
conf.set(ItemSimilarityJob.DISTRIBUTED_SIMILARITY_CLASSNAME,
@@ -133,7 +134,7 @@ public final class ItemSimilarityTest ex
PreferredItemsPerUserMapper mapper = new PreferredItemsPerUserMapper();
mapper.configure(conf);
- mapper.map(new LongWritable(34L), userPrefs, output, null);
+ mapper.map(new VLongWritable(34L), userPrefs, output, null);
EasyMock.verify(output);
}
@@ -144,15 +145,15 @@ public final class ItemSimilarityTest ex
Arrays.asList(new ItemPrefWithItemVectorWeightWritable(34L, 5.0, 1.0f),
new ItemPrefWithItemVectorWeightWritable(56L, 7.0, 2.0f));
- OutputCollector<LongWritable,ItemPrefWithItemVectorWeightArrayWritable> output =
+ OutputCollector<VLongWritable,ItemPrefWithItemVectorWeightArrayWritable> output =
EasyMock.createMock(OutputCollector.class);
- output.collect(EasyMock.eq(new LongWritable(12L)), equalToItemPrefs(itemPrefs));
+ output.collect(EasyMock.eq(new VLongWritable(12L)), equalToItemPrefs(itemPrefs));
EasyMock.replay(output);
new PreferredItemsPerUserReducer().reduce(
- new LongWritable(12L), itemPrefs.iterator(), output, null);
+ new VLongWritable(12L), itemPrefs.iterator(), output, null);
EasyMock.verify(output);
}
@@ -204,7 +205,7 @@ public final class ItemSimilarityTest ex
EasyMock.replay(output, itemPrefs);
- new CopreferredItemsMapper().map(new LongWritable(), itemPrefs, output, null);
+ new CopreferredItemsMapper().map(new VLongWritable(), itemPrefs, output, null);
EasyMock.verify(output, itemPrefs);
}