You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/04/05 18:05:41 UTC
svn commit: r930890 - in /lucene/mahout/trunk/core/src:
main/java/org/apache/mahout/cf/taste/hadoop/
main/java/org/apache/mahout/cf/taste/hadoop/item/
main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/
main/java/org/apache/mahout/cf/taste/had...
Author: srowen
Date: Mon Apr 5 16:05:41 2010
New Revision: 930890
URL: http://svn.apache.org/viewvc?rev=930890&view=rev
Log:
MAHOUT-362 last refactorings for now
Added:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java
- copied, changed from r930805, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToUserPrefsMapper.java
- copied, changed from r930805, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPairWritable.java
- copied, changed from r930806, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPairWritable.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthArrayWritable.java
- copied, changed from r930806, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthArrayWritable.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthWritable.java
- copied, changed from r930806, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthWritable.java
Removed:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityWritable.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/UserPrefsPerItemMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CosineSimilarityReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java?rev=930890&r1=930889&r2=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java Mon Apr 5 16:05:41 2010
@@ -21,11 +21,12 @@ import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
+import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Writable;
import org.apache.mahout.common.RandomUtils;
/** A {@link Writable} encapsulating an entity (user or item) ID and a preference value. */
-public final class EntityPrefWritable extends EntityWritable {
+public final class EntityPrefWritable extends LongWritable {
private float prefValue;
@@ -39,7 +40,11 @@ public final class EntityPrefWritable ex
}
public EntityPrefWritable(EntityPrefWritable other) {
- this(other.getID(), other.getPrefValue());
+ this(other.get(), other.getPrefValue());
+ }
+
+ public long getID() {
+ return get();
}
public float getPrefValue() {
@@ -75,12 +80,12 @@ public final class EntityPrefWritable ex
return false;
}
EntityPrefWritable other = (EntityPrefWritable) o;
- return getID() == other.getID() && prefValue == other.getPrefValue();
+ return get() == other.get() && prefValue == other.getPrefValue();
}
@Override
public EntityPrefWritable clone() {
- return new EntityPrefWritable(getID(), prefValue);
+ return new EntityPrefWritable(get(), prefValue);
}
}
\ No newline at end of file
Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java (from r930805, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java&r1=930805&r2=930890&rev=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java Mon Apr 5 16:05:41 2010
@@ -17,9 +17,6 @@
package org.apache.mahout.cf.taste.hadoop;
-import java.io.IOException;
-import java.util.regex.Pattern;
-
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
@@ -29,52 +26,45 @@ import org.apache.hadoop.mapred.OutputCo
import org.apache.hadoop.mapred.Reporter;
import org.apache.mahout.cf.taste.hadoop.item.RecommenderJob;
-/**
- * <h1>Input</h1>
- *
- * <p>
- * Intended for use with {@link org.apache.hadoop.mapred.TextInputFormat}; accepts line number / line pairs as
- * {@link LongWritable}/{@link Text} pairs.
- * </p>
- *
- * <p>
- * Each line is assumed to be of the form <code>userID,itemID,preference</code>, or
- * <code>userID,itemID</code>.</p>
- * </p>
- *
- * <h1>Output</h1>
- *
- * <p>
- * Outputs the user ID as a {@link LongWritable} mapped to the item ID and preference as a
- * {@link EntityPrefWritable}.
- * </p>
- */
-public final class ToItemPrefsMapper extends MapReduceBase implements
- Mapper<LongWritable,Text,LongWritable, EntityWritable> {
-
+import java.io.IOException;
+import java.util.regex.Pattern;
+
+abstract class ToEntityPrefsMapper extends MapReduceBase implements
+ Mapper<LongWritable,Text,LongWritable,LongWritable> {
+
private static final Pattern COMMA = Pattern.compile(",");
private boolean booleanData;
+ private final boolean itemKey;
+
+ ToEntityPrefsMapper(boolean itemKey) {
+ this.itemKey = itemKey;
+ }
@Override
public void configure(JobConf jobConf) {
booleanData = jobConf.getBoolean(RecommenderJob.BOOLEAN_DATA, false);
}
-
+
@Override
public void map(LongWritable key,
Text value,
- OutputCollector<LongWritable, EntityWritable> output,
+ OutputCollector<LongWritable,LongWritable> output,
Reporter reporter) throws IOException {
- String[] tokens = ToItemPrefsMapper.COMMA.split(value.toString());
+ String[] tokens = ToEntityPrefsMapper.COMMA.split(value.toString());
long userID = Long.parseLong(tokens[0]);
long itemID = Long.parseLong(tokens[1]);
+ if (itemKey) {
+ long temp = userID;
+ userID = itemID;
+ itemID = temp;
+ }
if (booleanData) {
- output.collect(new LongWritable(userID), new EntityWritable(itemID));
+ output.collect(new LongWritable(userID), new LongWritable(itemID));
} else {
float prefValue = tokens.length > 2 ? Float.parseFloat(tokens[2]) : 1.0f;
output.collect(new LongWritable(userID), new EntityPrefWritable(itemID, prefValue));
}
}
-
+
}
\ No newline at end of file
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java?rev=930890&r1=930889&r2=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java Mon Apr 5 16:05:41 2010
@@ -17,17 +17,8 @@
package org.apache.mahout.cf.taste.hadoop;
-import java.io.IOException;
-import java.util.regex.Pattern;
-
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.mahout.cf.taste.hadoop.item.RecommenderJob;
/**
* <h1>Input</h1>
@@ -49,32 +40,10 @@ import org.apache.mahout.cf.taste.hadoop
* {@link EntityPrefWritable}.
* </p>
*/
-public final class ToItemPrefsMapper extends MapReduceBase implements
- Mapper<LongWritable,Text,LongWritable, EntityWritable> {
-
- private static final Pattern COMMA = Pattern.compile(",");
-
- private boolean booleanData;
+public final class ToItemPrefsMapper extends ToEntityPrefsMapper {
- @Override
- public void configure(JobConf jobConf) {
- booleanData = jobConf.getBoolean(RecommenderJob.BOOLEAN_DATA, false);
- }
-
- @Override
- public void map(LongWritable key,
- Text value,
- OutputCollector<LongWritable, EntityWritable> output,
- Reporter reporter) throws IOException {
- String[] tokens = ToItemPrefsMapper.COMMA.split(value.toString());
- long userID = Long.parseLong(tokens[0]);
- long itemID = Long.parseLong(tokens[1]);
- if (booleanData) {
- output.collect(new LongWritable(userID), new EntityWritable(itemID));
- } else {
- float prefValue = tokens.length > 2 ? Float.parseFloat(tokens[2]) : 1.0f;
- output.collect(new LongWritable(userID), new EntityPrefWritable(itemID, prefValue));
- }
+ public ToItemPrefsMapper() {
+ super(false);
}
}
\ No newline at end of file
Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToUserPrefsMapper.java (from r930805, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToUserPrefsMapper.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToUserPrefsMapper.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java&r1=930805&r2=930890&rev=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToUserPrefsMapper.java Mon Apr 5 16:05:41 2010
@@ -17,64 +17,13 @@
package org.apache.mahout.cf.taste.hadoop;
-import java.io.IOException;
-import java.util.regex.Pattern;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.mahout.cf.taste.hadoop.item.RecommenderJob;
-
/**
- * <h1>Input</h1>
- *
- * <p>
- * Intended for use with {@link org.apache.hadoop.mapred.TextInputFormat}; accepts line number / line pairs as
- * {@link LongWritable}/{@link Text} pairs.
- * </p>
- *
- * <p>
- * Each line is assumed to be of the form <code>userID,itemID,preference</code>, or
- * <code>userID,itemID</code>.</p>
- * </p>
- *
- * <h1>Output</h1>
- *
- * <p>
- * Outputs the user ID as a {@link LongWritable} mapped to the item ID and preference as a
- * {@link EntityPrefWritable}.
- * </p>
+ * The 'reverse' of {@link ToItemPrefsMapper}; outputs item IDs mapped to user-pref data.
*/
-public final class ToItemPrefsMapper extends MapReduceBase implements
- Mapper<LongWritable,Text,LongWritable, EntityWritable> {
-
- private static final Pattern COMMA = Pattern.compile(",");
-
- private boolean booleanData;
+public final class ToUserPrefsMapper extends ToEntityPrefsMapper {
- @Override
- public void configure(JobConf jobConf) {
- booleanData = jobConf.getBoolean(RecommenderJob.BOOLEAN_DATA, false);
+ public ToUserPrefsMapper() {
+ super(true);
}
-
- @Override
- public void map(LongWritable key,
- Text value,
- OutputCollector<LongWritable, EntityWritable> output,
- Reporter reporter) throws IOException {
- String[] tokens = ToItemPrefsMapper.COMMA.split(value.toString());
- long userID = Long.parseLong(tokens[0]);
- long itemID = Long.parseLong(tokens[1]);
- if (booleanData) {
- output.collect(new LongWritable(userID), new EntityWritable(itemID));
- } else {
- float prefValue = tokens.length > 2 ? Float.parseFloat(tokens[2]) : 1.0f;
- output.collect(new LongWritable(userID), new EntityPrefWritable(itemID, prefValue));
- }
- }
-
+
}
\ No newline at end of file
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java?rev=930890&r1=930889&r2=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java Mon Apr 5 16:05:41 2010
@@ -36,7 +36,6 @@ import org.apache.hadoop.mapred.TextOutp
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
-import org.apache.mahout.cf.taste.hadoop.EntityWritable;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.cf.taste.hadoop.RecommendedItemsWritable;
import org.apache.mahout.cf.taste.hadoop.ToItemPrefsMapper;
@@ -100,7 +99,7 @@ public final class RecommenderJob extend
JobConf toUserVectorConf = prepareJobConf(inputPath, userVectorPath,
TextInputFormat.class, ToItemPrefsMapper.class, LongWritable.class,
- booleanData ? EntityWritable.class : EntityPrefWritable.class,
+ booleanData ? LongWritable.class : EntityPrefWritable.class,
ToUserVectorReducer.class, LongWritable.class, VectorWritable.class, SequenceFileOutputFormat.class);
toUserVectorConf.setBoolean(BOOLEAN_DATA, booleanData);
JobClient.runJob(toUserVectorConf);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java?rev=930890&r1=930889&r2=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java Mon Apr 5 16:05:41 2010
@@ -29,7 +29,6 @@ import org.apache.hadoop.mapred.OutputCo
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
-import org.apache.mahout.cf.taste.hadoop.EntityWritable;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
@@ -58,7 +57,7 @@ import org.apache.mahout.math.VectorWrit
*
*/
public final class ToUserVectorReducer extends MapReduceBase implements
- Reducer<LongWritable, EntityWritable,LongWritable,VectorWritable> {
+ Reducer<LongWritable,LongWritable,LongWritable,VectorWritable> {
public static final int MAX_PREFS_CONSIDERED = 20;
@@ -72,14 +71,14 @@ public final class ToUserVectorReducer e
@Override
public void reduce(LongWritable userID,
- Iterator<EntityWritable> itemPrefs,
+ Iterator<LongWritable> itemPrefs,
OutputCollector<LongWritable,VectorWritable> output,
Reporter reporter) throws IOException {
if (itemPrefs.hasNext()) {
RandomAccessSparseVector userVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
while (itemPrefs.hasNext()) {
- EntityWritable itemPref = itemPrefs.next();
- int index = ItemIDIndexMapper.idToIndex(itemPref.getID());
+ LongWritable itemPref = itemPrefs.next();
+ int index = ItemIDIndexMapper.idToIndex(itemPref.get());
float value;
if (itemPref instanceof EntityPrefWritable) {
value = ((EntityPrefWritable) itemPref).getPrefValue();
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java?rev=930890&r1=930889&r2=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java Mon Apr 5 16:05:41 2010
@@ -20,22 +20,25 @@ package org.apache.mahout.cf.taste.hadoo
import java.io.IOException;
import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.mahout.cf.taste.hadoop.EntityWritable;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPairWritable;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthArrayWritable;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
/**
* Maps out each pair of items that appear together in the same user vector, along with the product of the
* vector lengths of the associated item vectors.
*/
-public final class CopreferredItemsMapper
- extends Mapper<EntityWritable,ItemPrefWithLengthArrayWritable,ItemPairWritable,FloatWritable> {
+public final class CopreferredItemsMapper extends MapReduceBase
+ implements Mapper<LongWritable,ItemPrefWithLengthArrayWritable,ItemPairWritable,FloatWritable> {
@Override
- protected void map(EntityWritable user, ItemPrefWithLengthArrayWritable itemPrefsArray, Context context)
- throws IOException, InterruptedException {
+ public void map(LongWritable user,
+ ItemPrefWithLengthArrayWritable itemPrefsArray,
+ OutputCollector<ItemPairWritable,FloatWritable> output,
+ Reporter reporter)
+ throws IOException {
ItemPrefWithLengthWritable[] itemPrefs = itemPrefsArray.getItemPrefs();
@@ -49,7 +52,7 @@ public final class CopreferredItemsMapp
long itemAID = Math.min(itemNID, itemM.getItemID());
long itemBID = Math.max(itemNID, itemM.getItemID());
ItemPairWritable pair = new ItemPairWritable(itemAID, itemBID, itemNLength * itemM.getLength());
- context.write(pair, new FloatWritable(itemNValue * itemM.getPrefValue()));
+ output.collect(pair, new FloatWritable(itemNValue * itemM.getPrefValue()));
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CosineSimilarityReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CosineSimilarityReducer.java?rev=930890&r1=930889&r2=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CosineSimilarityReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CosineSimilarityReducer.java Mon Apr 5 16:05:41 2010
@@ -18,34 +18,36 @@
package org.apache.mahout.cf.taste.hadoop.similarity.item;
import java.io.IOException;
+import java.util.Iterator;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPairWritable;
/**
* Finally compute the cosine for each item-pair
*/
-public final class CosineSimilarityReducer
- extends Reducer<ItemPairWritable,FloatWritable, EntityEntityWritable,DoubleWritable> {
+public final class CosineSimilarityReducer extends MapReduceBase
+ implements Reducer<ItemPairWritable,FloatWritable,EntityEntityWritable,DoubleWritable> {
@Override
- protected void reduce(ItemPairWritable pair, Iterable<FloatWritable> numeratorSummands, Context context)
- throws IOException, InterruptedException {
+ public void reduce(ItemPairWritable pair,
+ Iterator<FloatWritable> numeratorSummands,
+ OutputCollector<EntityEntityWritable,DoubleWritable> output,
+ Reporter reporter)
+ throws IOException {
double numerator = 0.0;
-
- for (FloatWritable nummeratorSummand : numeratorSummands) {
- numerator += nummeratorSummand.get();
+ while (numeratorSummands.hasNext()) {
+ numerator += numeratorSummands.next().get();
}
-
double denominator = pair.getMultipliedLength();
-
double cosine = numerator / denominator;
-
- context.write(pair.getItemItemWritable(), new DoubleWritable(cosine));
+ output.collect(pair.getItemItemWritable(), new DoubleWritable(cosine));
}
}
Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPairWritable.java (from r930806, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPairWritable.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPairWritable.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPairWritable.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPairWritable.java&r1=930806&r2=930890&rev=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPairWritable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPairWritable.java Mon Apr 5 16:05:41 2010
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.mahout.cf.taste.hadoop.similarity.item.writables;
+package org.apache.mahout.cf.taste.hadoop.similarity.item;
import java.io.DataInput;
import java.io.DataOutput;
Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthArrayWritable.java (from r930806, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthArrayWritable.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthArrayWritable.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthArrayWritable.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthArrayWritable.java&r1=930806&r2=930890&rev=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthArrayWritable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthArrayWritable.java Mon Apr 5 16:05:41 2010
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.mahout.cf.taste.hadoop.similarity.item.writables;
+package org.apache.mahout.cf.taste.hadoop.similarity.item;
import org.apache.hadoop.io.ArrayWritable;
Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthWritable.java (from r930806, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthWritable.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthWritable.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthWritable.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthWritable.java&r1=930806&r2=930890&rev=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthWritable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthWritable.java Mon Apr 5 16:05:41 2010
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.mahout.cf.taste.hadoop.similarity.item.writables;
+package org.apache.mahout.cf.taste.hadoop.similarity.item;
import java.io.DataInput;
import java.io.DataOutput;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java?rev=930890&r1=930889&r2=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java Mon Apr 5 16:05:41 2010
@@ -17,32 +17,24 @@
package org.apache.mahout.cf.taste.hadoop.similarity.item;
-import java.io.IOException;
import java.util.Map;
+import org.apache.commons.cli2.Option;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.util.ToolRunner;
+import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
import org.apache.mahout.cf.taste.hadoop.EntityPrefWritableArrayWritable;
-import org.apache.mahout.cf.taste.hadoop.EntityWritable;
-import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPairWritable;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthArrayWritable;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthWritable;
+import org.apache.mahout.cf.taste.hadoop.ToUserPrefsMapper;
import org.apache.mahout.common.AbstractJob;
/**
@@ -108,7 +100,6 @@ public final class ItemSimilarityJob ext
public int run(String[] args) throws Exception {
Map<String,String> parsedArgs = AbstractJob.parseArguments(args);
-
if (parsedArgs == null) {
return -1;
}
@@ -121,84 +112,47 @@ public final class ItemSimilarityJob ext
String itemVectorsPath = tempDirPath + "/itemVectors";
String userVectorsPath = tempDirPath + "/userVectors";
- Job itemVectors = createJob(originalConf, "itemVectors", inputPath, itemVectorsPath, UserPrefsPerItemMapper.class,
- EntityWritable.class, EntityPrefWritable.class, ToItemVectorReducer.class, EntityWritable.class,
- EntityPrefWritableArrayWritable.class, TextInputFormat.class, SequenceFileOutputFormat.class, true);
-
- itemVectors.waitForCompletion(true);
-
- Job userVectors = createJob(originalConf, "userVectors", itemVectorsPath, userVectorsPath,
- PreferredItemsPerUserMapper.class, EntityWritable.class, ItemPrefWithLengthWritable.class,
- PreferredItemsPerUserReducer.class, EntityWritable.class, ItemPrefWithLengthArrayWritable.class);
-
- userVectors.waitForCompletion(true);
-
- Job similarity = createJob(originalConf, "similarity", userVectorsPath, outputPath,
- CopreferredItemsMapper.class, ItemPairWritable.class, FloatWritable.class, CosineSimilarityReducer.class,
- EntityEntityWritable.class, DoubleWritable.class, SequenceFileInputFormat.class, TextOutputFormat.class, false);
-
- similarity.waitForCompletion(true);
+ JobConf itemVectors = prepareJobConf(inputPath,
+ itemVectorsPath,
+ TextInputFormat.class,
+ ToUserPrefsMapper.class,
+ LongWritable.class,
+ EntityPrefWritable.class,
+ ToItemVectorReducer.class,
+ LongWritable.class,
+ EntityPrefWritableArrayWritable.class,
+ SequenceFileOutputFormat.class);
+ JobClient.runJob(itemVectors);
+
+ JobConf userVectors = prepareJobConf(itemVectorsPath,
+ userVectorsPath,
+ SequenceFileInputFormat.class,
+ PreferredItemsPerUserMapper.class,
+ LongWritable.class,
+ ItemPrefWithLengthWritable.class,
+ PreferredItemsPerUserReducer.class,
+ LongWritable.class,
+ ItemPrefWithLengthArrayWritable.class,
+ SequenceFileOutputFormat.class);
+ JobClient.runJob(userVectors);
+
+ JobConf similarity = prepareJobConf(userVectorsPath,
+ outputPath,
+ SequenceFileInputFormat.class,
+ CopreferredItemsMapper.class,
+ ItemPairWritable.class,
+ FloatWritable.class,
+ CosineSimilarityReducer.class,
+ EntityEntityWritable.class,
+ DoubleWritable.class,
+ TextOutputFormat.class);
+ JobClient.runJob(similarity);
return 0;
}
public static void main(String[] args) throws Exception {
- ToolRunner.run(new Configuration(), new ItemSimilarityJob(), args);
- }
-
- protected static Job createJob(Configuration conf,
- String jobName,
- String inputPath,
- String outputPath,
- Class<? extends Mapper> mapperClass,
- Class<? extends Writable> mapKeyOutClass,
- Class<? extends Writable> mapValueOutClass,
- Class<? extends Reducer> reducerClass,
- Class<? extends Writable> keyOutClass,
- Class<? extends Writable> valueOutClass) throws IOException {
- return createJob(conf, jobName, inputPath, outputPath, mapperClass, mapKeyOutClass,
- mapValueOutClass, reducerClass, keyOutClass, valueOutClass, SequenceFileInputFormat.class,
- SequenceFileOutputFormat.class, true);
- }
-
- protected static Job createJob(Configuration conf,
- String jobName,
- String inputPath,
- String outputPath,
- Class<? extends Mapper> mapperClass,
- Class<? extends Writable> mapKeyOutClass,
- Class<? extends Writable> mapValueOutClass,
- Class<? extends Reducer> reducerClass,
- Class<? extends Writable> keyOutClass,
- Class<? extends Writable> valueOutClass,
- Class<? extends FileInputFormat> fileInputFormatClass,
- Class<? extends FileOutputFormat> fileOutputFormatClass,
- boolean compress) throws IOException {
-
- Job job = new Job(conf, jobName);
-
- FileSystem fs = FileSystem.get(conf);
-
- Path inputPathPath = new Path(inputPath).makeQualified(fs);
- Path outputPathPath = new Path(outputPath).makeQualified(fs);
-
- FileInputFormat.setInputPaths(job, inputPathPath);
- job.setInputFormatClass(fileInputFormatClass);
-
- job.setMapperClass(mapperClass);
- job.setMapOutputKeyClass(mapKeyOutClass);
- job.setMapOutputValueClass(mapValueOutClass);
-
- job.setReducerClass(reducerClass);
- job.setOutputKeyClass(keyOutClass);
- job.setOutputValueClass(valueOutClass);
-
-
- FileOutputFormat.setOutputPath(job, outputPathPath);
- FileOutputFormat.setCompressOutput(job, compress);
- job.setOutputFormatClass(fileOutputFormatClass);
-
- return job;
+ ToolRunner.run(new ItemSimilarityJob(), args);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java?rev=930890&r1=930889&r2=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java Mon Apr 5 16:05:41 2010
@@ -19,22 +19,26 @@ package org.apache.mahout.cf.taste.hadoo
import java.io.IOException;
-import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
import org.apache.mahout.cf.taste.hadoop.EntityPrefWritableArrayWritable;
-import org.apache.mahout.cf.taste.hadoop.EntityWritable;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthWritable;
/**
* for each item-vector, we compute its length here and map out all entries with the user as key,
* so we can create the user-vectors in the reducer
*/
-public final class PreferredItemsPerUserMapper
- extends Mapper<EntityWritable, EntityPrefWritableArrayWritable,EntityWritable,ItemPrefWithLengthWritable> {
+public final class PreferredItemsPerUserMapper extends MapReduceBase
+ implements Mapper<LongWritable,EntityPrefWritableArrayWritable,LongWritable,ItemPrefWithLengthWritable> {
@Override
- protected void map(EntityWritable item, EntityPrefWritableArrayWritable userPrefsArray, Context context)
- throws IOException, InterruptedException {
+ public void map(LongWritable item,
+ EntityPrefWritableArrayWritable userPrefsArray,
+ OutputCollector<LongWritable,ItemPrefWithLengthWritable> output,
+ Reporter reporter) throws IOException {
EntityPrefWritable[] userPrefs = userPrefsArray.getPrefs();
@@ -47,8 +51,8 @@ public final class PreferredItemsPerUser
length = Math.sqrt(length);
for (EntityPrefWritable userPref : userPrefs) {
- context.write(new EntityWritable(userPref.getID()),
- new ItemPrefWithLengthWritable(item.getID(), length, userPref.getPrefValue()));
+ output.collect(new LongWritable(userPref.getID()),
+ new ItemPrefWithLengthWritable(item.get(), length, userPref.getPrefValue()));
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java?rev=930890&r1=930889&r2=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java Mon Apr 5 16:05:41 2010
@@ -19,27 +19,32 @@ package org.apache.mahout.cf.taste.hadoo
import java.io.IOException;
import java.util.HashSet;
+import java.util.Iterator;
import java.util.Set;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.mahout.cf.taste.hadoop.EntityWritable;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthArrayWritable;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
-public final class PreferredItemsPerUserReducer
- extends Reducer<EntityWritable,ItemPrefWithLengthWritable, EntityWritable,ItemPrefWithLengthArrayWritable> {
+public final class PreferredItemsPerUserReducer extends MapReduceBase
+ implements Reducer<LongWritable,ItemPrefWithLengthWritable, LongWritable,ItemPrefWithLengthArrayWritable> {
@Override
- protected void reduce(EntityWritable user, Iterable<ItemPrefWithLengthWritable> itemPrefs, Context context)
- throws IOException, InterruptedException {
+ public void reduce(LongWritable user,
+ Iterator<ItemPrefWithLengthWritable> itemPrefs,
+ OutputCollector<LongWritable,ItemPrefWithLengthArrayWritable> output,
+ Reporter reporter)
+ throws IOException {
Set<ItemPrefWithLengthWritable> itemPrefsWithLength = new HashSet<ItemPrefWithLengthWritable>();
- for (ItemPrefWithLengthWritable itemPrefWithLength : itemPrefs) {
- itemPrefsWithLength.add(itemPrefWithLength.deepCopy());
+ while (itemPrefs.hasNext()) {
+ itemPrefsWithLength.add(itemPrefs.next().clone());
}
- context.write(user, new ItemPrefWithLengthArrayWritable(
+ output.collect(user, new ItemPrefWithLengthArrayWritable(
itemPrefsWithLength.toArray(new ItemPrefWithLengthWritable[itemPrefsWithLength.size()])));
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java?rev=930890&r1=930889&r2=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java Mon Apr 5 16:05:41 2010
@@ -19,31 +19,39 @@ package org.apache.mahout.cf.taste.hadoo
import java.io.IOException;
import java.util.HashSet;
+import java.util.Iterator;
import java.util.Set;
-import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
import org.apache.mahout.cf.taste.hadoop.EntityPrefWritableArrayWritable;
-import org.apache.mahout.cf.taste.hadoop.EntityWritable;
/**
* For each single item, collect all users with their preferences
* (thereby building the item vectors of the user-item-matrix)
*/
public final class ToItemVectorReducer
- extends Reducer<EntityWritable, EntityPrefWritable, EntityWritable, EntityPrefWritableArrayWritable> {
+ extends MapReduceBase implements
+ Reducer<LongWritable,EntityPrefWritable,LongWritable,EntityPrefWritableArrayWritable> {
@Override
- protected void reduce(EntityWritable item, Iterable<EntityPrefWritable> userPrefs, Context context)
- throws IOException, InterruptedException {
+ public void reduce(LongWritable item,
+ Iterator<EntityPrefWritable> userPrefs,
+ OutputCollector<LongWritable,EntityPrefWritableArrayWritable> output,
+ Reporter reporter)
+ throws IOException {
Set<EntityPrefWritable> collectedUserPrefs = new HashSet<EntityPrefWritable>();
- for (EntityPrefWritable userPref : userPrefs) {
- collectedUserPrefs.add(userPref.clone());
+ while (userPrefs.hasNext()) {
+ collectedUserPrefs.add(userPrefs.next().clone());
}
- context.write(item, new EntityPrefWritableArrayWritable(
+ output.collect(item, new EntityPrefWritableArrayWritable(
collectedUserPrefs.toArray(new EntityPrefWritable[collectedUserPrefs.size()])));
}
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java?rev=930890&r1=930889&r2=930890&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java Mon Apr 5 16:05:41 2010
@@ -17,12 +17,6 @@
package org.apache.mahout.cf.taste.hadoop.similarity.item;
-import static org.easymock.EasyMock.eq;
-import static org.easymock.EasyMock.expect;
-import static org.easymock.classextension.EasyMock.createMock;
-import static org.easymock.classextension.EasyMock.replay;
-import static org.easymock.classextension.EasyMock.verify;
-
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
@@ -39,18 +33,15 @@ import org.apache.hadoop.io.DoubleWritab
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.easymock.classextension.EasyMock;
+import org.easymock.IArgumentMatcher;
+
import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
import org.apache.mahout.cf.taste.hadoop.EntityPrefWritableArrayWritable;
-import org.apache.mahout.cf.taste.hadoop.EntityWritable;
import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPairWritable;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthArrayWritable;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthWritable;
+import org.apache.mahout.cf.taste.hadoop.ToUserPrefsMapper;
import org.apache.mahout.common.MahoutTestCase;
-import org.easymock.IArgumentMatcher;
-import org.easymock.classextension.EasyMock;
/**
* Unit tests for the mappers and reducers in org.apache.mahout.cf.taste.hadoop.similarity
@@ -61,36 +52,41 @@ public class ItemSimilarityTest extends
public void testUserPrefsPerItemMapper() throws Exception {
- Mapper.Context ctx = createMock(Mapper.Context.class);
- ctx.write(new EntityWritable(34L), new EntityPrefWritable(12L, 2.3f));
- replay(ctx);
+ OutputCollector<LongWritable,LongWritable> output =
+ EasyMock.createMock(OutputCollector.class);
+ output.collect(new LongWritable(34L), new EntityPrefWritable(12L, 2.3f));
+ EasyMock.replay(output);
- new UserPrefsPerItemMapper().map(new LongWritable(), new Text("12,34,2.3"), ctx);
+ new ToUserPrefsMapper().map(new LongWritable(), new Text("12,34,2.3"), output, null);
- verify(ctx);
+ EasyMock.verify(output);
}
public void testToItemVectorReducer() throws Exception {
- List<EntityPrefWritable> userPrefs = Arrays.asList(new EntityPrefWritable(34L, 1.0f), new EntityPrefWritable(56L, 2.0f));
+ List<EntityPrefWritable> userPrefs = Arrays.asList(
+ new EntityPrefWritable(34L, 1.0f), new EntityPrefWritable(56L, 2.0f));
- Reducer.Context ctx = createMock(Reducer.Context.class);
+ OutputCollector<LongWritable,EntityPrefWritableArrayWritable> output =
+ EasyMock.createMock(OutputCollector.class);
- ctx.write(eq(new EntityWritable(12L)), equalToUserPrefs(userPrefs));
+ output.collect(EasyMock.eq(new LongWritable(12L)), equalToUserPrefs(userPrefs));
- replay(ctx);
+ EasyMock.replay(output);
- new ToItemVectorReducer().reduce(new EntityWritable(12L), userPrefs, ctx);
+ new ToItemVectorReducer().reduce(new LongWritable(12L), userPrefs.iterator(), output, null);
- verify(ctx);
+ EasyMock.verify(output);
}
- static EntityPrefWritableArrayWritable equalToUserPrefs(final Collection<EntityPrefWritable> prefsToCheck) {
+ static EntityPrefWritableArrayWritable equalToUserPrefs(
+ final Collection<EntityPrefWritable> prefsToCheck) {
EasyMock.reportMatcher(new IArgumentMatcher() {
@Override
public boolean matches(Object argument) {
if (argument instanceof EntityPrefWritableArrayWritable) {
- EntityPrefWritableArrayWritable userPrefArray = (EntityPrefWritableArrayWritable) argument;
+ EntityPrefWritableArrayWritable userPrefArray =
+ (EntityPrefWritableArrayWritable) argument;
Set<EntityPrefWritable> set = new HashSet<EntityPrefWritable>();
set.addAll(Arrays.asList(userPrefArray.getPrefs()));
@@ -116,50 +112,56 @@ public class ItemSimilarityTest extends
}
public void testPreferredItemsPerUserMapper() throws Exception {
- Mapper.Context ctx = createMock(Mapper.Context.class);
- EntityPrefWritableArrayWritable userPrefs = createMock(EntityPrefWritableArrayWritable.class);
-
- expect(userPrefs.getPrefs())
- .andReturn(new EntityPrefWritable[] { new EntityPrefWritable(12L, 2.0f), new EntityPrefWritable(56L, 3.0f) });
+ OutputCollector<LongWritable,ItemPrefWithLengthWritable> output =
+ EasyMock.createMock(OutputCollector.class);
+ EntityPrefWritableArrayWritable userPrefs =
+ EasyMock.createMock(EntityPrefWritableArrayWritable.class);
+
+ EasyMock.expect(userPrefs.getPrefs()).andReturn(
+ new EntityPrefWritable[] {
+ new EntityPrefWritable(12L, 2.0f),
+ new EntityPrefWritable(56L, 3.0f) });
double length = Math.sqrt(Math.pow(2.0f, 2) + Math.pow(3.0f, 2));
- ctx.write(new EntityWritable(12L), new ItemPrefWithLengthWritable(34L, length, 2.0f));
- ctx.write(new EntityWritable(56L), new ItemPrefWithLengthWritable(34L, length, 3.0f));
+ output.collect(new LongWritable(12L), new ItemPrefWithLengthWritable(34L, length, 2.0f));
+ output.collect(new LongWritable(56L), new ItemPrefWithLengthWritable(34L, length, 3.0f));
- replay(ctx, userPrefs);
+ EasyMock.replay(output, userPrefs);
- new PreferredItemsPerUserMapper().map(new EntityWritable(34L), userPrefs, ctx);
+ new PreferredItemsPerUserMapper().map(new LongWritable(34L), userPrefs, output, null);
- verify(ctx, userPrefs);
+ EasyMock.verify(output, userPrefs);
}
public void testPreferredItemsPerUserReducer() throws Exception {
List<ItemPrefWithLengthWritable> itemPrefs =
- Arrays.asList(new ItemPrefWithLengthWritable(34L, 5.0, 1.0f), new ItemPrefWithLengthWritable(56L, 7.0, 2.0f));
+ Arrays.asList(new ItemPrefWithLengthWritable(34L, 5.0, 1.0f),
+ new ItemPrefWithLengthWritable(56L, 7.0, 2.0f));
- Reducer.Context ctx = createMock(Reducer.Context.class);
+ OutputCollector<LongWritable,ItemPrefWithLengthArrayWritable> output =
+ EasyMock.createMock(OutputCollector.class);
- ctx.write(eq(new EntityWritable(12L)), equalToItemPrefs(itemPrefs));
+ output.collect(EasyMock.eq(new LongWritable(12L)), equalToItemPrefs(itemPrefs));
- replay(ctx);
+ EasyMock.replay(output);
- new PreferredItemsPerUserReducer().reduce(new EntityWritable(12L), itemPrefs, ctx);
+ new PreferredItemsPerUserReducer().reduce(
+ new LongWritable(12L), itemPrefs.iterator(), output, null);
- verify(ctx);
+ EasyMock.verify(output);
}
- static ItemPrefWithLengthArrayWritable equalToItemPrefs(final Collection<ItemPrefWithLengthWritable> prefsToCheck) {
+ static ItemPrefWithLengthArrayWritable equalToItemPrefs(
+ final Collection<ItemPrefWithLengthWritable> prefsToCheck) {
EasyMock.reportMatcher(new IArgumentMatcher() {
@Override
public boolean matches(Object argument) {
if (argument instanceof ItemPrefWithLengthArrayWritable) {
ItemPrefWithLengthArrayWritable itemPrefArray = (ItemPrefWithLengthArrayWritable) argument;
- Set<ItemPrefWithLengthWritable> set = new HashSet<ItemPrefWithLengthWritable>();
- for (ItemPrefWithLengthWritable itemPref : itemPrefArray.getItemPrefs()) {
- set.add(itemPref);
- }
+ Collection<ItemPrefWithLengthWritable> set = new HashSet<ItemPrefWithLengthWritable>();
+ set.addAll(Arrays.asList(itemPrefArray.getItemPrefs()));
if (set.size() != prefsToCheck.size()) {
return false;
@@ -183,40 +185,45 @@ public class ItemSimilarityTest extends
}
public void testCopreferredItemsMapper() throws Exception {
- Mapper.Context ctx = createMock(Mapper.Context.class);
- ItemPrefWithLengthArrayWritable itemPrefs = createMock(ItemPrefWithLengthArrayWritable.class);
+ OutputCollector<ItemPairWritable,FloatWritable> output =
+ EasyMock.createMock(OutputCollector.class);
+ ItemPrefWithLengthArrayWritable itemPrefs =
+ EasyMock.createMock(ItemPrefWithLengthArrayWritable.class);
- expect(itemPrefs.getItemPrefs()).andReturn(new ItemPrefWithLengthWritable[] {
+ EasyMock.expect(itemPrefs.getItemPrefs()).andReturn(new ItemPrefWithLengthWritable[] {
new ItemPrefWithLengthWritable(34L, 2.0, 1.0f), new ItemPrefWithLengthWritable(56L, 3.0, 2.0f),
new ItemPrefWithLengthWritable(78L, 4.0, 3.0f) });
- ctx.write(new ItemPairWritable(34L, 56L, 6.0), new FloatWritable(2.0f));
- ctx.write(new ItemPairWritable(34L, 78L, 8.0), new FloatWritable(3.0f));
- ctx.write(new ItemPairWritable(56L, 78L, 12.0), new FloatWritable(6.0f));
+ output.collect(new ItemPairWritable(34L, 56L, 6.0), new FloatWritable(2.0f));
+ output.collect(new ItemPairWritable(34L, 78L, 8.0), new FloatWritable(3.0f));
+ output.collect(new ItemPairWritable(56L, 78L, 12.0), new FloatWritable(6.0f));
- replay(ctx, itemPrefs);
+ EasyMock.replay(output, itemPrefs);
- new CopreferredItemsMapper().map(new EntityWritable(), itemPrefs, ctx);
+ new CopreferredItemsMapper().map(new LongWritable(), itemPrefs, output, null);
- verify(ctx, itemPrefs);
+ EasyMock.verify(output, itemPrefs);
}
public void testCosineSimilarityReducer() throws Exception {
- Reducer.Context ctx = createMock(Reducer.Context.class);
+ OutputCollector<EntityEntityWritable,DoubleWritable> output =
+ EasyMock.createMock(OutputCollector.class);
- ctx.write(new EntityEntityWritable(12L, 34L), new DoubleWritable(0.5d));
+ output.collect(new EntityEntityWritable(12L, 34L), new DoubleWritable(0.5d));
- replay(ctx);
+ EasyMock.replay(output);
new CosineSimilarityReducer().reduce(new ItemPairWritable(12L, 34L, 20.0),
- Arrays.asList(new FloatWritable(5.0f), new FloatWritable(5.0f)), ctx);
+ Arrays.asList(new FloatWritable(5.0f),
+ new FloatWritable(5.0f)).iterator(), output, null);
- verify(ctx);
+ EasyMock.verify(output);
}
public void testCompleteJob() throws Exception {
- String tmpDirPath = System.getProperty("java.io.tmpdir")+ '/' +ItemSimilarityTest.class.getCanonicalName();
+ String tmpDirPath = System.getProperty("java.io.tmpdir") +
+ ItemSimilarityTest.class.getCanonicalName();
File tmpDir = new File(tmpDirPath);
try {
@@ -250,14 +257,16 @@ public class ItemSimilarityTest extends
Configuration conf = new Configuration();
conf.set("mapred.input.dir", tmpDirPath+"/prefs.txt");
conf.set("mapred.output.dir", tmpDirPath+"/output");
+ conf.set("mapred.output.compress", Boolean.FALSE.toString());
similarityJob.setConf(conf);
similarityJob.run(new String[] { "--tempDir", tmpDirPath+"/tmp"});
- BufferedReader reader = new BufferedReader(new FileReader(tmpDirPath+"/output/part-r-00000"));
+ String filePath = tmpDirPath+"/output/part-00000";
+ BufferedReader reader = new BufferedReader(new FileReader(filePath));
- String line = null;
+ String line;
int currentLine = 1;
while ( (line = reader.readLine()) != null) {