You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/04/05 07:59:18 UTC

svn commit: r930801 - in /lucene/mahout/trunk/core/src: main/java/org/apache/mahout/cf/taste/hadoop/ main/java/org/apache/mahout/cf/taste/hadoop/similarity/ main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ main/java/org/apache/mahout/cf/tas...

Author: srowen
Date: Mon Apr  5 05:59:17 2010
New Revision: 930801

URL: http://svn.apache.org/viewvc?rev=930801&view=rev
Log:
Initial commit of MAHOUT-362. Refactoring to come.

Added:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CosineSimilarityReducer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/UserPrefsPerItemMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPairWritable.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthArrayWritable.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthWritable.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserPrefArrayWritable.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserPrefWritable.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserWritable.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/ItemSimilarityTest.java
Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemPrefWritable.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemWritable.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemPrefWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemPrefWritable.java?rev=930801&r1=930800&r2=930801&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemPrefWritable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemPrefWritable.java Mon Apr  5 05:59:17 2010
@@ -22,9 +22,11 @@ import java.io.DataOutput;
 import java.io.IOException;
 
 import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.mahout.common.RandomUtils;
 
 /** A {@link Writable} encapsulating an item ID and a preference value. */
-public final class ItemPrefWritable extends ItemWritable {
+public final class ItemPrefWritable extends ItemWritable implements WritableComparable<ItemPrefWritable> {
   
   private float prefValue;
   
@@ -62,5 +64,20 @@ public final class ItemPrefWritable exte
     writable.readFields(in);
     return writable;
   }
+
+  @Override
+  public int hashCode() {
+    return RandomUtils.hashFloat(this.prefValue) ^ super.hashCode(); // pref hash XOR inherited hash
+  }
+
+  /** Equal when item IDs match and Float.compare sees the same preference (so NaN equals NaN, 0.0f != -0.0f). */
+  @Override
+  public boolean equals(Object o) {
+    if (!(o instanceof ItemPrefWritable)) {
+      return false;
+    }
+    ItemPrefWritable other = (ItemPrefWritable) o;
+    return getItemID() == other.getItemID() && Float.compare(prefValue, other.getPrefValue()) == 0;
+  }
   
 }
\ No newline at end of file

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemWritable.java?rev=930801&r1=930800&r2=930801&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemWritable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemWritable.java Mon Apr  5 05:59:17 2010
@@ -17,14 +17,16 @@
 
 package org.apache.mahout.cf.taste.hadoop;
 
-import org.apache.hadoop.io.Writable;
-
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
 
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.mahout.common.RandomUtils;
+
 /** A {@link Writable} encapsulating an item ID. */
-public class ItemWritable implements Writable {
+public class ItemWritable implements WritableComparable<ItemWritable> {
 
   private long itemID;
 
@@ -60,4 +62,20 @@ public class ItemWritable implements Wri
     return writable;
   }
 
+  @Override
+  public int compareTo(ItemWritable other) {
+    long otherID = other.getItemID(); // ordering is by ascending item ID
+    return itemID == otherID ? 0 : (itemID < otherID ? -1 : 1);
+  }
+
+  @Override
+  public int hashCode() {
+    return RandomUtils.hashLong(this.itemID); // hash is derived from the item ID only
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    return (o instanceof ItemWritable) ? ((ItemWritable) o).getItemID() == itemID : false; // ID-only equality
+  }
+
 }
\ No newline at end of file

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java?rev=930801&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java Mon Apr  5 05:59:17 2010
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity.item;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPairWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthArrayWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.UserWritable;
+
+/**
+ * Emits every pair of items that occurs together in one user-vector: the key holds both item IDs (smaller
+ * first) and the product of the two item-vector lengths, the value the product of the two preference values.
+ */
+public final class CopreferredItemsMapper
+    extends Mapper<UserWritable,ItemPrefWithLengthArrayWritable,ItemPairWritable,FloatWritable> {
+
+  @Override
+  protected void map(UserWritable user, ItemPrefWithLengthArrayWritable itemPrefsArray, Context context)
+      throws IOException, InterruptedException {
+
+    ItemPrefWithLengthWritable[] prefs = itemPrefsArray.getItemPrefs();
+    int count = prefs.length;
+    for (int i = 0; i < count; i++) {
+      ItemPrefWithLengthWritable first = prefs[i];
+      long firstID = first.getItemID();
+      double firstLength = first.getLength();
+      float firstValue = first.getPrefValue();
+      for (int j = i + 1; j < count; j++) {
+        ItemPrefWithLengthWritable second = prefs[j];
+        long secondID = second.getItemID();
+        // the smaller ID always comes first, so either ordering of the two items yields the same IDs
+        ItemPairWritable key = new ItemPairWritable(Math.min(firstID, secondID), Math.max(firstID, secondID),
+            firstLength * second.getLength());
+        context.write(key, new FloatWritable(firstValue * second.getPrefValue()));
+      }
+    }
+
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CosineSimilarityReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CosineSimilarityReducer.java?rev=930801&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CosineSimilarityReducer.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CosineSimilarityReducer.java Mon Apr  5 05:59:17 2010
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity.item;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.mahout.cf.taste.hadoop.ItemItemWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPairWritable;
+
+/**
+ * Final step: adds up the summands received for an item pair and divides the total by the
+ * multiplied vector lengths carried in the key, emitting the result as the pair's cosine.
+ */
+public final class CosineSimilarityReducer
+    extends Reducer<ItemPairWritable,FloatWritable,ItemItemWritable,DoubleWritable> {
+
+  @Override
+  protected void reduce(ItemPairWritable pair, Iterable<FloatWritable> numeratorSummands, Context context)
+      throws IOException, InterruptedException {
+
+    // the float summands are accumulated in a double
+    double sum = 0.0;
+    for (FloatWritable summand : numeratorSummands) {
+      sum += summand.get();
+    }
+
+    // the key's multiplied length serves as the denominator
+    double cosine = sum / pair.getMultipliedLength();
+
+    context.write(pair.getItemItemWritable(), new DoubleWritable(cosine));
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java?rev=930801&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java Mon Apr  5 05:59:17 2010
@@ -0,0 +1,205 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity.item;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.mahout.cf.taste.hadoop.ItemItemWritable;
+import org.apache.mahout.cf.taste.hadoop.ItemWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPairWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthArrayWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.UserPrefArrayWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.UserPrefWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.UserWritable;
+import org.apache.mahout.common.AbstractJob;
+
+/**
+ * <p>Runs a completely distributed computation of the cosine similarity of the item vectors of the
+ * user-item-matrix as a series of mapreduces.</p>
+ *
+ * <p>Algorithm used is a slight modification from the algorithm described in
+ * http://www.umiacs.umd.edu/~jimmylin/publications/Elsayed_etal_ACL2008_short.pdf</p>
+ *
+ * <pre>
+ * Example:
+ *
+ * user-item-matrix:
+ *
+ *                  Game   Mouse    PC
+ *          Peter     0       1      2
+ *          Paul      1       0      1
+ *
+ * Input:
+ *
+ *  (Peter,Mouse,1)
+ *  (Peter,PC,2)
+ *  (Paul,Game,1)
+ *  (Paul,PC,1)
+ *
+ * Step 1: Create the item-vectors
+ *
+ *  Game  -> (Paul,1)
+ *  Mouse -> (Peter,1)
+ *  PC    -> (Peter,2),(Paul,1)
+ *
+ * Step 2: Compute the length of the item vectors, store it with the item, create the user-vectors
+ *
+ *  Peter -> (Mouse,1,1),(PC,2.236,2)
+ *  Paul  -> (Game,1,1),(PC,2.236,1)
+ *
+ * Step 3: Compute the pairwise cosine for all item pairs that have been co-rated by at least one user
+ *
+ *  Mouse,PC  -> 1 * 2 / (1 * 2.236)
+ *  Game,PC   -> 1 * 1 / (1 * 2.236)
+ *
+ * </pre>
+ *
+ * <p>Command line arguments specific to this class are:</p>
+ *
+ * <ol>
+ * <li>-Dmapred.input.dir=(path): Directory containing a text file containing the entries of the user-item-matrix in
+ * the form userID,itemID,preference
+ * (one entry per line)</li>
+ * <li>-Dmapred.output.dir=(path): output path where the computations output should go</li>
+ * </ol>
+ *
+ *
+ * <p>General command line options are documented in {@link AbstractJob}.</p>
+ * <p>Please consider supplying a --tempDir parameter for this job, as it needs to write some intermediate files</p>
+ *
+ * <p>Note that because of how Hadoop parses arguments, all "-D" arguments must appear before all other
+ * arguments.</p>
+ */
+public final class ItemSimilarityJob extends AbstractJob {
+
+  /**
+   * Runs the three map-reduce passes in order, stopping at the first pass that fails (each reads its predecessor's output).
+   * @return 0 if every pass succeeds, -1 if the arguments cannot be parsed or any pass fails
+   */
+  @Override
+  public int run(String[] args) throws Exception {
+    Map<String,String> parsedArgs = AbstractJob.parseArguments(args);
+    if (parsedArgs == null) {
+      return -1;
+    }
+
+    Configuration originalConf = getConf();
+    String inputPath = originalConf.get("mapred.input.dir");
+    String outputPath = originalConf.get("mapred.output.dir");
+    String tempDirPath = parsedArgs.get("--tempDir");
+    String itemVectorsPath = tempDirPath + "/itemVectors";
+    String userVectorsPath = tempDirPath + "/userVectors";
+
+    Job itemVectors = createJob(originalConf, "itemVectors", inputPath, itemVectorsPath, UserPrefsPerItemMapper.class,
+        ItemWritable.class, UserPrefWritable.class, ToItemVectorReducer.class, ItemWritable.class,
+        UserPrefArrayWritable.class, TextInputFormat.class, SequenceFileOutputFormat.class, true);
+    if (!itemVectors.waitForCompletion(true)) {
+      return -1;
+    }
+
+    Job userVectors = createJob(originalConf, "userVectors", itemVectorsPath, userVectorsPath,
+        PreferredItemsPerUserMapper.class, UserWritable.class, ItemPrefWithLengthWritable.class,
+        PreferredItemsPerUserReducer.class, UserWritable.class, ItemPrefWithLengthArrayWritable.class);
+    if (!userVectors.waitForCompletion(true)) {
+      return -1;
+    }
+
+    Job similarity = createJob(originalConf, "similarity", userVectorsPath, outputPath,
+        CopreferredItemsMapper.class, ItemPairWritable.class, FloatWritable.class, CosineSimilarityReducer.class,
+        ItemItemWritable.class, DoubleWritable.class, SequenceFileInputFormat.class, TextOutputFormat.class, false);
+    return similarity.waitForCompletion(true) ? 0 : -1;
+  }
+
+  public static void main(String[] args) throws Exception {
+    ToolRunner.run(new Configuration(), new ItemSimilarityJob(), args);
+  }
+
+  /** Same as the full overload, with sequence-file input and output formats and output compression enabled. */
+  protected static Job createJob(Configuration conf,
+                                 String jobName,
+                                 String inputPath,
+                                 String outputPath,
+                                 Class<? extends Mapper> mapperClass,
+                                 Class<? extends Writable> mapKeyOutClass,
+                                 Class<? extends Writable> mapValueOutClass,
+                                 Class<? extends Reducer> reducerClass,
+                                 Class<? extends Writable> keyOutClass,
+                                 Class<? extends Writable> valueOutClass) throws IOException {
+    return createJob(conf, jobName, inputPath, outputPath, mapperClass, mapKeyOutClass,
+        mapValueOutClass, reducerClass, keyOutClass, valueOutClass, SequenceFileInputFormat.class,
+        SequenceFileOutputFormat.class, true);
+  }
+
+  /**
+   * Builds (without starting) a job configured with the given paths, mapper, reducer, key/value classes and formats.
+   */
+  protected static Job createJob(Configuration conf,
+                                 String jobName,
+                                 String inputPath,
+                                 String outputPath,
+                                 Class<? extends Mapper> mapperClass,
+                                 Class<? extends Writable> mapKeyOutClass,
+                                 Class<? extends Writable> mapValueOutClass,
+                                 Class<? extends Reducer> reducerClass,
+                                 Class<? extends Writable> keyOutClass,
+                                 Class<? extends Writable> valueOutClass,
+                                 Class<? extends FileInputFormat> fileInputFormatClass,
+                                 Class<? extends FileOutputFormat> fileOutputFormatClass,
+                                 boolean compress) throws IOException {
+    Job job = new Job(conf, jobName);
+    FileSystem fs = FileSystem.get(conf);
+    Path inputPathPath = new Path(inputPath).makeQualified(fs);
+    Path outputPathPath = new Path(outputPath).makeQualified(fs);
+
+    FileInputFormat.setInputPaths(job, inputPathPath);
+    job.setInputFormatClass(fileInputFormatClass);
+
+    job.setMapperClass(mapperClass);
+    job.setMapOutputKeyClass(mapKeyOutClass);
+    job.setMapOutputValueClass(mapValueOutClass);
+
+    job.setReducerClass(reducerClass);
+    job.setOutputKeyClass(keyOutClass);
+    job.setOutputValueClass(valueOutClass);
+
+    FileOutputFormat.setOutputPath(job, outputPathPath);
+    FileOutputFormat.setCompressOutput(job, compress);
+    job.setOutputFormatClass(fileOutputFormatClass);
+
+    return job;
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java?rev=930801&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java Mon Apr  5 05:59:17 2010
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity.item;
+
+import java.io.IOException;
+
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.mahout.cf.taste.hadoop.ItemWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.UserPrefArrayWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.UserPrefWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.UserWritable;
+
+/**
+ * Computes the length (square root of the summed squared preferences) of each incoming item-vector and
+ * re-emits every entry keyed by its user, together with the item ID and that length, for the reducer.
+ */
+public final class PreferredItemsPerUserMapper
+    extends Mapper<ItemWritable,UserPrefArrayWritable,UserWritable,ItemPrefWithLengthWritable> {
+
+  @Override
+  protected void map(ItemWritable item, UserPrefArrayWritable userPrefsArray, Context context)
+      throws IOException, InterruptedException {
+    UserPrefWritable[] prefs = userPrefsArray.getUserPrefs();
+    double length = vectorLength(prefs);
+    for (int i = 0; i < prefs.length; i++) {
+      UserPrefWritable pref = prefs[i];
+      UserWritable user = new UserWritable(pref.getUserID());
+      context.write(user, new ItemPrefWithLengthWritable(item.getItemID(), length, pref.getPrefValue()));
+    }
+  }
+
+  /** Square root of the sum of the squared preference values. */
+  private static double vectorLength(UserPrefWritable[] prefs) {
+    double sumOfSquares = 0.0;
+    for (UserPrefWritable pref : prefs) {
+      double value = pref.getPrefValue();
+      sumOfSquares += value * value;
+    }
+    return Math.sqrt(sumOfSquares);
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java?rev=930801&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java Mon Apr  5 05:59:17 2010
@@ -0,0 +1,47 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity.item;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthArrayWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.UserWritable;
+
+/** Collects the item entries (with length and preference) received for one user into a single array value. */
+public final class PreferredItemsPerUserReducer
+    extends Reducer<UserWritable,ItemPrefWithLengthWritable,UserWritable,ItemPrefWithLengthArrayWritable> {
+
+  @Override
+  protected void reduce(UserWritable user, Iterable<ItemPrefWithLengthWritable> itemPrefs, Context context)
+      throws IOException, InterruptedException {
+    // entries that compare equal are kept only once; each value is copied before it is stored
+    Set<ItemPrefWithLengthWritable> distinctPrefs = new HashSet<ItemPrefWithLengthWritable>();
+    for (ItemPrefWithLengthWritable incoming : itemPrefs) {
+      distinctPrefs.add(incoming.deepCopy());
+    }
+
+    ItemPrefWithLengthWritable[] userVector = new ItemPrefWithLengthWritable[distinctPrefs.size()];
+    distinctPrefs.toArray(userVector);
+    context.write(user, new ItemPrefWithLengthArrayWritable(userVector));
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java?rev=930801&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java Mon Apr  5 05:59:17 2010
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity.item;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.mahout.cf.taste.hadoop.ItemWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.UserPrefArrayWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.UserPrefWritable;
+
+/**
+ * Builds one item vector of the user-item-matrix: the (user, preference) entries received
+ * for a single item are collected and written out under that item.
+ */
+public final class ToItemVectorReducer
+    extends Reducer<ItemWritable,UserPrefWritable,ItemWritable,UserPrefArrayWritable> {
+
+  @Override
+  protected void reduce(ItemWritable item, Iterable<UserPrefWritable> userPrefs, Context context)
+      throws IOException, InterruptedException {
+    // entries that compare equal are kept only once; each value is copied before it is stored
+    Set<UserPrefWritable> distinctPrefs = new HashSet<UserPrefWritable>();
+    for (UserPrefWritable incoming : userPrefs) {
+      distinctPrefs.add(incoming.deepCopy());
+    }
+
+    UserPrefWritable[] itemVector = new UserPrefWritable[distinctPrefs.size()];
+    distinctPrefs.toArray(itemVector);
+    context.write(item, new UserPrefArrayWritable(itemVector));
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/UserPrefsPerItemMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/UserPrefsPerItemMapper.java?rev=930801&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/UserPrefsPerItemMapper.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/UserPrefsPerItemMapper.java Mon Apr  5 05:59:17 2010
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity.item;
+
+import java.io.IOException;
+import java.util.regex.Pattern;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.mahout.cf.taste.hadoop.ItemWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.UserPrefWritable;
+
+/**
+ * Parses one "userID,itemID,preference" line of the preferences file and emits it keyed by item,
+ * with the user and her preference value as the payload.
+ */
+public final class UserPrefsPerItemMapper extends Mapper<LongWritable,Text,ItemWritable,UserPrefWritable> {
+
+  /** Separator between the fields of an input line. */
+  private static final Pattern COMMA = Pattern.compile(",");
+
+  @Override
+  protected void map(LongWritable key, Text value, Context context)
+      throws IOException, InterruptedException {
+    String[] fields = COMMA.split(value.toString());
+    // fields are read positionally: user, item, preference
+    long userID = Long.parseLong(fields[0]);
+    long itemID = Long.parseLong(fields[1]);
+    float pref = Float.parseFloat(fields[2]);
+    ItemWritable item = new ItemWritable(itemID);
+    UserPrefWritable userPref = new UserPrefWritable(userID, pref);
+    context.write(item, userPref);
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPairWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPairWritable.java?rev=930801&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPairWritable.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPairWritable.java Mon Apr  5 05:59:17 2010
@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity.item.writables;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.mahout.cf.taste.hadoop.ItemItemWritable;
+
+/**
+ * A {@link WritableComparable} encapsulating two items together with their
+ * multiplied vector lengths
+ */
+public final class ItemPairWritable implements WritableComparable<ItemPairWritable> {
+
+  private ItemItemWritable itemItemWritable;
+  private double multipliedLength;
+
+  public ItemPairWritable() {
+  }
+
+  public ItemPairWritable(long itemAID, long itemBID, double multipliedLength) {
+    this.itemItemWritable = new ItemItemWritable(itemAID, itemBID);
+    this.multipliedLength = multipliedLength;
+  }
+
+  public long getItemAID() {
+    return itemItemWritable.getItemAID();
+  }
+
+  public long getItemBID() {
+    return itemItemWritable.getItemBID();
+  }
+
+  public ItemItemWritable getItemItemWritable() {
+    return itemItemWritable;
+  }
+
+  public double getMultipliedLength() {
+    return multipliedLength;
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    itemItemWritable = ItemItemWritable.read(in);
+    multipliedLength = in.readDouble();
+  }
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    itemItemWritable.write(out);
+    out.writeDouble(multipliedLength);
+  }
+
+  @Override
+  public int compareTo(ItemPairWritable other) {
+    return itemItemWritable.compareTo(other.getItemItemWritable());
+  }
+
+  @Override
+  public int hashCode() {
+    return itemItemWritable.hashCode();
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (o instanceof ItemPairWritable) {
+      return itemItemWritable.equals(((ItemPairWritable) o).getItemItemWritable());
+    }
+    return false;
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthArrayWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthArrayWritable.java?rev=930801&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthArrayWritable.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthArrayWritable.java Mon Apr  5 05:59:17 2010
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity.item.writables;
+
+import org.apache.hadoop.io.ArrayWritable;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * An {@link ArrayWritable} whose elements are {@link ItemPrefWithLengthWritable}s.
+ *
+ * Used as user-vector
+ */
+public final class ItemPrefWithLengthArrayWritable extends ArrayWritable {
+
+  /** Creates an array writable typed for {@link ItemPrefWithLengthWritable} without setting any values. */
+  public ItemPrefWithLengthArrayWritable() {
+    super(ItemPrefWithLengthWritable.class);
+  }
+
+  /** Creates an array writable holding the given item preferences. */
+  public ItemPrefWithLengthArrayWritable(ItemPrefWithLengthWritable[] itemPrefs) {
+    super(ItemPrefWithLengthWritable.class, itemPrefs);
+  }
+
+  /**
+   * Returns the stored values as a freshly allocated, correctly typed array.
+   *
+   * @return a new array containing every stored element cast to {@link ItemPrefWithLengthWritable}
+   * @throws ClassCastException if a stored element is not an {@link ItemPrefWithLengthWritable}
+   */
+  public ItemPrefWithLengthWritable[] getItemPrefs() {
+    Writable[] raw = get();
+    ItemPrefWithLengthWritable[] typed = new ItemPrefWithLengthWritable[raw.length];
+    int index = 0;
+    for (Writable element : raw) {
+      typed[index] = (ItemPrefWithLengthWritable) element;
+      index++;
+    }
+    return typed;
+  }
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthWritable.java?rev=930801&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthWritable.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthWritable.java Mon Apr  5 05:59:17 2010
@@ -0,0 +1,92 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity.item.writables;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.Writable;
+import org.apache.mahout.common.RandomUtils;
+
+/**
+ * A {@link Writable} encapsulating the preference for an item
+ * stored along with the length of the item-vector
+ *
+ */
+public final class ItemPrefWithLengthWritable implements Writable {
+
+  private long itemID;
+  private double length;
+  private float prefValue;
+
+  public ItemPrefWithLengthWritable() {
+  // do nothing
+  }
+
+  public ItemPrefWithLengthWritable(long itemID, double length, float prefValue) {
+    this.itemID = itemID;
+    this.length = length;
+    this.prefValue = prefValue;
+  }
+
+  public long getItemID() {
+    return itemID;
+  }
+
+  public double getLength() {
+    return length;
+  }
+
+  public float getPrefValue() {
+    return prefValue;
+  }
+
+  public ItemPrefWithLengthWritable deepCopy() {
+    return new ItemPrefWithLengthWritable(itemID, length, prefValue);
+  }
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    out.writeLong(itemID);
+    out.writeDouble(length);
+    out.writeFloat(prefValue);
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    itemID = in.readLong();
+    length = in.readDouble();
+    prefValue = in.readFloat();
+  }
+
+  @Override
+  public int hashCode() {
+    return RandomUtils.hashLong(itemID) + 31 * RandomUtils.hashDouble(length) + 31 * RandomUtils.hashFloat(prefValue);
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (o instanceof ItemPrefWithLengthWritable) {
+      ItemPrefWithLengthWritable other = (ItemPrefWithLengthWritable) o;
+      return (itemID == other.getItemID() && length == other.getLength() && prefValue == other.getPrefValue());
+    }
+    return false;
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserPrefArrayWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserPrefArrayWritable.java?rev=930801&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserPrefArrayWritable.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserPrefArrayWritable.java Mon Apr  5 05:59:17 2010
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity.item.writables;
+
+import org.apache.hadoop.io.ArrayWritable;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * An {@link ArrayWritable} whose elements are {@link UserPrefWritable}s.
+ *
+ * Used to represent an item-vector
+ */
+public final class UserPrefArrayWritable extends ArrayWritable {
+
+  /** Creates an array writable typed for {@link UserPrefWritable} without setting any values. */
+  public UserPrefArrayWritable() {
+    super(UserPrefWritable.class);
+  }
+
+  /** Creates an array writable holding the given user preferences. */
+  public UserPrefArrayWritable(UserPrefWritable[] userPrefs) {
+    super(UserPrefWritable.class, userPrefs);
+  }
+
+  /**
+   * Returns the stored values as a freshly allocated, correctly typed array.
+   *
+   * @return a new array containing every stored element cast to {@link UserPrefWritable}
+   * @throws ClassCastException if a stored element is not a {@link UserPrefWritable}
+   */
+  public UserPrefWritable[] getUserPrefs() {
+    Writable[] raw = get();
+    UserPrefWritable[] typed = new UserPrefWritable[raw.length];
+    int index = 0;
+    for (Writable element : raw) {
+      typed[index] = (UserPrefWritable) element;
+      index++;
+    }
+    return typed;
+  }
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserPrefWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserPrefWritable.java?rev=930801&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserPrefWritable.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserPrefWritable.java Mon Apr  5 05:59:17 2010
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity.item.writables;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.Writable;
+
+/**
+ * A {@link Writable} encapsulating an item ID together with a preference value.
+ *
+ * Used as entry in an item-vector
+ */
+public final class UserPrefWritable extends UserWritable {
+
+  private float prefValue;
+
+  public UserPrefWritable() {
+  }
+
+  public UserPrefWritable(long userID, float prefValue) {
+    super(userID);
+    this.prefValue = prefValue;
+  }
+
+  public float getPrefValue() {
+    return prefValue;
+  }
+
+  public UserPrefWritable deepCopy() {
+    return new UserPrefWritable(getUserID(), prefValue);
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    super.readFields(in);
+    prefValue =  in.readFloat();
+  }
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+   super.write(out);
+   out.writeFloat(prefValue);
+  }
+
+  @Override
+  public int hashCode() {
+    return super.hashCode();
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (o instanceof UserPrefWritable) {
+      UserWritable other = (UserWritable) o;
+      return super.equals(other);
+    }
+    return false;
+  }
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserWritable.java?rev=930801&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserWritable.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserWritable.java Mon Apr  5 05:59:17 2010
@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity.item.writables;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.mahout.common.RandomUtils;
+
+/**
+ * A {@link WritableComparable} encapsulating a user ID.
+ *
+ * <p>Deliberately not <code>final</code>: {@link UserPrefWritable} in this package extends it, which a final
+ * class would not permit.</p>
+ */
+public class UserWritable implements WritableComparable<UserWritable> {
+
+  private long userID;
+
+  /** Creates an empty instance, to be populated through {@link #readFields(DataInput)}. */
+  public UserWritable() {
+    // do nothing
+  }
+
+  /** @param userID the user ID to wrap */
+  public UserWritable(long userID) {
+    this.userID = userID;
+  }
+
+  public long getUserID() {
+    return userID;
+  }
+
+  /** Writes the user ID as a single long. */
+  @Override
+  public void write(DataOutput out) throws IOException {
+    out.writeLong(userID);
+  }
+
+  /** Reads the user ID as a single long. */
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    userID = in.readLong();
+  }
+
+  /** Orders instances by ascending user ID. */
+  @Override
+  public int compareTo(UserWritable other) {
+    return compare(userID, other.getUserID());
+  }
+
+  /** Three-way comparison of two longs that cannot overflow, unlike subtraction. */
+  private static int compare(long a, long b) {
+    return a < b ? -1 : a > b ? 1 : 0;
+  }
+
+  @Override
+  public int hashCode() {
+    return RandomUtils.hashLong(userID);
+  }
+
+  /**
+   * Equal to any {@link UserWritable} (including subclass instances) with the same user ID.
+   */
+  @Override
+  public boolean equals(Object o) {
+    // NOTE(review): UserPrefWritable.equals() only accepts other UserPrefWritables, so comparing a plain
+    // UserWritable with a UserPrefWritable is not symmetric -- confirm mixed comparisons never occur.
+    if (o instanceof UserWritable) {
+      return (userID == ((UserWritable) o).getUserID());
+    }
+    return false;
+  }
+
+  /**
+   * Creates a new instance and populates it from the given input.
+   *
+   * @param in the input to read the user ID from
+   * @return the newly read instance
+   * @throws IOException if reading from the input fails
+   */
+  public static UserWritable read(DataInput in) throws IOException {
+    UserWritable writable = new UserWritable();
+    writable.readFields(in);
+    return writable;
+  }
+
+}

Added: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/ItemSimilarityTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/ItemSimilarityTest.java?rev=930801&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/ItemSimilarityTest.java (added)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/ItemSimilarityTest.java Mon Apr  5 05:59:17 2010
@@ -0,0 +1,313 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity;
+
+import static org.easymock.EasyMock.eq;
+import static org.easymock.EasyMock.expect;
+import static org.easymock.classextension.EasyMock.createMock;
+import static org.easymock.classextension.EasyMock.replay;
+import static org.easymock.classextension.EasyMock.verify;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.mahout.cf.taste.hadoop.ItemItemWritable;
+import org.apache.mahout.cf.taste.hadoop.ItemWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.CopreferredItemsMapper;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.CosineSimilarityReducer;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.ItemSimilarityJob;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.PreferredItemsPerUserMapper;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.PreferredItemsPerUserReducer;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.ToItemVectorReducer;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.UserPrefsPerItemMapper;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPairWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthArrayWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.UserPrefArrayWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.UserPrefWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.UserWritable;
+import org.apache.mahout.common.MahoutTestCase;
+import org.easymock.IArgumentMatcher;
+import org.easymock.classextension.EasyMock;
+
+/**
+ * Unit tests for the mappers and reducers in org.apache.mahout.cf.taste.hadoop.similarity
+ * Integration test with a mini-file at the end
+ *
+ */
+@SuppressWarnings("unchecked")
+public class ItemSimilarityTest extends MahoutTestCase {
+
+
+  /** The line "12,34,2.3" must be emitted with item 34 as key and user 12's preference as value. */
+  public void testUserPrefsPerItemMapper() throws Exception {
+    Mapper.Context context = createMock(Mapper.Context.class);
+
+    // record the single write we expect from the mapper
+    context.write(new ItemWritable(34L), new UserPrefWritable(12L, 2.3f));
+    replay(context);
+
+    UserPrefsPerItemMapper mapper = new UserPrefsPerItemMapper();
+    mapper.map(new LongWritable(), new Text("12,34,2.3"), context);
+
+    verify(context);
+  }
+
+  /** All user preferences reduced for item 12 must be written out as one array under that item. */
+  public void testToItemVectorReducer() throws Exception {
+    UserPrefWritable firstPref = new UserPrefWritable(34L, 1f);
+    UserPrefWritable secondPref = new UserPrefWritable(56L, 2f);
+    List<UserPrefWritable> prefsForItem = Arrays.asList(firstPref, secondPref);
+
+    Reducer.Context context = createMock(Reducer.Context.class);
+    // expected output: same item key, array containing exactly the incoming preferences
+    context.write(eq(new ItemWritable(12L)), equalToUserPrefs(prefsForItem));
+    replay(context);
+
+    ToItemVectorReducer reducer = new ToItemVectorReducer();
+    reducer.reduce(new ItemWritable(12L), prefsForItem, context);
+
+    verify(context);
+  }
+
+  static UserPrefArrayWritable equalToUserPrefs(final Collection<UserPrefWritable> prefsToCheck) {
+    EasyMock.reportMatcher(new IArgumentMatcher() {
+      @Override
+      public boolean matches(Object argument) {
+        if (argument instanceof UserPrefArrayWritable) {
+          UserPrefArrayWritable userPrefArray = (UserPrefArrayWritable) argument;
+          Set<UserPrefWritable> set = new HashSet<UserPrefWritable>();
+          for (UserPrefWritable userPref : userPrefArray.getUserPrefs()) {
+            set.add(userPref);
+          }
+
+          if (set.size() != prefsToCheck.size()) {
+            return false;
+          }
+
+          for (UserPrefWritable prefToCheck : prefsToCheck) {
+            if (!set.contains(prefToCheck)) {
+              return false;
+            }
+          }
+          return true;
+        }
+        return false;
+      }
+
+      @Override
+      public void appendTo(StringBuffer buffer) {}
+    });
+
+    return null;
+  }
+
+  /**
+   * An item-vector for item 34 with preferences from users 12 and 56 must be emitted once per user,
+   * each entry carrying the vector's length.
+   */
+  public void testPreferredItemsPerUserMapper() throws Exception {
+    Mapper.Context ctx = createMock(Mapper.Context.class);
+
+    // UserPrefArrayWritable is final and therefore cannot be mocked by subclassing; a real instance
+    // holding the same two preferences is used as mapper input instead.
+    UserPrefArrayWritable userPrefs = new UserPrefArrayWritable(
+        new UserPrefWritable[] { new UserPrefWritable(12l, 2f), new UserPrefWritable(56l, 3f) });
+
+    // length of the item-vector (2, 3)
+    double length = Math.sqrt(Math.pow(2f, 2) + Math.pow(3f, 2));
+
+    ctx.write(new UserWritable(12l), new ItemPrefWithLengthWritable(34l, length, 2f));
+    ctx.write(new UserWritable(56l), new ItemPrefWithLengthWritable(34l, length, 3f));
+
+    replay(ctx);
+
+    new PreferredItemsPerUserMapper().map(new ItemWritable(34l), userPrefs, ctx);
+
+    verify(ctx);
+  }
+
+  /** All item preferences reduced for user 12 must be written out as one array under that user. */
+  public void testPreferredItemsPerUserReducer() throws Exception {
+    ItemPrefWithLengthWritable firstPref = new ItemPrefWithLengthWritable(34L, 5d, 1f);
+    ItemPrefWithLengthWritable secondPref = new ItemPrefWithLengthWritable(56L, 7d, 2f);
+    List<ItemPrefWithLengthWritable> prefsOfUser = Arrays.asList(firstPref, secondPref);
+
+    Reducer.Context context = createMock(Reducer.Context.class);
+    // expected output: same user key, array containing exactly the incoming preferences
+    context.write(eq(new UserWritable(12L)), equalToItemPrefs(prefsOfUser));
+    replay(context);
+
+    PreferredItemsPerUserReducer reducer = new PreferredItemsPerUserReducer();
+    reducer.reduce(new UserWritable(12L), prefsOfUser, context);
+
+    verify(context);
+  }
+
+  static ItemPrefWithLengthArrayWritable equalToItemPrefs(final Collection<ItemPrefWithLengthWritable> prefsToCheck) {
+    EasyMock.reportMatcher(new IArgumentMatcher() {
+      @Override
+      public boolean matches(Object argument) {
+        if (argument instanceof ItemPrefWithLengthArrayWritable) {
+          ItemPrefWithLengthArrayWritable itemPrefArray = (ItemPrefWithLengthArrayWritable) argument;
+          Set<ItemPrefWithLengthWritable> set = new HashSet<ItemPrefWithLengthWritable>();
+          for (ItemPrefWithLengthWritable itemPref : itemPrefArray.getItemPrefs()) {
+            set.add(itemPref);
+          }
+
+          if (set.size() != prefsToCheck.size()) {
+            return false;
+          }
+
+          for (ItemPrefWithLengthWritable prefToCheck : prefsToCheck) {
+            if (!set.contains(prefToCheck)) {
+              return false;
+            }
+          }
+          return true;
+        }
+        return false;
+      }
+
+      @Override
+      public void appendTo(StringBuffer buffer) {}
+    });
+
+    return null;
+  }
+
+  /**
+   * A user-vector with three items must yield one output per item pair, keyed by the pair with the
+   * product of both lengths and valued with the product of both preferences.
+   */
+  public void testCopreferredItemsMapper() throws Exception {
+    Mapper.Context ctx = createMock(Mapper.Context.class);
+
+    // ItemPrefWithLengthArrayWritable is final and therefore cannot be mocked by subclassing; a real
+    // instance holding the same three preferences is used as mapper input instead.
+    ItemPrefWithLengthArrayWritable itemPrefs = new ItemPrefWithLengthArrayWritable(
+        new ItemPrefWithLengthWritable[] {
+            new ItemPrefWithLengthWritable(34l, 2d, 1f), new ItemPrefWithLengthWritable(56l, 3d, 2f),
+            new ItemPrefWithLengthWritable(78l, 4d, 3f) });
+
+    ctx.write(new ItemPairWritable(34l, 56l, 6d), new FloatWritable(2f));
+    ctx.write(new ItemPairWritable(34l, 78l, 8d), new FloatWritable(3f));
+    ctx.write(new ItemPairWritable(56l, 78l, 12d), new FloatWritable(6f));
+
+    replay(ctx);
+
+    new CopreferredItemsMapper().map(new UserWritable(), itemPrefs, ctx);
+
+    verify(ctx);
+  }
+
+  /** Two co-preference products of 5 over a multiplied length of 20 must produce a similarity of 0.5. */
+  public void testCosineSimilarityReducer() throws Exception {
+    Reducer.Context context = createMock(Reducer.Context.class);
+
+    // record the single write we expect from the reducer
+    context.write(new ItemItemWritable(12L, 34L), new DoubleWritable(0.5d));
+    replay(context);
+
+    ItemPairWritable pair = new ItemPairWritable(12L, 34L, 20d);
+    List<FloatWritable> products = Arrays.asList(new FloatWritable(5f), new FloatWritable(5f));
+    new CosineSimilarityReducer().reduce(pair, products, context);
+
+    verify(context);
+  }
+
+  /**
+   * Runs the complete {@link ItemSimilarityJob} on a small preferences file written to a temporary
+   * directory and checks the item pairs and similarities found in the job's output file.
+   */
+  public void testCompleteJob() throws Exception {
+
+    String tmpDirPath = System.getProperty("java.io.tmpdir")+"/"+ItemSimilarityTest.class.getCanonicalName();
+    File tmpDir = new File(tmpDirPath);
+
+    try {
+      // Always start from a fresh, existing directory: leftovers of an earlier run are removed first and
+      // the directory is then (re)created, otherwise writing prefs.txt below would fail.
+      if (tmpDir.exists()) {
+        recursiveDelete(tmpDir);
+      }
+      if (!tmpDir.mkdirs() && !tmpDir.isDirectory()) {
+        throw new IllegalStateException("Could not create temporary directory " + tmpDir);
+      }
+
+      /* user-item-matrix
+
+                   Game   Mouse   PC    Disk
+           Jane     0       1      2      0
+           Paul     1       0      1      0
+           Fred     0       0      0      1
+       */
+
+      BufferedWriter writer = new BufferedWriter(new FileWriter(tmpDirPath+"/prefs.txt"));
+      try {
+        writer.write("1,2,1\n" +
+                     "1,3,2\n" +
+                     "2,1,1\n" +
+                     "2,3,1\n" +
+                     "3,4,1\n");
+      } finally {
+        writer.close();
+      }
+
+      ItemSimilarityJob similarityJob = new ItemSimilarityJob();
+
+      Configuration conf = new Configuration();
+      conf.set("mapred.input.dir", tmpDirPath+"/prefs.txt");
+      conf.set("mapred.output.dir", tmpDirPath+"/output");
+
+      similarityJob.setConf(conf);
+
+      similarityJob.run(new String[] { "--tempDir", tmpDirPath+"/tmp"});
+
+      int currentLine = 1;
+      BufferedReader reader = new BufferedReader(new FileReader(tmpDirPath+"/output/part-r-00000"));
+      try {
+        String line;
+        while ( (line = reader.readLine()) != null) {
+
+          // each output line is expected as: itemA <tab> itemB <tab> similarity
+          String[] tokens = line.split("\t");
+
+          long itemAID = Long.parseLong(tokens[0]);
+          long itemBID = Long.parseLong(tokens[1]);
+          double similarity = Double.parseDouble(tokens[2]);
+
+          if (currentLine == 1) {
+            assertEquals(1l, itemAID);
+            assertEquals(3l, itemBID);
+            assertEquals(0.45, similarity, 0.01);
+          }
+
+          if (currentLine == 2) {
+            assertEquals(2l, itemAID);
+            assertEquals(3l, itemBID);
+            assertEquals(0.89, similarity, 0.01);
+          }
+
+          currentLine++;
+        }
+      } finally {
+        // release the file handle before the cleanup below tries to delete the output file
+        reader.close();
+      }
+
+      int linesWritten = currentLine-1;
+      assertEquals(2, linesWritten);
+
+    } finally {
+      recursiveDelete(tmpDir);
+    }
+  }
+
+  /**
+   * Deletes the given file, or the given directory together with everything below it.
+   *
+   * <p>Best effort: the result of each individual delete is ignored, and a directory whose contents
+   * cannot be listed is simply skipped.</p>
+   *
+   * @param fileOrDir the file or directory to remove
+   */
+  static void recursiveDelete(File fileOrDir) {
+    if (fileOrDir.isDirectory()) {
+      // listFiles() yields null when the directory cannot be read
+      File[] innerFiles = fileOrDir.listFiles();
+      if (innerFiles != null) {
+        for (File innerFile : innerFiles) {
+          recursiveDelete(innerFile);
+        }
+      }
+    }
+    fileOrDir.delete();
+  }
+
+}