You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/04/05 07:59:18 UTC
svn commit: r930801 - in /lucene/mahout/trunk/core/src:
main/java/org/apache/mahout/cf/taste/hadoop/
main/java/org/apache/mahout/cf/taste/hadoop/similarity/
main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/
main/java/org/apache/mahout/cf/tas...
Author: srowen
Date: Mon Apr 5 05:59:17 2010
New Revision: 930801
URL: http://svn.apache.org/viewvc?rev=930801&view=rev
Log:
Initial commit of MAHOUT-362. Refactoring to come.
Added:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CosineSimilarityReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/UserPrefsPerItemMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPairWritable.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthArrayWritable.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthWritable.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserPrefArrayWritable.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserPrefWritable.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserWritable.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/ItemSimilarityTest.java
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemPrefWritable.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemWritable.java
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemPrefWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemPrefWritable.java?rev=930801&r1=930800&r2=930801&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemPrefWritable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemPrefWritable.java Mon Apr 5 05:59:17 2010
@@ -22,9 +22,11 @@ import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.mahout.common.RandomUtils;
/** A {@link Writable} encapsulating an item ID and a preference value. */
-public final class ItemPrefWritable extends ItemWritable {
+public final class ItemPrefWritable extends ItemWritable implements WritableComparable<ItemPrefWritable> {
private float prefValue;
@@ -62,5 +64,20 @@ public final class ItemPrefWritable exte
writable.readFields(in);
return writable;
}
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() ^ RandomUtils.hashFloat(prefValue);
+ }
+
+  /**
+   * Two instances are equal when the other object is an {@code ItemPrefWritable} whose item ID and
+   * preference value both match exactly.
+   */
+  @Override
+  public boolean equals(Object o) {
+    if (o instanceof ItemPrefWritable) {
+      ItemPrefWritable that = (ItemPrefWritable) o;
+      return getItemID() == that.getItemID() && prefValue == that.getPrefValue();
+    }
+    return false;
+  }
}
\ No newline at end of file
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemWritable.java?rev=930801&r1=930800&r2=930801&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemWritable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ItemWritable.java Mon Apr 5 05:59:17 2010
@@ -17,14 +17,16 @@
package org.apache.mahout.cf.taste.hadoop;
-import org.apache.hadoop.io.Writable;
-
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.mahout.common.RandomUtils;
+
/** A {@link Writable} encapsulating an item ID. */
-public class ItemWritable implements Writable {
+public class ItemWritable implements WritableComparable<ItemWritable> {
private long itemID;
@@ -60,4 +62,20 @@ public class ItemWritable implements Wri
return writable;
}
+  /** Orders instances by ascending item ID. */
+  @Override
+  public int compareTo(ItemWritable other) {
+    long thatItemID = other.getItemID();
+    if (itemID < thatItemID) {
+      return -1;
+    }
+    if (itemID > thatItemID) {
+      return 1;
+    }
+    return 0;
+  }
+
+ @Override
+ public int hashCode() {
+ return RandomUtils.hashLong(itemID);
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ return o instanceof ItemWritable && (itemID == ((ItemWritable) o).getItemID());
+ }
+
}
\ No newline at end of file
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java?rev=930801&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java Mon Apr 5 05:59:17 2010
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity.item;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPairWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthArrayWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.UserWritable;
+
+/**
+ * Emits every unordered pair of items found in one user's preference array. The key holds the two item IDs
+ * (smaller ID first) together with the product of the lengths stored with both items; the value is the
+ * product of the user's two preference values.
+ */
+public final class CopreferredItemsMapper
+    extends Mapper<UserWritable,ItemPrefWithLengthArrayWritable,ItemPairWritable,FloatWritable> {
+
+  @Override
+  protected void map(UserWritable user, ItemPrefWithLengthArrayWritable itemPrefsArray, Context context)
+      throws IOException, InterruptedException {
+
+    ItemPrefWithLengthWritable[] prefs = itemPrefsArray.getItemPrefs();
+    int count = prefs.length;
+
+    for (int first = 0; first < count; first++) {
+      ItemPrefWithLengthWritable left = prefs[first];
+      long leftID = left.getItemID();
+      double leftLength = left.getLength();
+      float leftValue = left.getPrefValue();
+
+      // only look at entries behind the current one, so each pair is emitted exactly once
+      for (int second = first + 1; second < count; second++) {
+        ItemPrefWithLengthWritable right = prefs[second];
+        long rightID = right.getItemID();
+
+        // always put the smaller ID first, independent of the order inside the array
+        long smallerID = Math.min(leftID, rightID);
+        long largerID = Math.max(leftID, rightID);
+
+        double multipliedLength = leftLength * right.getLength();
+        float multipliedValue = leftValue * right.getPrefValue();
+
+        context.write(new ItemPairWritable(smallerID, largerID, multipliedLength),
+            new FloatWritable(multipliedValue));
+      }
+    }
+  }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CosineSimilarityReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CosineSimilarityReducer.java?rev=930801&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CosineSimilarityReducer.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CosineSimilarityReducer.java Mon Apr 5 05:59:17 2010
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity.item;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.mahout.cf.taste.hadoop.ItemItemWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPairWritable;
+
+/**
+ * Final step: adds up all numerator summands of an item pair and divides the sum by the multiplied length
+ * carried in the key, which gives the cosine for that pair.
+ */
+public final class CosineSimilarityReducer
+    extends Reducer<ItemPairWritable,FloatWritable,ItemItemWritable,DoubleWritable> {
+
+  @Override
+  protected void reduce(ItemPairWritable pair, Iterable<FloatWritable> numeratorSummands, Context context)
+      throws IOException, InterruptedException {
+
+    double dotProduct = 0.0;
+    for (FloatWritable summand : numeratorSummands) {
+      dotProduct += summand.get();
+    }
+
+    double cosine = dotProduct / pair.getMultipliedLength();
+    context.write(pair.getItemItemWritable(), new DoubleWritable(cosine));
+  }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java?rev=930801&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java Mon Apr 5 05:59:17 2010
@@ -0,0 +1,205 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity.item;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.mahout.cf.taste.hadoop.ItemItemWritable;
+import org.apache.mahout.cf.taste.hadoop.ItemWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPairWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthArrayWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.UserPrefArrayWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.UserPrefWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.UserWritable;
+import org.apache.mahout.common.AbstractJob;
+
+/**
+ * <p>Runs a completely distributed computation of the cosine similarity of the item vectors of the
+ * user-item-matrix as a series of MapReduce jobs.</p>
+ *
+ * <p>Algorithm used is a slight modification from the algorithm described in
+ * http://www.umiacs.umd.edu/~jimmylin/publications/Elsayed_etal_ACL2008_short.pdf</p>
+ *
+ * <pre>
+ * Example:
+ *
+ * user-item-matrix:
+ *
+ * Game Mouse PC
+ * Peter 0 1 2
+ * Paul 1 0 1
+ *
+ * Input:
+ *
+ * (Peter,Mouse,1)
+ * (Peter,PC,2)
+ * (Paul,Game,1)
+ * (Paul,PC,1)
+ *
+ * Step 1: Create the item-vectors
+ *
+ * Game -> (Paul,1)
+ * Mouse -> (Peter,1)
+ * PC -> (Peter,2),(Paul,1)
+ *
+ * Step 2: Compute the length of the item vectors, store it with the item, create the user-vectors
+ *
+ * Peter -> (Mouse,1,1),(PC,2.236,2)
+ * Paul -> (Game,1,1),(PC,2.236,1)
+ *
+ * Step 3: Compute the pairwise cosine for all item pairs that have been co-rated by at least one user
+ *
+ * Mouse,PC -> 1 * 2 / (1 * 2.236)
+ * Game,PC -> 1 * 1 / (1 * 2.236)
+ *
+ * </pre>
+ *
+ * <p>Command line arguments specific to this class are:</p>
+ *
+ * <ol>
+ * <li>-Dmapred.input.dir=(path): Directory containing a text file with the entries of the user-item-matrix
+ * in the form userID,itemID,preference,
+ * one per line</li>
+ * <li>-Dmapred.output.dir=(path): output path where the computations output should go</li>
+ * </ol>
+ *
+ *
+ * <p>General command line options are documented in {@link AbstractJob}.</p>
+ * <p>Please consider supplying a --tempDir parameter for this job, as it needs to write some intermediate files</p>
+ *
+ * <p>Note that because of how Hadoop parses arguments, all "-D" arguments must appear before all other
+ * arguments.</p>
+ */
+public final class ItemSimilarityJob extends AbstractJob {
+
+  /**
+   * Runs the three jobs (item vectors, user vectors, similarity) one after another. Each job reads the output
+   * of the previous one, so the chain is aborted as soon as one job does not complete successfully.
+   *
+   * @param args command line arguments, handed to {@link AbstractJob#parseArguments(String[])}
+   * @return 0 if all three jobs succeeded, -1 if the arguments could not be parsed or a job failed
+   * @throws Exception if setting up or running one of the jobs fails
+   */
+  @Override
+  public int run(String[] args) throws Exception {
+
+    Map<String,String> parsedArgs = AbstractJob.parseArguments(args);
+
+    if (parsedArgs == null) {
+      return -1;
+    }
+
+    Configuration originalConf = getConf();
+    String inputPath = originalConf.get("mapred.input.dir");
+    String outputPath = originalConf.get("mapred.output.dir");
+    String tempDirPath = parsedArgs.get("--tempDir");
+
+    // intermediate results are handed from one job to the next via the temp directory
+    String itemVectorsPath = tempDirPath + "/itemVectors";
+    String userVectorsPath = tempDirPath + "/userVectors";
+
+    Job itemVectors = createJob(originalConf, "itemVectors", inputPath, itemVectorsPath, UserPrefsPerItemMapper.class,
+        ItemWritable.class, UserPrefWritable.class, ToItemVectorReducer.class, ItemWritable.class,
+        UserPrefArrayWritable.class, TextInputFormat.class, SequenceFileOutputFormat.class, true);
+
+    // do not start the next step on missing or partial input
+    if (!itemVectors.waitForCompletion(true)) {
+      return -1;
+    }
+
+    Job userVectors = createJob(originalConf, "userVectors", itemVectorsPath, userVectorsPath,
+        PreferredItemsPerUserMapper.class, UserWritable.class, ItemPrefWithLengthWritable.class,
+        PreferredItemsPerUserReducer.class, UserWritable.class, ItemPrefWithLengthArrayWritable.class);
+
+    if (!userVectors.waitForCompletion(true)) {
+      return -1;
+    }
+
+    Job similarity = createJob(originalConf, "similarity", userVectorsPath, outputPath,
+        CopreferredItemsMapper.class, ItemPairWritable.class, FloatWritable.class, CosineSimilarityReducer.class,
+        ItemItemWritable.class, DoubleWritable.class, SequenceFileInputFormat.class, TextOutputFormat.class, false);
+
+    return similarity.waitForCompletion(true) ? 0 : -1;
+  }
+
+  /** Command line entry point; hands the arguments to {@link ToolRunner}. */
+  public static void main(String[] args) throws Exception {
+    ToolRunner.run(new Configuration(), new ItemSimilarityJob(), args);
+  }
+
+  /**
+   * Creates a job that reads and writes sequence files and compresses its output. Delegates to the
+   * 13-argument overload of {@code createJob}, using {@link SequenceFileInputFormat},
+   * {@link SequenceFileOutputFormat} and {@code compress = true}.
+   */
+  protected static Job createJob(Configuration conf,
+                                 String jobName,
+                                 String inputPath,
+                                 String outputPath,
+                                 Class<? extends Mapper> mapperClass,
+                                 Class<? extends Writable> mapKeyOutClass,
+                                 Class<? extends Writable> mapValueOutClass,
+                                 Class<? extends Reducer> reducerClass,
+                                 Class<? extends Writable> keyOutClass,
+                                 Class<? extends Writable> valueOutClass) throws IOException {
+    return createJob(conf, jobName, inputPath, outputPath, mapperClass, mapKeyOutClass,
+        mapValueOutClass, reducerClass, keyOutClass, valueOutClass, SequenceFileInputFormat.class,
+        SequenceFileOutputFormat.class, true);
+  }
+
+  /**
+   * Creates and configures a job, without submitting it.
+   *
+   * @param conf configuration the job is created from
+   * @param jobName name of the job
+   * @param inputPath input path, qualified against the file system obtained from {@code conf}
+   * @param outputPath output path, qualified against the file system obtained from {@code conf}
+   * @param mapperClass mapper implementation
+   * @param mapKeyOutClass key class emitted by the mapper
+   * @param mapValueOutClass value class emitted by the mapper
+   * @param reducerClass reducer implementation
+   * @param keyOutClass key class emitted by the reducer
+   * @param valueOutClass value class emitted by the reducer
+   * @param fileInputFormatClass input format of the job
+   * @param fileOutputFormatClass output format of the job
+   * @param compress whether the job output should be compressed
+   * @return the configured job
+   * @throws IOException if the file system or the job cannot be created
+   */
+  protected static Job createJob(Configuration conf,
+                                 String jobName,
+                                 String inputPath,
+                                 String outputPath,
+                                 Class<? extends Mapper> mapperClass,
+                                 Class<? extends Writable> mapKeyOutClass,
+                                 Class<? extends Writable> mapValueOutClass,
+                                 Class<? extends Reducer> reducerClass,
+                                 Class<? extends Writable> keyOutClass,
+                                 Class<? extends Writable> valueOutClass,
+                                 Class<? extends FileInputFormat> fileInputFormatClass,
+                                 Class<? extends FileOutputFormat> fileOutputFormatClass,
+                                 boolean compress) throws IOException {
+
+    Job job = new Job(conf, jobName);
+
+    FileSystem fs = FileSystem.get(conf);
+
+    Path inputPathPath = new Path(inputPath).makeQualified(fs);
+    Path outputPathPath = new Path(outputPath).makeQualified(fs);
+
+    FileInputFormat.setInputPaths(job, inputPathPath);
+    job.setInputFormatClass(fileInputFormatClass);
+
+    job.setMapperClass(mapperClass);
+    job.setMapOutputKeyClass(mapKeyOutClass);
+    job.setMapOutputValueClass(mapValueOutClass);
+
+    job.setReducerClass(reducerClass);
+    job.setOutputKeyClass(keyOutClass);
+    job.setOutputValueClass(valueOutClass);
+
+    FileOutputFormat.setOutputPath(job, outputPathPath);
+    FileOutputFormat.setCompressOutput(job, compress);
+    job.setOutputFormatClass(fileOutputFormatClass);
+
+    return job;
+  }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java?rev=930801&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java Mon Apr 5 05:59:17 2010
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity.item;
+
+import java.io.IOException;
+
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.mahout.cf.taste.hadoop.ItemWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.UserPrefArrayWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.UserPrefWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.UserWritable;
+
+/**
+ * Computes the length (square root of the sum of squared preference values) of each incoming item vector and
+ * emits every entry of that vector keyed by its user, carrying the item ID, the vector length and the
+ * preference value, so that the reducer can assemble the user vectors.
+ */
+public final class PreferredItemsPerUserMapper
+    extends Mapper<ItemWritable,UserPrefArrayWritable,UserWritable,ItemPrefWithLengthWritable> {
+
+  @Override
+  protected void map(ItemWritable item, UserPrefArrayWritable userPrefsArray, Context context)
+      throws IOException, InterruptedException {
+
+    UserPrefWritable[] entries = userPrefsArray.getUserPrefs();
+
+    // first pass: length of the item vector
+    double sumOfSquares = 0.0;
+    for (UserPrefWritable entry : entries) {
+      double prefValue = entry.getPrefValue();
+      sumOfSquares += prefValue * prefValue;
+    }
+    double length = Math.sqrt(sumOfSquares);
+
+    // second pass: re-key every entry by its user
+    for (UserPrefWritable entry : entries) {
+      UserWritable user = new UserWritable(entry.getUserID());
+      ItemPrefWithLengthWritable itemPref =
+          new ItemPrefWithLengthWritable(item.getItemID(), length, entry.getPrefValue());
+      context.write(user, itemPref);
+    }
+  }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java?rev=930801&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java Mon Apr 5 05:59:17 2010
@@ -0,0 +1,47 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity.item;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthArrayWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.UserWritable;
+
+/**
+ * Collects all item preferences (with item vector lengths) of a single user into a set and writes them out
+ * as one array, thereby building the user vector.
+ */
+public final class PreferredItemsPerUserReducer
+    extends Reducer<UserWritable,ItemPrefWithLengthWritable,UserWritable,ItemPrefWithLengthArrayWritable> {
+
+  @Override
+  protected void reduce(UserWritable user, Iterable<ItemPrefWithLengthWritable> itemPrefs, Context context)
+      throws IOException, InterruptedException {
+
+    Set<ItemPrefWithLengthWritable> collected = new HashSet<ItemPrefWithLengthWritable>();
+    for (ItemPrefWithLengthWritable current : itemPrefs) {
+      // store a copy, not the iterated instance (presumably because Hadoop reuses the value object)
+      ItemPrefWithLengthWritable copy = current.deepCopy();
+      collected.add(copy);
+    }
+
+    ItemPrefWithLengthWritable[] userVector =
+        collected.toArray(new ItemPrefWithLengthWritable[collected.size()]);
+
+    context.write(user, new ItemPrefWithLengthArrayWritable(userVector));
+  }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java?rev=930801&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java Mon Apr 5 05:59:17 2010
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity.item;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.mahout.cf.taste.hadoop.ItemWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.UserPrefArrayWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.UserPrefWritable;
+
+/**
+ * Gathers all user preferences that belong to one item into a set and writes them out as one array,
+ * which forms the item vector of the user-item-matrix for that item.
+ */
+public final class ToItemVectorReducer
+    extends Reducer<ItemWritable,UserPrefWritable,ItemWritable,UserPrefArrayWritable> {
+
+  @Override
+  protected void reduce(ItemWritable item, Iterable<UserPrefWritable> userPrefs, Context context)
+      throws IOException, InterruptedException {
+
+    Set<UserPrefWritable> collected = new HashSet<UserPrefWritable>();
+    for (UserPrefWritable current : userPrefs) {
+      // store a copy, not the iterated instance (presumably because Hadoop reuses the value object)
+      UserPrefWritable copy = current.deepCopy();
+      collected.add(copy);
+    }
+
+    UserPrefWritable[] itemVector = collected.toArray(new UserPrefWritable[collected.size()]);
+
+    context.write(item, new UserPrefArrayWritable(itemVector));
+  }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/UserPrefsPerItemMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/UserPrefsPerItemMapper.java?rev=930801&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/UserPrefsPerItemMapper.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/UserPrefsPerItemMapper.java Mon Apr 5 05:59:17 2010
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity.item;
+
+import java.io.IOException;
+import java.util.regex.Pattern;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.mahout.cf.taste.hadoop.ItemWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.UserPrefWritable;
+
+/**
+ * Parses one comma-separated line of the preferences file (user ID, item ID, preference value) and emits it
+ * with the item as key and the user together with her preference as value.
+ */
+public final class UserPrefsPerItemMapper extends Mapper<LongWritable,Text,ItemWritable,UserPrefWritable> {
+
+  private static final Pattern COMMA = Pattern.compile(",");
+
+  @Override
+  protected void map(LongWritable key, Text value, Context context)
+      throws IOException, InterruptedException {
+
+    String line = value.toString();
+    String[] fields = COMMA.split(line);
+
+    // field order in the input: userID,itemID,preference
+    long userID = Long.parseLong(fields[0]);
+    long itemID = Long.parseLong(fields[1]);
+    float pref = Float.parseFloat(fields[2]);
+
+    ItemWritable item = new ItemWritable(itemID);
+    UserPrefWritable userPref = new UserPrefWritable(userID, pref);
+    context.write(item, userPref);
+  }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPairWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPairWritable.java?rev=930801&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPairWritable.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPairWritable.java Mon Apr 5 05:59:17 2010
@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity.item.writables;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.mahout.cf.taste.hadoop.ItemItemWritable;
+
+/**
+ * A {@link WritableComparable} encapsulating two items together with their
+ * multiplied vector lengths
+ */
+public final class ItemPairWritable implements WritableComparable<ItemPairWritable> {
+
+ private ItemItemWritable itemItemWritable;
+ private double multipliedLength;
+
+ public ItemPairWritable() {
+ }
+
+ public ItemPairWritable(long itemAID, long itemBID, double multipliedLength) {
+ this.itemItemWritable = new ItemItemWritable(itemAID, itemBID);
+ this.multipliedLength = multipliedLength;
+ }
+
+ public long getItemAID() {
+ return itemItemWritable.getItemAID();
+ }
+
+ public long getItemBID() {
+ return itemItemWritable.getItemBID();
+ }
+
+ public ItemItemWritable getItemItemWritable() {
+ return itemItemWritable;
+ }
+
+ public double getMultipliedLength() {
+ return multipliedLength;
+ }
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ itemItemWritable = ItemItemWritable.read(in);
+ multipliedLength = in.readDouble();
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ itemItemWritable.write(out);
+ out.writeDouble(multipliedLength);
+ }
+
+ @Override
+ public int compareTo(ItemPairWritable other) {
+ return itemItemWritable.compareTo(other.getItemItemWritable());
+ }
+
+ @Override
+ public int hashCode() {
+ return itemItemWritable.hashCode();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (o instanceof ItemPairWritable) {
+ return itemItemWritable.equals(((ItemPairWritable) o).getItemItemWritable());
+ }
+ return false;
+ }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthArrayWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthArrayWritable.java?rev=930801&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthArrayWritable.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthArrayWritable.java Mon Apr 5 05:59:17 2010
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity.item.writables;
+
+import org.apache.hadoop.io.ArrayWritable;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * An {@link ArrayWritable} whose elements are {@link ItemPrefWithLengthWritable}s.
+ *
+ * Used as user-vector
+ */
+public final class ItemPrefWithLengthArrayWritable extends ArrayWritable {
+
+  /** Creates an empty array declared to hold {@link ItemPrefWithLengthWritable}s. */
+  public ItemPrefWithLengthArrayWritable() {
+    super(ItemPrefWithLengthWritable.class);
+  }
+
+  /**
+   * @param itemPrefs the entries this array is initialized with
+   */
+  public ItemPrefWithLengthArrayWritable(ItemPrefWithLengthWritable[] itemPrefs) {
+    super(ItemPrefWithLengthWritable.class, itemPrefs);
+  }
+
+  /**
+   * Copies the stored entries into a newly allocated, properly typed array.
+   *
+   * @return the stored entries, each cast to {@link ItemPrefWithLengthWritable}
+   */
+  public ItemPrefWithLengthWritable[] getItemPrefs() {
+    Writable[] raw = get();
+    ItemPrefWithLengthWritable[] typed = new ItemPrefWithLengthWritable[raw.length];
+    int position = 0;
+    for (Writable entry : raw) {
+      typed[position++] = (ItemPrefWithLengthWritable) entry;
+    }
+    return typed;
+  }
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthWritable.java?rev=930801&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthWritable.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthWritable.java Mon Apr 5 05:59:17 2010
@@ -0,0 +1,92 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity.item.writables;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.Writable;
+import org.apache.mahout.common.RandomUtils;
+
+/**
+ * A {@link Writable} encapsulating the preference for an item
+ * stored along with the length of the item-vector
+ *
+ */
+public final class ItemPrefWithLengthWritable implements Writable {
+
+  private long itemID;
+  private double length;
+  private float prefValue;
+
+  /** Creates an empty instance; its state is filled in by {@link #readFields(DataInput)}. */
+  public ItemPrefWithLengthWritable() {
+    // do nothing
+  }
+
+  /**
+   * @param itemID    ID of the item
+   * @param length    length of the item-vector
+   * @param prefValue the preference value for the item
+   */
+  public ItemPrefWithLengthWritable(long itemID, double length, float prefValue) {
+    this.itemID = itemID;
+    this.length = length;
+    this.prefValue = prefValue;
+  }
+
+  /** @return ID of the item */
+  public long getItemID() {
+    return itemID;
+  }
+
+  /** @return length of the item-vector */
+  public double getLength() {
+    return length;
+  }
+
+  /** @return the preference value for the item */
+  public float getPrefValue() {
+    return prefValue;
+  }
+
+  /** @return a new instance carrying the same item ID, length and preference value */
+  public ItemPrefWithLengthWritable deepCopy() {
+    return new ItemPrefWithLengthWritable(itemID, length, prefValue);
+  }
+
+  /** Writes item ID, length and preference value, in that order. */
+  @Override
+  public void write(DataOutput out) throws IOException {
+    out.writeLong(itemID);
+    out.writeDouble(length);
+    out.writeFloat(prefValue);
+  }
+
+  /** Reads item ID, length and preference value, mirroring {@link #write(DataOutput)}. */
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    itemID = in.readLong();
+    length = in.readDouble();
+    prefValue = in.readFloat();
+  }
+
+  @Override
+  public int hashCode() {
+    return RandomUtils.hashLong(itemID) + 31 * RandomUtils.hashDouble(length) + 31 * RandomUtils.hashFloat(prefValue);
+  }
+
+  /**
+   * Two instances are equal if item ID, length and preference value all match.
+   *
+   * The floating point fields are compared with {@link Double#compare(double, double)} and
+   * {@link Float#compare(float, float)} rather than <code>==</code>: with <code>==</code> an instance
+   * holding NaN would not be equal to itself, and 0.0 would be equal to -0.0.
+   * NOTE(review): this assumes RandomUtils hashes floating point values by bit pattern, in which
+   * case 0.0 and -0.0 get different hash codes - confirm.
+   */
+  @Override
+  public boolean equals(Object o) {
+    if (!(o instanceof ItemPrefWithLengthWritable)) {
+      return false;
+    }
+    ItemPrefWithLengthWritable other = (ItemPrefWithLengthWritable) o;
+    return itemID == other.getItemID()
+        && Double.compare(length, other.getLength()) == 0
+        && Float.compare(prefValue, other.getPrefValue()) == 0;
+  }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserPrefArrayWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserPrefArrayWritable.java?rev=930801&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserPrefArrayWritable.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserPrefArrayWritable.java Mon Apr 5 05:59:17 2010
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity.item.writables;
+
+import org.apache.hadoop.io.ArrayWritable;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * An {@link ArrayWritable} whose elements are {@link UserPrefWritable}s.
+ *
+ * Used to represent an item-vector
+ */
+public final class UserPrefArrayWritable extends ArrayWritable {
+
+  /** Creates an empty array declared to hold {@link UserPrefWritable}s. */
+  public UserPrefArrayWritable() {
+    super(UserPrefWritable.class);
+  }
+
+  /**
+   * @param userPrefs the entries this array is initialized with
+   */
+  public UserPrefArrayWritable(UserPrefWritable[] userPrefs) {
+    super(UserPrefWritable.class, userPrefs);
+  }
+
+  /**
+   * Copies the stored entries into a newly allocated, properly typed array.
+   *
+   * @return the stored entries, each cast to {@link UserPrefWritable}
+   */
+  public UserPrefWritable[] getUserPrefs() {
+    Writable[] raw = get();
+    UserPrefWritable[] typed = new UserPrefWritable[raw.length];
+    int position = 0;
+    for (Writable entry : raw) {
+      typed[position++] = (UserPrefWritable) entry;
+    }
+    return typed;
+  }
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserPrefWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserPrefWritable.java?rev=930801&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserPrefWritable.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserPrefWritable.java Mon Apr 5 05:59:17 2010
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity.item.writables;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.Writable;
+
+/**
+ * A {@link Writable} encapsulating an item ID together with a preference value.
+ *
+ * Used as entry in an item-vector
+ */
+public final class UserPrefWritable extends UserWritable {
+
+ private float prefValue;
+
+ public UserPrefWritable() {
+ }
+
+ public UserPrefWritable(long userID, float prefValue) {
+ super(userID);
+ this.prefValue = prefValue;
+ }
+
+ public float getPrefValue() {
+ return prefValue;
+ }
+
+ public UserPrefWritable deepCopy() {
+ return new UserPrefWritable(getUserID(), prefValue);
+ }
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ super.readFields(in);
+ prefValue = in.readFloat();
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ super.write(out);
+ out.writeFloat(prefValue);
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (o instanceof UserPrefWritable) {
+ UserWritable other = (UserWritable) o;
+ return super.equals(other);
+ }
+ return false;
+ }
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserWritable.java?rev=930801&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserWritable.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/UserWritable.java Mon Apr 5 05:59:17 2010
@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity.item.writables;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.mahout.common.RandomUtils;
+
+/**
+ * A {@link WritableComparable} encapsulating a user ID.
+ *
+ * The class is deliberately not final: {@link UserPrefWritable} extends it, which would not
+ * compile against a final class.
+ */
+public class UserWritable implements WritableComparable<UserWritable> {
+
+  private long userID;
+
+  /** Creates an empty instance; its state is filled in by {@link #readFields(DataInput)}. */
+  public UserWritable() {
+    // do nothing
+  }
+
+  /**
+   * @param userID ID of the user
+   */
+  public UserWritable(long userID) {
+    this.userID = userID;
+  }
+
+  /** @return ID of the user */
+  public long getUserID() {
+    return userID;
+  }
+
+  /** Writes the user ID as a long. */
+  @Override
+  public void write(DataOutput out) throws IOException {
+    out.writeLong(userID);
+  }
+
+  /** Reads the user ID as a long, mirroring {@link #write(DataOutput)}. */
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    userID = in.readLong();
+  }
+
+  /** Orders instances by ascending user ID. */
+  @Override
+  public int compareTo(UserWritable other) {
+    return compare(userID, other.getUserID());
+  }
+
+  /** Three-way comparison of two longs; explicit comparisons avoid the overflow of a subtraction. */
+  private static int compare(long a, long b) {
+    return a < b ? -1 : a > b ? 1 : 0;
+  }
+
+  @Override
+  public int hashCode() {
+    return RandomUtils.hashLong(userID);
+  }
+
+  /** Two instances are equal if they carry the same user ID. */
+  @Override
+  public boolean equals(Object o) {
+    if (o instanceof UserWritable) {
+      return (userID == ((UserWritable) o).getUserID());
+    }
+    return false;
+  }
+
+  /**
+   * Creates a new instance and populates it from the given input.
+   *
+   * @param in the input to read the user ID from
+   * @return the newly read instance
+   * @throws IOException if reading from the input fails
+   */
+  public static UserWritable read(DataInput in) throws IOException {
+    UserWritable writable = new UserWritable();
+    writable.readFields(in);
+    return writable;
+  }
+
+}
Added: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/ItemSimilarityTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/ItemSimilarityTest.java?rev=930801&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/ItemSimilarityTest.java (added)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/ItemSimilarityTest.java Mon Apr 5 05:59:17 2010
@@ -0,0 +1,313 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity;
+
+import static org.easymock.EasyMock.eq;
+import static org.easymock.EasyMock.expect;
+import static org.easymock.classextension.EasyMock.createMock;
+import static org.easymock.classextension.EasyMock.replay;
+import static org.easymock.classextension.EasyMock.verify;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.mahout.cf.taste.hadoop.ItemItemWritable;
+import org.apache.mahout.cf.taste.hadoop.ItemWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.CopreferredItemsMapper;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.CosineSimilarityReducer;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.ItemSimilarityJob;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.PreferredItemsPerUserMapper;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.PreferredItemsPerUserReducer;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.ToItemVectorReducer;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.UserPrefsPerItemMapper;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPairWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthArrayWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.UserPrefArrayWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.UserPrefWritable;
+import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.UserWritable;
+import org.apache.mahout.common.MahoutTestCase;
+import org.easymock.IArgumentMatcher;
+import org.easymock.classextension.EasyMock;
+
+/**
+ * Unit tests for the mappers and reducers in org.apache.mahout.cf.taste.hadoop.similarity
+ * Integration test with a mini-file at the end
+ *
+ */
+@SuppressWarnings("unchecked")
+public class ItemSimilarityTest extends MahoutTestCase {
+
+  /** A line "user,item,pref" must be emitted keyed by the item, with user and preference as value. */
+  public void testUserPrefsPerItemMapper() throws Exception {
+    Mapper.Context ctx = createMock(Mapper.Context.class);
+    ctx.write(new ItemWritable(34l), new UserPrefWritable(12l, 2.3f));
+    replay(ctx);
+
+    new UserPrefsPerItemMapper().map(new LongWritable(), new Text("12,34,2.3"), ctx);
+
+    verify(ctx);
+  }
+
+  /** All user preferences of an item must be written as one array under that item. */
+  public void testToItemVectorReducer() throws Exception {
+
+    List<UserPrefWritable> userPrefs = Arrays.asList(new UserPrefWritable(34l, 1f), new UserPrefWritable(56l, 2f));
+
+    Reducer.Context ctx = createMock(Reducer.Context.class);
+
+    ctx.write(eq(new ItemWritable(12l)), equalToUserPrefs(userPrefs));
+
+    replay(ctx);
+
+    new ToItemVectorReducer().reduce(new ItemWritable(12l), userPrefs, ctx);
+
+    verify(ctx);
+  }
+
+  /**
+   * Registers an EasyMock argument matcher that accepts a {@link UserPrefArrayWritable} holding
+   * exactly the given preferences, in any order.
+   *
+   * @return always null; the matcher is registered as a side effect, as EasyMock requires
+   */
+  static UserPrefArrayWritable equalToUserPrefs(final Collection<UserPrefWritable> prefsToCheck) {
+    EasyMock.reportMatcher(new IArgumentMatcher() {
+      @Override
+      public boolean matches(Object argument) {
+        if (argument instanceof UserPrefArrayWritable) {
+          UserPrefArrayWritable userPrefArray = (UserPrefArrayWritable) argument;
+          Set<UserPrefWritable> set = new HashSet<UserPrefWritable>();
+          for (UserPrefWritable userPref : userPrefArray.getUserPrefs()) {
+            set.add(userPref);
+          }
+
+          if (set.size() != prefsToCheck.size()) {
+            return false;
+          }
+
+          for (UserPrefWritable prefToCheck : prefsToCheck) {
+            if (!set.contains(prefToCheck)) {
+              return false;
+            }
+          }
+          return true;
+        }
+        return false;
+      }
+
+      @Override
+      public void appendTo(StringBuffer buffer) {}
+    });
+
+    return null;
+  }
+
+  /** Every entry of an item-vector must be emitted keyed by its user, along with the vector's length. */
+  public void testPreferredItemsPerUserMapper() throws Exception {
+    Mapper.Context ctx = createMock(Mapper.Context.class);
+    UserPrefArrayWritable userPrefs = createMock(UserPrefArrayWritable.class);
+
+    expect(userPrefs.getUserPrefs())
+        .andReturn(new UserPrefWritable[] { new UserPrefWritable(12l, 2f), new UserPrefWritable(56l, 3f) });
+
+    // euclidean length of the item-vector (2, 3)
+    double length = Math.sqrt(Math.pow(2f, 2) + Math.pow(3f, 2));
+
+    ctx.write(new UserWritable(12l), new ItemPrefWithLengthWritable(34l, length, 2f));
+    ctx.write(new UserWritable(56l), new ItemPrefWithLengthWritable(34l, length, 3f));
+
+    replay(ctx, userPrefs);
+
+    new PreferredItemsPerUserMapper().map(new ItemWritable(34l), userPrefs, ctx);
+
+    verify(ctx, userPrefs);
+  }
+
+  /** All item preferences of a user must be written as one array under that user. */
+  public void testPreferredItemsPerUserReducer() throws Exception {
+
+    List<ItemPrefWithLengthWritable> itemPrefs =
+        Arrays.asList(new ItemPrefWithLengthWritable(34l, 5d, 1f), new ItemPrefWithLengthWritable(56l, 7d, 2f));
+
+    Reducer.Context ctx = createMock(Reducer.Context.class);
+
+    ctx.write(eq(new UserWritable(12l)), equalToItemPrefs(itemPrefs));
+
+    replay(ctx);
+
+    new PreferredItemsPerUserReducer().reduce(new UserWritable(12l), itemPrefs, ctx);
+
+    verify(ctx);
+  }
+
+  /**
+   * Registers an EasyMock argument matcher that accepts an {@link ItemPrefWithLengthArrayWritable}
+   * holding exactly the given preferences, in any order.
+   *
+   * @return always null; the matcher is registered as a side effect, as EasyMock requires
+   */
+  static ItemPrefWithLengthArrayWritable equalToItemPrefs(final Collection<ItemPrefWithLengthWritable> prefsToCheck) {
+    EasyMock.reportMatcher(new IArgumentMatcher() {
+      @Override
+      public boolean matches(Object argument) {
+        if (argument instanceof ItemPrefWithLengthArrayWritable) {
+          ItemPrefWithLengthArrayWritable itemPrefArray = (ItemPrefWithLengthArrayWritable) argument;
+          Set<ItemPrefWithLengthWritable> set = new HashSet<ItemPrefWithLengthWritable>();
+          for (ItemPrefWithLengthWritable itemPref : itemPrefArray.getItemPrefs()) {
+            set.add(itemPref);
+          }
+
+          if (set.size() != prefsToCheck.size()) {
+            return false;
+          }
+
+          for (ItemPrefWithLengthWritable prefToCheck : prefsToCheck) {
+            if (!set.contains(prefToCheck)) {
+              return false;
+            }
+          }
+          return true;
+        }
+        return false;
+      }
+
+      @Override
+      public void appendTo(StringBuffer buffer) {}
+    });
+
+    return null;
+  }
+
+  /**
+   * For every pair of items preferred by the same user, the multiplied lengths and the
+   * multiplied preference values must be emitted.
+   */
+  public void testCopreferredItemsMapper() throws Exception {
+    Mapper.Context ctx = createMock(Mapper.Context.class);
+    ItemPrefWithLengthArrayWritable itemPrefs = createMock(ItemPrefWithLengthArrayWritable.class);
+
+    expect(itemPrefs.getItemPrefs()).andReturn(new ItemPrefWithLengthWritable[] {
+        new ItemPrefWithLengthWritable(34l, 2d, 1f), new ItemPrefWithLengthWritable(56l, 3d, 2f),
+        new ItemPrefWithLengthWritable(78l, 4d, 3f) });
+
+    ctx.write(new ItemPairWritable(34l, 56l, 6d), new FloatWritable(2f));
+    ctx.write(new ItemPairWritable(34l, 78l, 8d), new FloatWritable(3f));
+    ctx.write(new ItemPairWritable(56l, 78l, 12d), new FloatWritable(6f));
+
+    replay(ctx, itemPrefs);
+
+    new CopreferredItemsMapper().map(new UserWritable(), itemPrefs, ctx);
+
+    verify(ctx, itemPrefs);
+  }
+
+  /** Values 5 and 5 for a pair with multiplied length 20 must yield a similarity of 0.5. */
+  public void testCosineSimilarityReducer() throws Exception {
+    Reducer.Context ctx = createMock(Reducer.Context.class);
+
+    ctx.write(new ItemItemWritable(12l, 34l), new DoubleWritable(0.5d));
+
+    replay(ctx);
+
+    new CosineSimilarityReducer().reduce(new ItemPairWritable(12l, 34l, 20d),
+        Arrays.asList(new FloatWritable(5f), new FloatWritable(5f)), ctx);
+
+    verify(ctx);
+  }
+
+  /**
+   * Runs the complete {@link ItemSimilarityJob} on a tiny preferences file in a temporary
+   * directory and checks the similarities written to the output file.
+   */
+  public void testCompleteJob() throws Exception {
+
+    String tmpDirPath = System.getProperty("java.io.tmpdir")+"/"+ItemSimilarityTest.class.getCanonicalName();
+    File tmpDir = new File(tmpDirPath);
+
+    try {
+      // Start from an empty directory: remove leftovers of an earlier run, then always (re-)create
+      // it. Deleting without re-creating would make the write of prefs.txt below fail.
+      if (tmpDir.exists()) {
+        recursiveDelete(tmpDir);
+      }
+      if (!tmpDir.mkdirs()) {
+        throw new IllegalStateException("Could not create temporary directory " + tmpDirPath);
+      }
+
+      /* user-item-matrix
+
+                   Game   Mouse   PC    Disk
+           Jane     0       1      2      0
+           Paul     1       0      1      0
+           Fred     0       0      0      1
+       */
+
+      BufferedWriter writer = new BufferedWriter(new FileWriter(tmpDirPath+"/prefs.txt"));
+      try {
+        writer.write("1,2,1\n" +
+                     "1,3,2\n" +
+                     "2,1,1\n" +
+                     "2,3,1\n" +
+                     "3,4,1\n");
+      } finally {
+        writer.close();
+      }
+
+      ItemSimilarityJob similarityJob = new ItemSimilarityJob();
+
+      Configuration conf = new Configuration();
+      conf.set("mapred.input.dir", tmpDirPath+"/prefs.txt");
+      conf.set("mapred.output.dir", tmpDirPath+"/output");
+
+      similarityJob.setConf(conf);
+
+      similarityJob.run(new String[] { "--tempDir", tmpDirPath+"/tmp"});
+
+      int currentLine = 1;
+      BufferedReader reader = new BufferedReader(new FileReader(tmpDirPath+"/output/part-r-00000"));
+      try {
+        String line = null;
+        while ( (line = reader.readLine()) != null) {
+
+          String[] tokens = line.split("\t");
+
+          long itemAID = Long.parseLong(tokens[0]);
+          long itemBID = Long.parseLong(tokens[1]);
+          double similarity = Double.parseDouble(tokens[2]);
+
+          if (currentLine == 1) {
+            assertEquals(1l, itemAID);
+            assertEquals(3l, itemBID);
+            assertEquals(0.45, similarity, 0.01);
+          }
+
+          if (currentLine == 2) {
+            assertEquals(2l, itemAID);
+            assertEquals(3l, itemBID);
+            assertEquals(0.89, similarity, 0.01);
+          }
+
+          currentLine++;
+        }
+      } finally {
+        // close the reader even if an assertion fails, so the output file can be deleted below
+        reader.close();
+      }
+
+      int linesWritten = currentLine-1;
+      assertEquals(2, linesWritten);
+
+    } finally {
+      recursiveDelete(tmpDir);
+    }
+  }
+
+  /**
+   * Deletes the given file, or the given directory together with everything below it.
+   * Failures to delete are ignored.
+   */
+  static void recursiveDelete(File fileOrDir) {
+    if (fileOrDir.isDirectory()) {
+      // listFiles() returns null if the directory cannot be listed
+      File[] innerFiles = fileOrDir.listFiles();
+      if (innerFiles != null) {
+        for (File innerFile : innerFiles) {
+          recursiveDelete(innerFile);
+        }
+      }
+    }
+    fileOrDir.delete();
+  }
+
+}