You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2008/05/13 02:24:24 UTC
svn commit: r655701 - in
/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop:
./ RecommendedItemsWritable.java RecommenderJob.java RecommenderMapper.java
ReducerMetrics.java
Author: srowen
Date: Mon May 12 17:24:23 2008
New Revision: 655701
URL: http://svn.apache.org/viewvc?rev=655701&view=rev
Log:
First checkin of Hadoop-ified Recommender framework
Added:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderJob.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ReducerMetrics.java
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java?rev=655701&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java Mon May 12 17:24:23 2008
@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop;
+
+import org.apache.hadoop.io.Writable;
+import org.apache.mahout.cf.taste.impl.model.GenericItem;
+import org.apache.mahout.cf.taste.impl.recommender.GenericRecommendedItem;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.EOFException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A {@link Writable} which encapsulates a list of {@link RecommendedItem}s. This is
+ * the mapper (and reducer) output, and represents items recommended to a user.
+ * The first item is the one whose estimated preference is highest.
+ *
+ * <p>Serialized form: an <code>int</code> item count, followed by that many pairs of
+ * item ID (written with {@link DataOutput#writeUTF(String)}) and preference value
+ * (written with {@link DataOutput#writeDouble(double)}). The explicit count makes each
+ * record self-delimiting, so an instance can be read back correctly even when more data
+ * follows it in the same stream.</p>
+ */
+public final class RecommendedItemsWritable implements Writable {
+
+  /** The wrapped recommendations; <code>null</code> until set by a constructor or by readFields(). */
+  private List<RecommendedItem> recommended;
+
+  /**
+   * Creates an instance with no list set; intended to be populated through
+   * {@link #readFields(DataInput)}.
+   */
+  public RecommendedItemsWritable() {
+    // do nothing
+  }
+
+  /**
+   * @param recommended recommendations to wrap; the list is stored as-is, not copied
+   */
+  public RecommendedItemsWritable(List<RecommendedItem> recommended) {
+    this.recommended = recommended;
+  }
+
+  /**
+   * @return the wrapped list, or <code>null</code> if none has been set or read yet
+   */
+  public List<RecommendedItem> getRecommendedItems() {
+    return recommended;
+  }
+
+  /**
+   * Writes the item count followed by each item's ID and value.
+   * A list that was never set is written as an empty list.
+   *
+   * @param out destination to serialize to
+   * @throws IOException if the underlying output fails
+   */
+  public void write(DataOutput out) throws IOException {
+    if (recommended == null) {
+      out.writeInt(0);
+      return;
+    }
+    out.writeInt(recommended.size());
+    for (RecommendedItem item : recommended) {
+      out.writeUTF(item.getItem().getID().toString());
+      out.writeDouble(item.getValue());
+    }
+  }
+
+  /**
+   * Replaces the wrapped list with the items read from <code>in</code>. Exactly the number of
+   * items announced by the leading count is consumed; nothing beyond this record is read.
+   * Item IDs are restored as Strings.
+   *
+   * @param in source to deserialize from
+   * @throws IOException if the input fails, ends prematurely, or announces a negative count
+   */
+  public void readFields(DataInput in) throws IOException {
+    int count = in.readInt();
+    if (count < 0) {
+      throw new IOException("Negative recommended item count: " + count);
+    }
+    // Build into a local list so that a failure part-way through leaves this object untouched.
+    List<RecommendedItem> items = new ArrayList<RecommendedItem>();
+    for (int i = 0; i < count; i++) {
+      String itemID = in.readUTF();
+      double value = in.readDouble();
+      Item item = new GenericItem<String>(itemID);
+      items.add(new GenericRecommendedItem(item, value));
+    }
+    recommended = items;
+  }
+
+  /**
+   * Convenience factory: creates a new instance and populates it from <code>in</code>.
+   *
+   * @param in source to deserialize from
+   * @return a new instance holding the items that were read
+   * @throws IOException as for {@link #readFields(DataInput)}
+   */
+  public static RecommendedItemsWritable read(DataInput in) throws IOException {
+    RecommendedItemsWritable writable = new RecommendedItemsWritable();
+    writable.readFields(in);
+    return writable;
+  }
+
+  /**
+   * @return the items formatted as <code>[id:value,id:value,...]</code>;
+   *  <code>[]</code> when the list is empty or was never set
+   */
+  @Override
+  public String toString() {
+    StringBuilder result = new StringBuilder();
+    result.append('[');
+    if (recommended != null) {
+      boolean first = true;
+      for (RecommendedItem item : recommended) {
+        if (first) {
+          first = false;
+        } else {
+          result.append(',');
+        }
+        result.append(item.getItem().getID().toString());
+        result.append(':');
+        result.append(item.getValue());
+      }
+    }
+    result.append(']');
+    return result.toString();
+  }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderJob.java?rev=655701&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderJob.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderJob.java Mon May 12 17:24:23 2008
@@ -0,0 +1,100 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.mapred.TextOutputFormat;
+import org.apache.hadoop.mapred.lib.IdentityReducer;
+import org.apache.mahout.cf.taste.recommender.Recommender;
+
+import java.io.IOException;
+
+/**
+ * <p>This class configures and runs a {@link RecommenderMapper} using Hadoop.</p>
+ *
+ * <p>Command line arguments are:</p>
+ * <ol>
+ *  <li>Fully-qualified class name of {@link Recommender} to use to make recommendations.
+ *   Note that it must have a public constructor taking a single
+ *   {@link org.apache.mahout.cf.taste.model.DataModel} argument, since that is the
+ *   constructor {@link RecommenderMapper} looks up.</li>
+ *  <li>Number of recommendations to compute per user</li>
+ *  <li>Location of a text file containing user IDs for which recommendations should be computed,
+ *   one per line</li>
+ *  <li>Location of a data model file containing preference data, suitable for use with
+ *   {@link org.apache.mahout.cf.taste.impl.model.file.FileDataModel}</li>
+ *  <li>Output path where reducer output should go</li>
+ *  <li>Number of mapper tasks to use</li>
+ * </ol>
+ *
+ * <p>Example:</p>
+ *
+ * <p><code>org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender 10 path/to/users.txt
+ *  path/to/data.csv path/to/reducerOutputDir 5</code></p>
+ *
+ * <p>TODO This has not yet been verified in a real distributed environment.</p>
+ */
+public final class RecommenderJob {
+
+  /**
+   * Parses the six command line arguments described in the class documentation, builds the
+   * job configuration and runs the job, blocking until it completes.
+   *
+   * @param args command line arguments; at least six are required, extra ones are ignored
+   * @throws IOException if the job fails to run
+   * @throws IllegalArgumentException if fewer than six arguments are supplied
+   * @throws NumberFormatException if the second or sixth argument is not an integer
+   */
+  public static void main(String[] args) throws IOException {
+    if (args == null || args.length < 6) {
+      // Fail with an explanation instead of a bare ArrayIndexOutOfBoundsException.
+      throw new IllegalArgumentException(
+          "Usage: " + RecommenderJob.class.getName() +
+          " <recommenderClassName> <recommendationsPerUser> <userIDFile>" +
+          " <dataModelFile> <outputPath> <numMappers>");
+    }
+    String recommendClassName = args[0];
+    int recommendationsPerUser = Integer.parseInt(args[1]);
+    String userIDFile = args[2];
+    String dataModelFile = args[3];
+    String outputPath = args[4];
+    int numMappers = Integer.parseInt(args[5]);
+    JobConf jobConf =
+        buildJobConf(recommendClassName, recommendationsPerUser, userIDFile, dataModelFile, outputPath, numMappers);
+    JobClient.runJob(jobConf);
+  }
+
+  /**
+   * Builds the Hadoop configuration for a recommendation job: text input of user IDs,
+   * {@link RecommenderMapper} as the mapper, a single {@link IdentityReducer}, and text output.
+   *
+   * @param recommendClassName fully-qualified name of the {@link Recommender} implementation
+   * @param recommendationsPerUser number of recommendations to compute per user
+   * @param userIDFile path of the text file of user IDs, one per line; used as job input
+   * @param dataModelFile path of the preference data file handed to the mapper
+   * @param outputPath path where the job output is written
+   * @param numMappers number of map tasks to request
+   * @return the configured, not yet submitted, job configuration
+   */
+  public static JobConf buildJobConf(String recommendClassName,
+                                     int recommendationsPerUser,
+                                     String userIDFile,
+                                     String dataModelFile,
+                                     String outputPath,
+                                     int numMappers) {
+    JobConf jobConf = new JobConf(Recommender.class);
+
+    // Settings read back by RecommenderMapper.configure()
+    jobConf.set(RecommenderMapper.RECOMMENDER_CLASS_NAME, recommendClassName);
+    jobConf.set(RecommenderMapper.RECOMMENDATIONS_PER_USER, String.valueOf(recommendationsPerUser));
+    jobConf.set(RecommenderMapper.DATA_MODEL_FILE, dataModelFile);
+
+    jobConf.setJobName(RecommenderJob.class.getSimpleName());
+
+    jobConf.setInputFormat(TextInputFormat.class);
+    jobConf.setInputPath(new Path(userIDFile));
+
+    jobConf.setNumMapTasks(numMappers);
+    jobConf.setMapperClass(RecommenderMapper.class);
+    jobConf.setMapOutputKeyClass(Text.class);
+    jobConf.setMapOutputValueClass(RecommendedItemsWritable.class);
+
+    // A single identity reducer passes the mapper results through to one output.
+    jobConf.setNumReduceTasks(1);
+    jobConf.setReducerClass(IdentityReducer.class);
+    jobConf.setOutputKeyClass(Text.class);
+    jobConf.setOutputValueClass(RecommendedItemsWritable.class);
+
+    jobConf.setOutputFormat(TextOutputFormat.class);
+    jobConf.setOutputPath(new Path(outputPath));
+    return jobConf;
+  }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderMapper.java?rev=655701&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderMapper.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderMapper.java Mon May 12 17:24:23 2008
@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Recommender;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.util.List;
+
+/**
+ * <p>The {@link Mapper} which takes as input a file of user IDs (treated as Strings, note), and
+ * for each non-blank line, computes recommendations for that user ID with the configured
+ * {@link Recommender}. User IDs are not de-duplicated: an ID which appears on several lines is
+ * processed once per line. The results are output as {@link RecommendedItemsWritable}.</p>
+ *
+ * <p>Note that there is no corresponding {@link org.apache.hadoop.mapred.Reducer}; this
+ * implementation can only partially take advantage of the mapreduce paradigm and only
+ * really leverages it for easy parallelization.</p>
+ */
+public final class RecommenderMapper
+    extends MapReduceBase
+    implements Mapper<LongWritable, Text, Text, RecommendedItemsWritable> {
+
+  /** Job property holding the fully-qualified {@link Recommender} class name. */
+  static final String RECOMMENDER_CLASS_NAME = "recommenderClassName";
+  /**
+   * Job property holding the number of recommendations to compute per user.
+   * The misspelling in the property name is kept so that existing configurations keep working.
+   */
+  static final String RECOMMENDATIONS_PER_USER = "recommendadtionsPerUser";
+  /** Job property holding the path of the preference data file. */
+  static final String DATA_MODEL_FILE = "dataModelFile";
+
+  private Recommender recommender;
+  private int recommendationsPerUser;
+
+  /**
+   * Computes recommendations for the user ID found in <code>value</code> and emits them keyed
+   * by that user ID, updating the {@link ReducerMetrics} counters. Blank lines are skipped.
+   *
+   * @param key position of the line in the input; not used
+   * @param value one line of the user ID file
+   * @param output collector receiving (user ID, recommendations) pairs
+   * @param reporter used to increment the job counters
+   * @throws IOException if the output collector fails
+   * @throws RuntimeException wrapping a {@link TasteException} thrown by the recommender
+   */
+  public void map(LongWritable key,
+                  Text value,
+                  OutputCollector<Text, RecommendedItemsWritable> output,
+                  Reporter reporter) throws IOException {
+    String userID = value.toString();
+    if (userID.trim().length() == 0) {
+      // A blank line (for example a trailing newline in the user file) names no user;
+      // skip it rather than asking the recommender about an empty ID.
+      return;
+    }
+    List<RecommendedItem> recommendedItems;
+    try {
+      recommendedItems = recommender.recommend(userID, recommendationsPerUser);
+    } catch (TasteException te) {
+      throw new RuntimeException(te);
+    }
+    RecommendedItemsWritable writable = new RecommendedItemsWritable(recommendedItems);
+    output.collect(new Text(userID), writable);
+    reporter.incrCounter(ReducerMetrics.USERS_PROCESSED, 1L);
+    reporter.incrCounter(ReducerMetrics.RECOMMENDATIONS_MADE, recommendedItems.size());
+  }
+
+  /**
+   * Loads the data model file named in the job configuration, instantiates the configured
+   * {@link Recommender} through its constructor taking a {@link DataModel}, and reads the
+   * number of recommendations to produce per user.
+   *
+   * @param jobConf job configuration carrying the three properties defined by this class
+   * @throws RuntimeException if the data model file is not found, or if the recommender class
+   *  cannot be loaded or instantiated
+   * @throws ClassCastException if the configured class does not implement {@link Recommender}
+   */
+  @Override
+  public void configure(JobConf jobConf) {
+    String dataModelFile = jobConf.get(DATA_MODEL_FILE);
+    FileDataModel fileDataModel;
+    try {
+      fileDataModel = new FileDataModel(new File(dataModelFile));
+    } catch (FileNotFoundException fnfe) {
+      throw new RuntimeException(fnfe);
+    }
+    String recommenderClassName = jobConf.get(RECOMMENDER_CLASS_NAME);
+    try {
+      // asSubclass() checks the type up front and avoids an unchecked cast.
+      Class<? extends Recommender> recommenderClass =
+          Class.forName(recommenderClassName).asSubclass(Recommender.class);
+      Constructor<? extends Recommender> constructor = recommenderClass.getConstructor(DataModel.class);
+      recommender = constructor.newInstance(fileDataModel);
+    } catch (NoSuchMethodException nsme) {
+      throw new RuntimeException(nsme);
+    } catch (ClassNotFoundException cnfe) {
+      throw new RuntimeException(cnfe);
+    } catch (InstantiationException ie) {
+      throw new RuntimeException(ie);
+    } catch (IllegalAccessException iae) {
+      throw new RuntimeException(iae);
+    } catch (InvocationTargetException ite) {
+      // Surface the exception thrown by the recommender's constructor itself.
+      throw new RuntimeException(ite.getCause());
+    }
+    recommendationsPerUser = Integer.parseInt(jobConf.get(RECOMMENDATIONS_PER_USER));
+  }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ReducerMetrics.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ReducerMetrics.java?rev=655701&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ReducerMetrics.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ReducerMetrics.java Mon May 12 17:24:23 2008
@@ -0,0 +1,30 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop;
+
+/**
+ * Custom counters reported by {@link RecommenderMapper}. Despite the name of this enum,
+ * both counters are incremented in the map phase, from <code>RecommenderMapper.map()</code>.
+ */
+public enum ReducerMetrics {
+
+ /**
+  * Number of user IDs for which recommendations were emitted. Incremented by one per emitted
+  * record; user IDs are not de-duplicated, so a repeated ID is counted each time.
+  */
+ USERS_PROCESSED,
+ /** Total number of items recommended, summed over all emitted recommendation lists */
+ RECOMMENDATIONS_MADE
+
+}