You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2008/05/13 02:24:24 UTC

svn commit: r655701 - in /lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop: ./ RecommendedItemsWritable.java RecommenderJob.java RecommenderMapper.java ReducerMetrics.java

Author: srowen
Date: Mon May 12 17:24:23 2008
New Revision: 655701

URL: http://svn.apache.org/viewvc?rev=655701&view=rev
Log:
First checkin of Hadoop-ified Recommender framework

Added:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ReducerMetrics.java

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java?rev=655701&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java Mon May 12 17:24:23 2008
@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop;
+
+import org.apache.hadoop.io.Writable;
+import org.apache.mahout.cf.taste.impl.model.GenericItem;
+import org.apache.mahout.cf.taste.impl.recommender.GenericRecommendedItem;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.EOFException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A {@link Writable} which encapsulates a list of {@link RecommendedItem}s. This is
+ * the mapper (and reducer) output, and represents items recommended to a user.
+ * The first item is the one whose estimated preference is highest.
+ *
+ * <p>Serialized form: an <code>int</code> item count, followed by that many pairs of
+ * (item ID written with {@link DataOutput#writeUTF(String)}, preference value written with
+ * {@link DataOutput#writeDouble(double)}). The explicit count lets {@link #readFields(DataInput)}
+ * consume exactly one record, rather than reading until the underlying stream is exhausted.</p>
+ */
+public final class RecommendedItemsWritable implements Writable {
+
+  /** Recommended items, best first; <code>null</code> until supplied or deserialized. */
+  private List<RecommendedItem> recommended;
+
+  /**
+   * Creates an instance with no item list; intended to be populated by {@link #readFields(DataInput)}.
+   */
+  public RecommendedItemsWritable() {
+    // do nothing
+  }
+
+  /**
+   * @param recommended items to wrap; the list is stored as-is, not copied
+   */
+  public RecommendedItemsWritable(List<RecommendedItem> recommended) {
+    this.recommended = recommended;
+  }
+
+  /**
+   * @return the wrapped list, or <code>null</code> if none was supplied and nothing has been read yet
+   */
+  public List<RecommendedItem> getRecommendedItems() {
+    return recommended;
+  }
+
+  /**
+   * Writes the item count followed by each item's ID and value. A <code>null</code> list
+   * is written as an empty one.
+   *
+   * @param out destination to serialize to
+   * @throws IOException if writing to <code>out</code> fails
+   */
+  public void write(DataOutput out) throws IOException {
+    if (recommended == null) {
+      out.writeInt(0);
+      return;
+    }
+    // The length prefix delimits this record so a reader never runs into the next one
+    out.writeInt(recommended.size());
+    for (RecommendedItem item : recommended) {
+      out.writeUTF(item.getItem().getID().toString());
+      out.writeDouble(item.getValue());
+    }
+  }
+
+  /**
+   * Replaces the wrapped list with the items read from <code>in</code>, in the format
+   * produced by {@link #write(DataOutput)}. Item IDs are restored as Strings.
+   *
+   * @param in source to deserialize from
+   * @throws IOException if reading fails, the record is truncated, or the item count is negative
+   */
+  public void readFields(DataInput in) throws IOException {
+    int size = in.readInt();
+    if (size < 0) {
+      throw new IOException("Invalid recommended item count: " + size);
+    }
+    List<RecommendedItem> items = new ArrayList<RecommendedItem>();
+    for (int i = 0; i < size; i++) {
+      String itemID = in.readUTF();
+      double value = in.readDouble();
+      Item item = new GenericItem<String>(itemID);
+      items.add(new GenericRecommendedItem(item, value));
+    }
+    // Assign only after a complete read so a failed read does not leave a partial list behind
+    recommended = items;
+  }
+
+  /**
+   * Convenience factory: creates a new instance and populates it from <code>in</code>.
+   *
+   * @param in source to deserialize from
+   * @return the newly read instance
+   * @throws IOException if {@link #readFields(DataInput)} fails
+   */
+  public static RecommendedItemsWritable read(DataInput in) throws IOException {
+    RecommendedItemsWritable writable = new RecommendedItemsWritable();
+    writable.readFields(in);
+    return writable;
+  }
+
+  /**
+   * @return the items formatted as <code>[id:value,id:value,...]</code>; <code>[]</code> when
+   *  there is no list or it is empty
+   */
+  @Override
+  public String toString() {
+    StringBuilder result = new StringBuilder();
+    result.append('[');
+    if (recommended != null) {
+      boolean first = true;
+      for (RecommendedItem item : recommended) {
+        if (first) {
+          first = false;
+        } else {
+          result.append(',');
+        }
+        result.append(item.getItem().getID().toString());
+        result.append(':');
+        result.append(item.getValue());
+      }
+    }
+    result.append(']');
+    return result.toString();
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderJob.java?rev=655701&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderJob.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderJob.java Mon May 12 17:24:23 2008
@@ -0,0 +1,100 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.mapred.TextOutputFormat;
+import org.apache.hadoop.mapred.lib.IdentityReducer;
+import org.apache.mahout.cf.taste.recommender.Recommender;
+
+import java.io.IOException;
+
+/**
+ * <p>This class configures and runs a {@link RecommenderMapper} using Hadoop.</p>
+ *
+ * <p>Command line arguments are:</p>
+ * <ol>
+ *  <li>Fully-qualified class name of {@link Recommender} to use to make recommendations.
+ *   Note that it must have a public constructor which takes a single
+ *   {@link org.apache.mahout.cf.taste.model.DataModel} argument, since that is the
+ *   constructor {@link RecommenderMapper} looks up.</li>
+ *  <li>Number of recommendations to compute per user</li>
+ *  <li>Location of a text file containing user IDs for which recommendations should be computed,
+ *   one per line</li>
+ *  <li>Location of a data model file containing preference data, suitable for use with
+ *   {@link org.apache.mahout.cf.taste.impl.model.file.FileDataModel}</li>
+ *  <li>Output path where reducer output should go</li>
+ *  <li>Number of mapper tasks to use</li>
+ * </ol>
+ *
+ * <p>Example:</p>
+ *
+ * <p><code>org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender 10 path/to/users.txt
+ *  path/to/data.csv path/to/reducerOutputDir 5</code></p>
+ *
+ * <p>TODO I am not a bit sure this works yet in a real distributed environment.</p>
+ */
+public final class RecommenderJob {
+
+  /** Number of command line arguments that {@link #main(String[])} requires. */
+  private static final int NUM_REQUIRED_ARGS = 6;
+
+  /**
+   * Parses the command line arguments described in the class documentation, builds the
+   * job configuration and runs the job via {@link JobClient#runJob(JobConf)}.
+   *
+   * @param args command line arguments; at least six are required, any extra ones are ignored
+   * @throws IOException if running the job fails
+   * @throws IllegalArgumentException if fewer than six arguments are supplied
+   * @throws NumberFormatException if the second or sixth argument is not an integer
+   */
+  public static void main(String[] args) throws IOException {
+    if (args == null || args.length < NUM_REQUIRED_ARGS) {
+      // Fail with a usage hint instead of an ArrayIndexOutOfBoundsException
+      throw new IllegalArgumentException(
+          "Usage: " + RecommenderJob.class.getName() +
+          " <recommenderClassName> <recommendationsPerUser> <userIDFile>" +
+          " <dataModelFile> <outputPath> <numMappers>");
+    }
+    String recommendClassName = args[0];
+    int recommendationsPerUser = Integer.parseInt(args[1]);
+    String userIDFile = args[2];
+    String dataModelFile = args[3];
+    String outputPath = args[4];
+    int numMappers = Integer.parseInt(args[5]);
+    JobConf jobConf =
+        buildJobConf(recommendClassName, recommendationsPerUser, userIDFile, dataModelFile, outputPath, numMappers);
+    JobClient.runJob(jobConf);
+  }
+
+  /**
+   * Builds the {@link JobConf} for a recommendation job: text input of user IDs,
+   * {@link RecommenderMapper} as the mapper, a single {@link IdentityReducer} and text output.
+   *
+   * @param recommendClassName fully-qualified class name of the {@link Recommender} to use
+   * @param recommendationsPerUser number of recommendations to compute per user
+   * @param userIDFile location of the text file of user IDs, one per line
+   * @param dataModelFile location of the preference data file read by the mapper
+   * @param outputPath path where job output is written
+   * @param numMappers number of map tasks to request
+   * @return the configured, not yet submitted, job configuration
+   */
+  public static JobConf buildJobConf(String recommendClassName,
+                                     int recommendationsPerUser,
+                                     String userIDFile,
+                                     String dataModelFile,
+                                     String outputPath,
+                                     int numMappers) {
+    JobConf jobConf = new JobConf(Recommender.class);
+
+    // Settings read back by RecommenderMapper.configure() on each task
+    jobConf.set(RecommenderMapper.RECOMMENDER_CLASS_NAME, recommendClassName);
+    jobConf.set(RecommenderMapper.RECOMMENDATIONS_PER_USER, String.valueOf(recommendationsPerUser));
+    jobConf.set(RecommenderMapper.DATA_MODEL_FILE, dataModelFile);
+
+    jobConf.setJobName(RecommenderJob.class.getSimpleName());
+
+    jobConf.setInputFormat(TextInputFormat.class);
+    jobConf.setInputPath(new Path(userIDFile));
+
+    jobConf.setNumMapTasks(numMappers);
+    jobConf.setMapperClass(RecommenderMapper.class);
+    jobConf.setMapOutputKeyClass(Text.class);
+    jobConf.setMapOutputValueClass(RecommendedItemsWritable.class);
+
+    // A single identity reducer passes the mappers' output through unchanged
+    jobConf.setNumReduceTasks(1);
+    jobConf.setReducerClass(IdentityReducer.class);
+    jobConf.setOutputKeyClass(Text.class);
+    jobConf.setOutputValueClass(RecommendedItemsWritable.class);
+
+    jobConf.setOutputFormat(TextOutputFormat.class);
+    jobConf.setOutputPath(new Path(outputPath));
+    return jobConf;
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderMapper.java?rev=655701&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderMapper.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderMapper.java Mon May 12 17:24:23 2008
@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Recommender;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.util.List;
+
+/**
+ * <p>The {@link Mapper} which takes as input a file of user IDs (treated as Strings, note), and
+ * for each user ID line, computes recommendations with the configured {@link Recommender}.
+ * The results are output as {@link RecommendedItemsWritable}. User IDs are not de-duplicated;
+ * an ID that appears on several lines is processed once per line. Blank lines are skipped.</p>
+ *
+ * <p>Note that there is no corresponding {@link org.apache.hadoop.mapred.Reducer}; this
+ * implementation can only partially take advantage of the mapreduce paradigm and only
+ * really leverages it for easy parallelization.</p>
+ */
+public final class RecommenderMapper
+    extends MapReduceBase
+    implements Mapper<LongWritable, Text, Text, RecommendedItemsWritable> {
+
+  static final String RECOMMENDER_CLASS_NAME = "recommenderClassName";
+  // The key's spelling is kept as-is: it is the name under which the value is stored in the job configuration
+  static final String RECOMMENDATIONS_PER_USER = "recommendadtionsPerUser";
+  static final String DATA_MODEL_FILE = "dataModelFile";
+
+  private Recommender recommender;
+  private int recommendationsPerUser;
+
+  /**
+   * Computes recommendations for the user ID contained in one input line and emits them
+   * keyed by that user ID.
+   *
+   * @param key position of the line in the input; unused
+   * @param value one line of the user ID file
+   * @param output collector receiving the (user ID, recommendations) pair
+   * @param reporter used to update the {@link ReducerMetrics} counters
+   * @throws IOException if collecting the output fails
+   * @throws RuntimeException wrapping a {@link TasteException} if the recommender fails
+   */
+  public void map(LongWritable key,
+                  Text value,
+                  OutputCollector<Text, RecommendedItemsWritable> output,
+                  Reporter reporter) throws IOException {
+    // Trim so that stray whitespace (e.g. a trailing '\r') does not become part of the ID
+    String userID = value.toString().trim();
+    if (userID.length() == 0) {
+      // Blank line: there is no user to recommend for
+      return;
+    }
+    List<RecommendedItem> recommendedItems;
+    try {
+      recommendedItems = recommender.recommend(userID, recommendationsPerUser);
+    } catch (TasteException te) {
+      throw new RuntimeException(te);
+    }
+    RecommendedItemsWritable writable = new RecommendedItemsWritable(recommendedItems);
+    output.collect(new Text(userID), writable);
+    reporter.incrCounter(ReducerMetrics.USERS_PROCESSED, 1L);
+    reporter.incrCounter(ReducerMetrics.RECOMMENDATIONS_MADE, recommendedItems.size());
+  }
+
+  /**
+   * Reads the job settings, loads the {@link FileDataModel} and reflectively instantiates the
+   * configured {@link Recommender} through its constructor taking a single {@link DataModel}.
+   *
+   * @param jobConf job configuration holding the settings named by this class's constants
+   * @throws IllegalStateException if a required setting is absent
+   * @throws NumberFormatException if the recommendations-per-user setting is not an integer
+   * @throws ClassCastException if the configured class does not implement {@link Recommender}
+   * @throws RuntimeException wrapping the underlying cause if the data file is not found or
+   *  the recommender cannot be loaded or constructed
+   */
+  @Override
+  public void configure(JobConf jobConf) {
+    // Validate all settings first, before the data model is loaded
+    String dataModelFile = requireSetting(jobConf, DATA_MODEL_FILE);
+    String recommenderClassName = requireSetting(jobConf, RECOMMENDER_CLASS_NAME);
+    recommendationsPerUser = Integer.parseInt(requireSetting(jobConf, RECOMMENDATIONS_PER_USER));
+
+    FileDataModel fileDataModel;
+    try {
+      fileDataModel = new FileDataModel(new File(dataModelFile));
+    } catch (FileNotFoundException fnfe) {
+      throw new RuntimeException(fnfe);
+    }
+    try {
+      // asSubclass() performs a checked conversion, avoiding an unchecked cast
+      Class<? extends Recommender> recommenderClass =
+          Class.forName(recommenderClassName).asSubclass(Recommender.class);
+      Constructor<? extends Recommender> constructor = recommenderClass.getConstructor(DataModel.class);
+      recommender = constructor.newInstance(fileDataModel);
+    } catch (NoSuchMethodException nsme) {
+      throw new RuntimeException(nsme);
+    } catch (ClassNotFoundException cnfe) {
+      throw new RuntimeException(cnfe);
+    } catch (InstantiationException ie) {
+      throw new RuntimeException(ie);
+    } catch (IllegalAccessException iae) {
+      throw new RuntimeException(iae);
+    } catch (InvocationTargetException ite) {
+      // Surface the exception thrown by the constructor itself rather than the reflection wrapper
+      throw new RuntimeException(ite.getCause());
+    }
+  }
+
+  /** Returns the value stored under the given key, failing with a descriptive error if it is absent. */
+  private static String requireSetting(JobConf jobConf, String name) {
+    String setting = jobConf.get(name);
+    if (setting == null) {
+      throw new IllegalStateException("Required job setting is missing: " + name);
+    }
+    return setting;
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ReducerMetrics.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ReducerMetrics.java?rev=655701&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ReducerMetrics.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ReducerMetrics.java Mon May 12 17:24:23 2008
@@ -0,0 +1,30 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop;
+
+/**
+ * Custom metrics collected by {@link RecommenderMapper}. Despite this enum's name, the
+ * counters are updated by the mapper, in its <code>map()</code> method.
+ */
+public enum ReducerMetrics {
+
+  /**
+   * Number of user IDs for which recommendations were produced. Incremented by one for each
+   * user ID the mapper processes; IDs are not de-duplicated.
+   */
+  USERS_PROCESSED,
+  /**
+   * Total number of items recommended to those users. Incremented by the size of the
+   * recommendation list produced for each user ID.
+   */
+  RECOMMENDATIONS_MADE
+
+}