You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/12/13 11:01:47 UTC

svn commit: r890039 - in /lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop: AbstractJob.java item/RecommenderJob.java item/RecommenderMapper.java pseudo/RecommenderJob.java slopeone/SlopeOneAverageDiffsJob.java

Author: srowen
Date: Sun Dec 13 10:01:46 2009
New Revision: 890039

URL: http://svn.apache.org/viewvc?rev=890039&view=rev
Log:
Add default param values, usersFile arg

Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java?rev=890039&r1=890038&r2=890039&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java Sun Dec 13 10:01:46 2009
@@ -17,6 +17,7 @@
 
 package org.apache.mahout.cf.taste.hadoop;
 
+import org.apache.commons.cli2.Argument;
 import org.apache.commons.cli2.CommandLine;
 import org.apache.commons.cli2.Group;
 import org.apache.commons.cli2.Option;
@@ -63,19 +64,35 @@
     this.configuration = configuration;
   }
 
-  protected static Option buildOption(String name, String shortName, String description, boolean required) {
+  protected static Option buildOption(String name, String shortName, String description) {
+    return buildOption(name, shortName, description, true, null);
+  }
+
+  protected static Option buildOption(String name, String shortName, String description, String defaultValue) {
+    return buildOption(name, shortName, description, false, defaultValue);    
+  }
+
+  private static Option buildOption(String name,
+                                    String shortName,
+                                    String description,
+                                    boolean required,
+                                    String defaultValue) {
+    ArgumentBuilder argBuilder = new ArgumentBuilder().withName(name).withMinimum(1).withMaximum(1);
+    if (defaultValue != null) {
+      argBuilder = argBuilder.withDefault(defaultValue);
+    }
+    Argument arg = argBuilder.create();
     return new DefaultOptionBuilder().withLongName(name).withRequired(required)
-      .withShortName(shortName).withArgument(new ArgumentBuilder().withName(name).withMinimum(1)
-      .withMaximum(1).create()).withDescription(description).create();
+      .withShortName(shortName).withArgument(arg).withDescription(description).create();
   }
 
-  protected static Map<String,Object> parseArguments(String[] args, Option... extraOpts) {
+  protected static Map<String,String> parseArguments(String[] args, Option... extraOpts) {
 
     Option inputOpt = DefaultOptionCreator.inputOption().create();
-    Option tempDirOpt = buildOption("tempDir", "t", "Intermediate output directory", false);
+    Option tempDirOpt = buildOption("tempDir", "t", "Intermediate output directory", "temp");
     Option outputOpt = DefaultOptionCreator.outputOption().create();
     Option helpOpt = DefaultOptionCreator.helpOption();
-    Option jarFileOpt = buildOption("jarFile", "m", "Implementation jar", true);
+    Option jarFileOpt = buildOption("jarFile", "m", "Implementation jar");
 
     GroupBuilder gBuilder = new GroupBuilder().withName("Options")
       .withOption(inputOpt)
@@ -106,19 +123,26 @@
       return null;
     }
 
-    Map<String,Object> result = new HashMap<String,Object>();
-    result.put(inputOpt.getPreferredName(), cmdLine.getValue(inputOpt));
-    result.put(tempDirOpt.getPreferredName(), cmdLine.getValue(tempDirOpt));
-    result.put(outputOpt.getPreferredName(), cmdLine.getValue(outputOpt));
-    result.put(helpOpt.getPreferredName(), cmdLine.getValue(helpOpt));
-    result.put(jarFileOpt.getPreferredName(), cmdLine.getValue(jarFileOpt));
+    Map<String,String> result = new HashMap<String,String>();
+    maybePut(result, cmdLine, inputOpt);
+    maybePut(result, cmdLine, tempDirOpt);
+    maybePut(result, cmdLine, outputOpt);
+    maybePut(result, cmdLine, helpOpt);
+    maybePut(result, cmdLine, jarFileOpt);
     for (Option opt : extraOpts) {
-      result.put(opt.getPreferredName(), cmdLine.getValue(opt));
+      maybePut(result, cmdLine, opt);
     }
 
     return result;    
   }
 
+  private static void maybePut(Map<String,String> args, CommandLine cmdLine, Option opt) {
+    Object value = cmdLine.getValue(opt);
+    if (value != null) {
+      args.put(opt.getPreferredName(), value.toString());
+    }
+  }
+
   protected static JobConf prepareJobConf(String inputPath,
                                           String outputPath,
                                           String jarFile,

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java?rev=890039&r1=890038&r2=890039&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java Sun Dec 13 10:01:46 2009
@@ -61,15 +61,18 @@
   @Override
   public int run(String[] args) throws IOException {
 
-    Option numReccomendationsOpt = buildOption("numRecommendations", "n", "Number of recommendations per user", true);
+    Option numReccomendationsOpt = buildOption("numRecommendations", "n", "Number of recommendations per user", "10");
+    Option usersFileOpt = buildOption("usersFile", "n", "Number of recommendations per user", null);
 
-    Map<String,Object> parsedArgs = parseArguments(args, numReccomendationsOpt);
+    Map<String,String> parsedArgs = parseArguments(args, numReccomendationsOpt, usersFileOpt);
+
+    String inputPath = parsedArgs.get("--input");
+    String tempDirPath = parsedArgs.get("--tempDir");
+    String outputPath = parsedArgs.get("--output");
+    String jarFile = parsedArgs.get("--jarFile");
+    int recommendationsPerUser = Integer.parseInt(parsedArgs.get("--numRecommendations"));
+    String usersFile = parsedArgs.get("--usersFile");
 
-    String inputPath = parsedArgs.get("--input").toString();
-    String tempDirPath = parsedArgs.get("--tempDir").toString();
-    String outputPath = parsedArgs.get("--output").toString();
-    String jarFile = parsedArgs.get("--jarFile").toString();
-    int recommendationsPerUser = Integer.parseInt((String) parsedArgs.get("--numRecommendations"));
     String userVectorPath = tempDirPath + "/userVectors";
     String itemIDIndexPath = tempDirPath + "/itemIDIndex";
     String cooccurrencePath = tempDirPath + "/cooccurrence";
@@ -127,6 +130,7 @@
     recommenderConf.set(RecommenderMapper.COOCCURRENCE_PATH, cooccurrencePath);
     recommenderConf.set(RecommenderMapper.ITEMID_INDEX_PATH, itemIDIndexPath);    
     recommenderConf.setInt(RecommenderMapper.RECOMMENDATIONS_PER_USER, recommendationsPerUser);
+    recommenderConf.set(RecommenderMapper.USERS_FILE, usersFile);
     recommenderConf.setClass("mapred.output.compression.codec", GzipCodec.class, CompressionCodec.class);
     JobClient.runJob(recommenderConf);
     return 0;

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java?rev=890039&r1=890038&r2=890039&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java Sun Dec 13 10:01:46 2009
@@ -18,6 +18,7 @@
 package org.apache.mahout.cf.taste.hadoop.item;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
@@ -31,13 +32,16 @@
 import org.apache.mahout.cf.taste.hadoop.MapFilesMap;
 import org.apache.mahout.cf.taste.hadoop.RecommendedItemsWritable;
 import org.apache.mahout.cf.taste.impl.common.Cache;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
 import org.apache.mahout.cf.taste.impl.common.Retriever;
 import org.apache.mahout.cf.taste.impl.recommender.GenericRecommendedItem;
 import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.common.FileLineIterable;
 import org.apache.mahout.matrix.SparseVector;
 import org.apache.mahout.matrix.Vector;
 
 import java.io.IOException;
+import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Iterator;
@@ -52,11 +56,13 @@
   static final String COOCCURRENCE_PATH = "cooccurrencePath";
   static final String ITEMID_INDEX_PATH = "itemIDIndexPath";
   static final String RECOMMENDATIONS_PER_USER = "recommendationsPerUser";
+  static final String USERS_FILE = "usersFile";
 
   private int recommendationsPerUser;
   private MapFilesMap<IntWritable,LongWritable> indexItemIDMap;
   private MapFilesMap<IntWritable,Vector> cooccurrenceColumnMap;
   private Cache<IntWritable,Vector> cooccurrenceColumnCache;
+  private FastIDSet usersToRecommendFor;
 
   @Override
   public void configure(JobConf jobConf) {
@@ -67,6 +73,17 @@
       recommendationsPerUser = jobConf.getInt(RECOMMENDATIONS_PER_USER, 10);
       indexItemIDMap = new MapFilesMap<IntWritable,LongWritable>(fs, itemIDIndexPath, new Configuration());
       cooccurrenceColumnMap = new MapFilesMap<IntWritable,Vector>(fs, cooccurrencePath, new Configuration());
+      String usersFilePathString = jobConf.get(USERS_FILE);
+      if (usersFilePathString == null) {
+        usersToRecommendFor = null;
+      } else {
+        usersToRecommendFor = new FastIDSet();
+        Path usersFilePath = new Path(usersFilePathString).makeQualified(fs);
+        FSDataInputStream in = fs.open(usersFilePath);
+        for (String line : new FileLineIterable(in)) {
+          usersToRecommendFor.add(Long.parseLong(line));
+        }
+      }
     } catch (IOException ioe) {
       throw new IllegalStateException(ioe);
     }
@@ -79,6 +96,10 @@
                   OutputCollector<LongWritable, RecommendedItemsWritable> output,
                   Reporter reporter) throws IOException {
 
+    if (usersToRecommendFor != null && !usersToRecommendFor.contains(userID.get())) {
+      return;
+    }
+
     Iterator<Vector.Element> userVectorIterator = userVector.iterateNonZero();
     Vector recommendationVector = new SparseVector(Integer.MAX_VALUE, 1000);
     while (userVectorIterator.hasNext()) {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java?rev=890039&r1=890038&r2=890039&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java Sun Dec 13 10:01:46 2009
@@ -93,16 +93,16 @@
   public int run(String[] args) throws IOException {
 
     Option recommendClassOpt =
-        buildOption("recommenderClassName", "r", "Name of recommender class to instantiate", true);
+        buildOption("recommenderClassName", "r", "Name of recommender class to instantiate");
     Option numReccomendationsOpt =
-        buildOption("numRecommendations", "n", "Number of recommendations per user", true);
+        buildOption("numRecommendations", "n", "Number of recommendations per user");
 
-    Map<String,Object> parsedArgs = parseArguments(args, recommendClassOpt, numReccomendationsOpt);
-    String inputFile = parsedArgs.get("--input").toString();
-    String outputPath = parsedArgs.get("--output").toString();
-    String jarFile = parsedArgs.get("--jarFile").toString();
+    Map<String,String> parsedArgs = parseArguments(args, recommendClassOpt, numReccomendationsOpt);
+    String inputFile = parsedArgs.get("--input");
+    String outputPath = parsedArgs.get("--output");
+    String jarFile = parsedArgs.get("--jarFile");
 
-    String recommendClassName = parsedArgs.get("--recommenderClassName").toString();
+    String recommendClassName = parsedArgs.get("--recommenderClassName");
     int recommendationsPerUser = Integer.parseInt((String) parsedArgs.get("--numRecommendations"));
 
     JobConf jobConf = prepareJobConf(inputFile,

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java?rev=890039&r1=890038&r2=890039&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java Sun Dec 13 10:01:46 2009
@@ -42,12 +42,12 @@
   @Override
   public int run(String[] args) throws IOException {
 
-    Map<String,Object> parsedArgs = parseArguments(args);
+    Map<String,String> parsedArgs = parseArguments(args);
 
-    String prefsFile = parsedArgs.get("--input").toString();
-    String outputPath = parsedArgs.get("--output").toString();
-    String jarFile = parsedArgs.get("--jarFile").toString();
-    String averagesOutputPath = parsedArgs.get("--tempDir").toString();
+    String prefsFile = parsedArgs.get("--input");
+    String outputPath = parsedArgs.get("--output");
+    String jarFile = parsedArgs.get("--jarFile");
+    String averagesOutputPath = parsedArgs.get("--tempDir");
 
     JobConf prefsToDiffsJobConf = prepareJobConf(prefsFile,
                                                  averagesOutputPath,