You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/12/13 11:01:47 UTC
svn commit: r890039 - in
/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop:
AbstractJob.java item/RecommenderJob.java item/RecommenderMapper.java
pseudo/RecommenderJob.java slopeone/SlopeOneAverageDiffsJob.java
Author: srowen
Date: Sun Dec 13 10:01:46 2009
New Revision: 890039
URL: http://svn.apache.org/viewvc?rev=890039&view=rev
Log:
Add default param values, usersFile arg
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java?rev=890039&r1=890038&r2=890039&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java Sun Dec 13 10:01:46 2009
@@ -17,6 +17,7 @@
package org.apache.mahout.cf.taste.hadoop;
+import org.apache.commons.cli2.Argument;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -63,19 +64,35 @@
this.configuration = configuration;
}
- protected static Option buildOption(String name, String shortName, String description, boolean required) {
+ protected static Option buildOption(String name, String shortName, String description) {
+ return buildOption(name, shortName, description, true, null);
+ }
+
+ protected static Option buildOption(String name, String shortName, String description, String defaultValue) {
+ return buildOption(name, shortName, description, false, defaultValue);
+ }
+
+ private static Option buildOption(String name,
+ String shortName,
+ String description,
+ boolean required,
+ String defaultValue) {
+ ArgumentBuilder argBuilder = new ArgumentBuilder().withName(name).withMinimum(1).withMaximum(1);
+ if (defaultValue != null) {
+ argBuilder = argBuilder.withDefault(defaultValue);
+ }
+ Argument arg = argBuilder.create();
return new DefaultOptionBuilder().withLongName(name).withRequired(required)
- .withShortName(shortName).withArgument(new ArgumentBuilder().withName(name).withMinimum(1)
- .withMaximum(1).create()).withDescription(description).create();
+ .withShortName(shortName).withArgument(arg).withDescription(description).create();
}
- protected static Map<String,Object> parseArguments(String[] args, Option... extraOpts) {
+ protected static Map<String,String> parseArguments(String[] args, Option... extraOpts) {
Option inputOpt = DefaultOptionCreator.inputOption().create();
- Option tempDirOpt = buildOption("tempDir", "t", "Intermediate output directory", false);
+ Option tempDirOpt = buildOption("tempDir", "t", "Intermediate output directory", "temp");
Option outputOpt = DefaultOptionCreator.outputOption().create();
Option helpOpt = DefaultOptionCreator.helpOption();
- Option jarFileOpt = buildOption("jarFile", "m", "Implementation jar", true);
+ Option jarFileOpt = buildOption("jarFile", "m", "Implementation jar");
GroupBuilder gBuilder = new GroupBuilder().withName("Options")
.withOption(inputOpt)
@@ -106,19 +123,26 @@
return null;
}
- Map<String,Object> result = new HashMap<String,Object>();
- result.put(inputOpt.getPreferredName(), cmdLine.getValue(inputOpt));
- result.put(tempDirOpt.getPreferredName(), cmdLine.getValue(tempDirOpt));
- result.put(outputOpt.getPreferredName(), cmdLine.getValue(outputOpt));
- result.put(helpOpt.getPreferredName(), cmdLine.getValue(helpOpt));
- result.put(jarFileOpt.getPreferredName(), cmdLine.getValue(jarFileOpt));
+ Map<String,String> result = new HashMap<String,String>();
+ maybePut(result, cmdLine, inputOpt);
+ maybePut(result, cmdLine, tempDirOpt);
+ maybePut(result, cmdLine, outputOpt);
+ maybePut(result, cmdLine, helpOpt);
+ maybePut(result, cmdLine, jarFileOpt);
for (Option opt : extraOpts) {
- result.put(opt.getPreferredName(), cmdLine.getValue(opt));
+ maybePut(result, cmdLine, opt);
}
return result;
}
+ private static void maybePut(Map<String,String> args, CommandLine cmdLine, Option opt) {
+ Object value = cmdLine.getValue(opt);
+ if (value != null) {
+ args.put(opt.getPreferredName(), value.toString());
+ }
+ }
+
protected static JobConf prepareJobConf(String inputPath,
String outputPath,
String jarFile,
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java?rev=890039&r1=890038&r2=890039&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java Sun Dec 13 10:01:46 2009
@@ -61,15 +61,18 @@
@Override
public int run(String[] args) throws IOException {
- Option numReccomendationsOpt = buildOption("numRecommendations", "n", "Number of recommendations per user", true);
+ Option numReccomendationsOpt = buildOption("numRecommendations", "n", "Number of recommendations per user", "10");
+ Option usersFileOpt = buildOption("usersFile", "n", "Number of recommendations per user", null);
- Map<String,Object> parsedArgs = parseArguments(args, numReccomendationsOpt);
+ Map<String,String> parsedArgs = parseArguments(args, numReccomendationsOpt, usersFileOpt);
+
+ String inputPath = parsedArgs.get("--input");
+ String tempDirPath = parsedArgs.get("--tempDir");
+ String outputPath = parsedArgs.get("--output");
+ String jarFile = parsedArgs.get("--jarFile");
+ int recommendationsPerUser = Integer.parseInt(parsedArgs.get("--numRecommendations"));
+ String usersFile = parsedArgs.get("--usersFile");
- String inputPath = parsedArgs.get("--input").toString();
- String tempDirPath = parsedArgs.get("--tempDir").toString();
- String outputPath = parsedArgs.get("--output").toString();
- String jarFile = parsedArgs.get("--jarFile").toString();
- int recommendationsPerUser = Integer.parseInt((String) parsedArgs.get("--numRecommendations"));
String userVectorPath = tempDirPath + "/userVectors";
String itemIDIndexPath = tempDirPath + "/itemIDIndex";
String cooccurrencePath = tempDirPath + "/cooccurrence";
@@ -127,6 +130,7 @@
recommenderConf.set(RecommenderMapper.COOCCURRENCE_PATH, cooccurrencePath);
recommenderConf.set(RecommenderMapper.ITEMID_INDEX_PATH, itemIDIndexPath);
recommenderConf.setInt(RecommenderMapper.RECOMMENDATIONS_PER_USER, recommendationsPerUser);
+ recommenderConf.set(RecommenderMapper.USERS_FILE, usersFile);
recommenderConf.setClass("mapred.output.compression.codec", GzipCodec.class, CompressionCodec.class);
JobClient.runJob(recommenderConf);
return 0;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java?rev=890039&r1=890038&r2=890039&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java Sun Dec 13 10:01:46 2009
@@ -18,6 +18,7 @@
package org.apache.mahout.cf.taste.hadoop.item;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
@@ -31,13 +32,16 @@
import org.apache.mahout.cf.taste.hadoop.MapFilesMap;
import org.apache.mahout.cf.taste.hadoop.RecommendedItemsWritable;
import org.apache.mahout.cf.taste.impl.common.Cache;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
import org.apache.mahout.cf.taste.impl.common.Retriever;
import org.apache.mahout.cf.taste.impl.recommender.GenericRecommendedItem;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.common.FileLineIterable;
import org.apache.mahout.matrix.SparseVector;
import org.apache.mahout.matrix.Vector;
import java.io.IOException;
+import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
@@ -52,11 +56,13 @@
static final String COOCCURRENCE_PATH = "cooccurrencePath";
static final String ITEMID_INDEX_PATH = "itemIDIndexPath";
static final String RECOMMENDATIONS_PER_USER = "recommendationsPerUser";
+ static final String USERS_FILE = "usersFile";
private int recommendationsPerUser;
private MapFilesMap<IntWritable,LongWritable> indexItemIDMap;
private MapFilesMap<IntWritable,Vector> cooccurrenceColumnMap;
private Cache<IntWritable,Vector> cooccurrenceColumnCache;
+ private FastIDSet usersToRecommendFor;
@Override
public void configure(JobConf jobConf) {
@@ -67,6 +73,17 @@
recommendationsPerUser = jobConf.getInt(RECOMMENDATIONS_PER_USER, 10);
indexItemIDMap = new MapFilesMap<IntWritable,LongWritable>(fs, itemIDIndexPath, new Configuration());
cooccurrenceColumnMap = new MapFilesMap<IntWritable,Vector>(fs, cooccurrencePath, new Configuration());
+ String usersFilePathString = jobConf.get(USERS_FILE);
+ if (usersFilePathString == null) {
+ usersToRecommendFor = null;
+ } else {
+ usersToRecommendFor = new FastIDSet();
+ Path usersFilePath = new Path(usersFilePathString).makeQualified(fs);
+ FSDataInputStream in = fs.open(usersFilePath);
+ for (String line : new FileLineIterable(in)) {
+ usersToRecommendFor.add(Long.parseLong(line));
+ }
+ }
} catch (IOException ioe) {
throw new IllegalStateException(ioe);
}
@@ -79,6 +96,10 @@
OutputCollector<LongWritable, RecommendedItemsWritable> output,
Reporter reporter) throws IOException {
+ if (usersToRecommendFor != null && !usersToRecommendFor.contains(userID.get())) {
+ return;
+ }
+
Iterator<Vector.Element> userVectorIterator = userVector.iterateNonZero();
Vector recommendationVector = new SparseVector(Integer.MAX_VALUE, 1000);
while (userVectorIterator.hasNext()) {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java?rev=890039&r1=890038&r2=890039&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java Sun Dec 13 10:01:46 2009
@@ -93,16 +93,16 @@
public int run(String[] args) throws IOException {
Option recommendClassOpt =
- buildOption("recommenderClassName", "r", "Name of recommender class to instantiate", true);
+ buildOption("recommenderClassName", "r", "Name of recommender class to instantiate");
Option numReccomendationsOpt =
- buildOption("numRecommendations", "n", "Number of recommendations per user", true);
+ buildOption("numRecommendations", "n", "Number of recommendations per user");
- Map<String,Object> parsedArgs = parseArguments(args, recommendClassOpt, numReccomendationsOpt);
- String inputFile = parsedArgs.get("--input").toString();
- String outputPath = parsedArgs.get("--output").toString();
- String jarFile = parsedArgs.get("--jarFile").toString();
+ Map<String,String> parsedArgs = parseArguments(args, recommendClassOpt, numReccomendationsOpt);
+ String inputFile = parsedArgs.get("--input");
+ String outputPath = parsedArgs.get("--output");
+ String jarFile = parsedArgs.get("--jarFile");
- String recommendClassName = parsedArgs.get("--recommenderClassName").toString();
+ String recommendClassName = parsedArgs.get("--recommenderClassName");
int recommendationsPerUser = Integer.parseInt((String) parsedArgs.get("--numRecommendations"));
JobConf jobConf = prepareJobConf(inputFile,
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java?rev=890039&r1=890038&r2=890039&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java Sun Dec 13 10:01:46 2009
@@ -42,12 +42,12 @@
@Override
public int run(String[] args) throws IOException {
- Map<String,Object> parsedArgs = parseArguments(args);
+ Map<String,String> parsedArgs = parseArguments(args);
- String prefsFile = parsedArgs.get("--input").toString();
- String outputPath = parsedArgs.get("--output").toString();
- String jarFile = parsedArgs.get("--jarFile").toString();
- String averagesOutputPath = parsedArgs.get("--tempDir").toString();
+ String prefsFile = parsedArgs.get("--input");
+ String outputPath = parsedArgs.get("--output");
+ String jarFile = parsedArgs.get("--jarFile");
+ String averagesOutputPath = parsedArgs.get("--tempDir");
JobConf prefsToDiffsJobConf = prepareJobConf(prefsFile,
averagesOutputPath,