You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/12/04 20:15:31 UTC
svn commit: r887326 - in
/lucene/mahout/trunk/core/src/main/java/org/apache/mahout: cf/taste/hadoop/
cf/taste/hadoop/item/ cf/taste/hadoop/pseudo/ cf/taste/hadoop/slopeone/
common/commandline/
Author: srowen
Date: Fri Dec 4 19:15:31 2009
New Revision: 887326
URL: http://svn.apache.org/viewvc?rev=887326&view=rev
Log:
Refactored argument and conf handling in CF Hadoop jobs.
Added:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java?rev=887326&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java Fri Dec 4 19:15:31 2009
@@ -0,0 +1,149 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop;
+
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli2.CommandLine;
+import org.apache.commons.cli2.Group;
+import org.apache.commons.cli2.Option;
+import org.apache.commons.cli2.OptionException;
+import org.apache.commons.cli2.builder.ArgumentBuilder;
+import org.apache.commons.cli2.builder.DefaultOptionBuilder;
+import org.apache.commons.cli2.builder.GroupBuilder;
+import org.apache.commons.cli2.commandline.Parser;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputFormat;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+public abstract class AbstractJob {
+
+ private static final Logger log = LoggerFactory.getLogger(AbstractJob.class);
+
+ protected static Option buildOption(String name, String shortName, String description) {
+ return new DefaultOptionBuilder().withLongName(name).withRequired(true)
+ .withShortName(shortName).withArgument(new ArgumentBuilder().withName(name).withMinimum(1)
+ .withMaximum(1).create()).withDescription(description).create();
+ }
+
+ protected static Map<String,Object> parseArguments(String[] args, Option... extraOpts) {
+
+ DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
+ ArgumentBuilder abuilder = new ArgumentBuilder();
+
+ Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder).create();
+ Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).create();
+ Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
+ Option jarFileOpt = buildOption("jarFile", "m", "Implementation jar");
+
+ GroupBuilder gBuilder = new GroupBuilder().withName("Options");
+ gBuilder = gBuilder.withOption(inputOpt);
+ gBuilder = gBuilder.withOption(outputOpt);
+ gBuilder = gBuilder.withOption(helpOpt);
+ gBuilder = gBuilder.withOption(jarFileOpt);
+
+ for (Option opt : extraOpts) {
+ gBuilder = gBuilder.withOption(opt);
+ }
+
+ Group group = gBuilder.create();
+
+ CommandLine cmdLine;
+ try {
+ Parser parser = new Parser();
+ parser.setGroup(group);
+ cmdLine = parser.parse(args);
+ } catch (OptionException e) {
+ log.error(e.getMessage());
+ CommandLineUtil.printHelp(group);
+ return null;
+ }
+
+ if (cmdLine.hasOption(helpOpt)) {
+ CommandLineUtil.printHelp(group);
+ return null;
+ }
+
+ Map<String,Object> result = new HashMap<String,Object>();
+ result.put(inputOpt.getPreferredName(), cmdLine.getValue(inputOpt));
+ result.put(outputOpt.getPreferredName(), cmdLine.getValue(outputOpt));
+ result.put(helpOpt.getPreferredName(), cmdLine.getValue(helpOpt));
+ result.put(jarFileOpt.getPreferredName(), cmdLine.getValue(jarFileOpt));
+ for (Option opt : extraOpts) {
+ result.put(opt.getPreferredName(), cmdLine.getValue(opt));
+ }
+
+ return result;
+ }
+
+ protected static JobConf prepareJobConf(String inputPath,
+ String outputPath,
+ String jarFile,
+ Class<? extends InputFormat> inputFormat,
+ Class<? extends Mapper> mapper,
+ Class<? extends Writable> mapperKey,
+ Class<? extends Writable> mapperValue,
+ Class<? extends Reducer> reducer,
+ Class<? extends Writable> reducerKey,
+ Class<? extends Writable> reducerValue,
+ Class<? extends OutputFormat> outputFormat) throws IOException {
+
+ JobConf jobConf = new JobConf();
+ FileSystem fs = FileSystem.get(jobConf);
+
+ Path inputPathPath = new Path(inputPath).makeQualified(fs);
+ Path outputPathPath = new Path(outputPath).makeQualified(fs);
+
+ if (fs.exists(outputPathPath)) {
+ fs.delete(outputPathPath, true);
+ }
+
+ jobConf.set("mapred.jar", jarFile);
+ jobConf.setJar(jarFile);
+
+ jobConf.setClass("mapred.input.format.class", inputFormat, InputFormat.class);
+ jobConf.set("mapred.input.dir", StringUtils.escapeString(inputPathPath.toString()));
+
+ jobConf.setClass("mapred.mapper.class", mapper, Mapper.class);
+ jobConf.setClass("mapred.mapoutput.key.class", mapperKey, Writable.class);
+ jobConf.setClass("mapred.mapoutput.value.class", mapperValue, Writable.class);
+
+ jobConf.setClass("mapred.reducer.class", reducer, Reducer.class);
+ jobConf.setClass("mapred.output.key.class", reducerKey, Writable.class);
+ jobConf.setClass("mapred.output.value.class", reducerValue, Writable.class);
+
+ jobConf.setClass("mapred.output.format.class", outputFormat, OutputFormat.class);
+ jobConf.set("mapred.output.dir", StringUtils.escapeString(outputPathPath.toString()));
+
+ return jobConf;
+ }
+
+}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java?rev=887326&r1=887325&r2=887326&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java Fri Dec 4 19:15:31 2009
@@ -17,7 +17,9 @@
package org.apache.mahout.cf.taste.hadoop.item;
-public final class RecommenderJob {
+import org.apache.mahout.cf.taste.hadoop.AbstractJob;
+
+public final class RecommenderJob extends AbstractJob {
public static void main(String[] args) {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java?rev=887326&r1=887325&r2=887326&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java Fri Dec 4 19:15:31 2009
@@ -17,35 +17,18 @@
package org.apache.mahout.cf.taste.hadoop.pseudo;
-import org.apache.commons.cli2.CommandLine;
-import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
-import org.apache.commons.cli2.OptionException;
-import org.apache.commons.cli2.builder.ArgumentBuilder;
-import org.apache.commons.cli2.builder.DefaultOptionBuilder;
-import org.apache.commons.cli2.builder.GroupBuilder;
-import org.apache.commons.cli2.commandline.Parser;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputFormat;
-import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityReducer;
-import org.apache.hadoop.util.StringUtils;
+import org.apache.mahout.cf.taste.hadoop.AbstractJob;
import org.apache.mahout.cf.taste.hadoop.RecommendedItemsWritable;
import org.apache.mahout.cf.taste.recommender.Recommender;
-import org.apache.mahout.common.CommandLineUtil;
-import org.apache.mahout.common.commandline.DefaultOptionCreator;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import java.io.IOException;
+import java.util.Map;
/**
* <p>This job runs a "pseudo-distributed" recommendation process on Hadoop.
@@ -73,7 +56,7 @@
*
* {@code
* --recommenderClassName org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender
- * --userRec 10 --userIdFile path/to/users.txt --dataModelFile path/to/data.csv
+ * --numRecommendations 10 --input path/to/users.txt --dataModelFile path/to/data.csv
* --output path/to/reducerOutputDir --jarFile recommender.jar
* }
*
@@ -111,114 +94,48 @@
* {@code
 * hadoop jar recommender.jar org.apache.mahout.cf.taste.hadoop.pseudo.RecommenderJob \
* --recommenderClassName your.project.Recommender \
- * --userRec 10 --userIdFile input/users.txt --dataModelFile input/input.csv \
+ * --numRecommendations 10 --input input/users.txt --dataModelFile input/input.csv \
* --output output --jarFile recommender.jar
* }
*/
-public final class RecommenderJob {
-
- private static final Logger log = LoggerFactory.getLogger(RecommenderJob.class);
+public final class RecommenderJob extends AbstractJob {
private RecommenderJob() {
}
public static void main(String[] args) throws Exception {
- DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
- ArgumentBuilder abuilder = new ArgumentBuilder();
- GroupBuilder gbuilder = new GroupBuilder();
-
- Option recommendClassOpt = obuilder.withLongName("recommenderClassName").withRequired(true)
- .withShortName("r").withArgument(abuilder.withName("recommenderClassName").withMinimum(1)
- .withMaximum(1).create()).withDescription("Name of recommender class to use.").create();
-
- Option userRecommendOpt = obuilder.withLongName("userRec").withRequired(true)
- .withShortName("n").withArgument(abuilder.withName("userRec").withMinimum(1)
- .withMaximum(1).create()).withDescription("Desired number of recommendations per user.").create();
-
- Option userIDFileOpt = obuilder.withLongName("userIdFile").withRequired(true)
- .withShortName("f").withArgument(abuilder.withName("userIdFile").withMinimum(1)
- .withMaximum(1).create()).withDescription("File containing user ids.").create();
-
- Option dataModelFileOpt = obuilder.withLongName("dataModelFile").withRequired(true)
- .withShortName("m").withArgument(abuilder.withName("dataModelFile").withMinimum(1)
- .withMaximum(1).create()).withDescription("File containing data model.").create();
-
- Option jarFileOpt = obuilder.withLongName("jarFile").withRequired(true)
- .withShortName("m").withArgument(abuilder.withName("jarFile").withMinimum(1)
- .withMaximum(1).create()).withDescription("Implementation jar.").create();
-
- Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).create();
- Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
-
- Group group = gbuilder.withName("Options").withOption(recommendClassOpt).withOption(userRecommendOpt)
- .withOption(userIDFileOpt).withOption(dataModelFileOpt).withOption(outputOpt)
- .withOption(jarFileOpt).withOption(helpOpt).create();
-
-
- try {
- Parser parser = new Parser();
- parser.setGroup(group);
- CommandLine cmdLine = parser.parse(args);
-
- if (cmdLine.hasOption(helpOpt)) {
- CommandLineUtil.printHelp(group);
- return;
- }
-
- String recommendClassName = cmdLine.getValue(recommendClassOpt).toString();
- int recommendationsPerUser = Integer.parseInt(cmdLine.getValue(userRecommendOpt).toString());
- String userIDFile = cmdLine.getValue(userIDFileOpt).toString();
- String dataModelFile = cmdLine.getValue(dataModelFileOpt).toString();
- String jarFile = cmdLine.getValue(jarFileOpt).toString();
- String outputPath = cmdLine.getValue(outputOpt).toString();
- JobConf jobConf =
- buildJobConf(recommendClassName, recommendationsPerUser, userIDFile, dataModelFile, jarFile, outputPath);
- JobClient.runJob(jobConf);
- } catch (OptionException e) {
- log.error(e.getMessage());
- CommandLineUtil.printHelp(group);
- }
- }
-
- public static JobConf buildJobConf(String recommendClassName,
- int recommendationsPerUser,
- String userIDFile,
- String dataModelFile,
- String jarFile,
- String outputPath) throws IOException {
-
- JobConf jobConf = new JobConf();
- FileSystem fs = FileSystem.get(jobConf);
-
- Path userIDFilePath = new Path(userIDFile).makeQualified(fs);
- Path outputPathPath = new Path(outputPath).makeQualified(fs);
-
- if (fs.exists(outputPathPath)) {
- fs.delete(outputPathPath, true);
- }
- jobConf.set("mapred.jar", jarFile);
- jobConf.setJar(jarFile);
+ Option recommendClassOpt = buildOption("recommenderClassName", "r", "Name of recommender class to instantiate");
+ Option numRecommendationsOpt = buildOption("numRecommendations", "n", "Number of recommendations per user");
+ Option dataModelFileOpt = buildOption("dataModelFile", "m", "File containing preference data");
+
+ Map<String,Object> parsedArgs = parseArguments(args, recommendClassOpt, numRecommendationsOpt, dataModelFileOpt);
+ String userIDFile = parsedArgs.get("input").toString();
+ String outputPath = parsedArgs.get("output").toString();
+ String jarFile = parsedArgs.get("jarFile").toString();
+
+ String recommendClassName = parsedArgs.get("recommenderClassName").toString();
+ int recommendationsPerUser = ((Number) parsedArgs.get("numRecommendations")).intValue();
+ String dataModelFile = parsedArgs.get("dataModelFile").toString();
+
+ JobConf jobConf = prepareJobConf(userIDFile,
+ outputPath,
+ jarFile,
+ TextInputFormat.class,
+ RecommenderMapper.class,
+ LongWritable.class,
+ RecommendedItemsWritable.class,
+ IdentityReducer.class,
+ LongWritable.class,
+ RecommendedItemsWritable.class,
+ TextOutputFormat.class);
jobConf.set(RecommenderMapper.RECOMMENDER_CLASS_NAME, recommendClassName);
jobConf.set(RecommenderMapper.RECOMMENDATIONS_PER_USER, String.valueOf(recommendationsPerUser));
jobConf.set(RecommenderMapper.DATA_MODEL_FILE, dataModelFile);
- jobConf.setClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class);
- jobConf.set("mapred.input.dir", StringUtils.escapeString(userIDFilePath.toString()));
-
- jobConf.setClass("mapred.mapper.class", RecommenderMapper.class, Mapper.class);
- jobConf.setClass("mapred.mapoutput.key.class", LongWritable.class, Object.class);
- jobConf.setClass("mapred.mapoutput.value.class", RecommendedItemsWritable.class, Object.class);
-
- jobConf.setClass("mapred.reducer.class", IdentityReducer.class, Reducer.class);
- jobConf.setClass("mapred.output.key.class", LongWritable.class, Object.class);
- jobConf.setClass("mapred.output.value.class", RecommendedItemsWritable.class, Object.class);
-
- jobConf.setClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class);
- jobConf.set("mapred.output.dir", StringUtils.escapeString(outputPathPath.toString()));
+ JobClient.runJob(jobConf);
- return jobConf;
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java?rev=887326&r1=887325&r2=887326&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java Fri Dec 4 19:15:31 2009
@@ -17,161 +17,61 @@
package org.apache.mahout.cf.taste.hadoop.slopeone;
-import org.apache.commons.cli2.CommandLine;
-import org.apache.commons.cli2.Group;
-import org.apache.commons.cli2.Option;
-import org.apache.commons.cli2.OptionException;
-import org.apache.commons.cli2.builder.ArgumentBuilder;
-import org.apache.commons.cli2.builder.DefaultOptionBuilder;
-import org.apache.commons.cli2.builder.GroupBuilder;
-import org.apache.commons.cli2.commandline.Parser;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputFormat;
-import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
-import org.apache.hadoop.util.StringUtils;
+import org.apache.mahout.cf.taste.hadoop.AbstractJob;
import org.apache.mahout.cf.taste.hadoop.ItemItemWritable;
import org.apache.mahout.cf.taste.hadoop.ItemPrefWritable;
import org.apache.mahout.cf.taste.hadoop.ToItemPrefsMapper;
-import org.apache.mahout.common.CommandLineUtil;
-import org.apache.mahout.common.commandline.DefaultOptionCreator;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import java.io.IOException;
+import java.util.Map;
-public final class SlopeOneAverageDiffsJob {
-
- /** Logger for this class. */
- private static final Logger log = LoggerFactory.getLogger(SlopeOneAverageDiffsJob.class);
+public final class SlopeOneAverageDiffsJob extends AbstractJob {
private SlopeOneAverageDiffsJob() {
}
public static void main(String[] args) throws Exception {
- DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
- ArgumentBuilder abuilder = new ArgumentBuilder();
- GroupBuilder gbuilder = new GroupBuilder();
-
- Option inputOpt = obuilder.withLongName("input").withRequired(true).withShortName("i")
- .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
- .withDescription("The Path for input preferences file.").create();
- Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).create();
- Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
-
- Option jarFileOpt = obuilder.withLongName("jarFile").withRequired(true)
- .withShortName("m").withArgument(abuilder.withName("jarFile").withMinimum(1)
- .withMaximum(1).create()).withDescription("Implementation jar.").create();
-
- Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)
- .withOption(jarFileOpt).withOption(helpOpt).create();
-
-
- CommandLine cmdLine;
- try {
- Parser parser = new Parser();
- parser.setGroup(group);
- cmdLine = parser.parse(args);
- } catch (OptionException e) {
- log.error(e.getMessage());
- CommandLineUtil.printHelp(group);
- return;
- }
-
- if (cmdLine.hasOption(helpOpt)) {
- CommandLineUtil.printHelp(group);
- return;
- }
- String prefsFile = cmdLine.getValue(inputOpt).toString();
- String outputPath = cmdLine.getValue(outputOpt).toString();
+ Map<String,Object> parsedArgs = parseArguments(args);
+
+ String prefsFile = parsedArgs.get("input").toString();
+ String outputPath = parsedArgs.get("output").toString();
+ String jarFile = parsedArgs.get("jarFile").toString();
String averagesOutputPath = outputPath + "/averages";
- String jarFile = cmdLine.getValue(jarFileOpt).toString();
- JobConf prefsToDiffsJobConf = buildPrefsToDiffsJobConf(prefsFile, averagesOutputPath, jarFile);
+ JobConf prefsToDiffsJobConf = prepareJobConf(prefsFile,
+ averagesOutputPath,
+ jarFile,
+ TextInputFormat.class,
+ ToItemPrefsMapper.class,
+ LongWritable.class,
+ ItemPrefWritable.class,
+ SlopeOnePrefsToDiffsReducer.class,
+ ItemItemWritable.class,
+ FloatWritable.class,
+ SequenceFileOutputFormat.class);
JobClient.runJob(prefsToDiffsJobConf);
- JobConf diffsToAveragesJobConf = buildDiffsToAveragesJobConf(averagesOutputPath, outputPath, jarFile);
+ JobConf diffsToAveragesJobConf = prepareJobConf(averagesOutputPath,
+ outputPath,
+ jarFile,
+ SequenceFileInputFormat.class,
+ IdentityMapper.class,
+ ItemItemWritable.class,
+ FloatWritable.class,
+ SlopeOneDiffsToAveragesReducer.class,
+ ItemItemWritable.class,
+ FloatWritable.class,
+ TextOutputFormat.class);
JobClient.runJob(diffsToAveragesJobConf);
}
- private static JobConf buildPrefsToDiffsJobConf(String inputPath,
- String outputPath,
- String jarFile) throws IOException {
-
- JobConf jobConf = new JobConf();
- FileSystem fs = FileSystem.get(jobConf);
-
- Path prefsFilePath = new Path(inputPath).makeQualified(fs);
- Path outputPathPath = new Path(outputPath).makeQualified(fs);
-
- if (fs.exists(outputPathPath)) {
- fs.delete(outputPathPath, true);
- }
-
- jobConf.set("mapred.jar", jarFile);
- jobConf.setJar(jarFile);
-
- jobConf.setClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class);
- jobConf.set("mapred.input.dir", StringUtils.escapeString(prefsFilePath.toString()));
-
- jobConf.setClass("mapred.mapper.class", ToItemPrefsMapper.class, Mapper.class);
- jobConf.setClass("mapred.mapoutput.key.class", LongWritable.class, Object.class);
- jobConf.setClass("mapred.mapoutput.value.class", ItemPrefWritable.class, Object.class);
-
- jobConf.setClass("mapred.reducer.class", SlopeOnePrefsToDiffsReducer.class, Reducer.class);
- jobConf.setClass("mapred.output.key.class", ItemItemWritable.class, Object.class);
- jobConf.setClass("mapred.output.value.class", FloatWritable.class, Object.class);
-
- jobConf.setClass("mapred.output.format.class", SequenceFileOutputFormat.class, OutputFormat.class);
- jobConf.set("mapred.output.dir", StringUtils.escapeString(outputPathPath.toString()));
-
- return jobConf;
- }
-
- private static JobConf buildDiffsToAveragesJobConf(String inputPath,
- String outputPath,
- String jarFile) throws IOException {
-
- JobConf jobConf = new JobConf();
- FileSystem fs = FileSystem.get(jobConf);
-
- Path prefsFilePath = new Path(inputPath).makeQualified(fs);
- Path outputPathPath = new Path(outputPath).makeQualified(fs);
-
- if (fs.exists(outputPathPath)) {
- fs.delete(outputPathPath, true);
- }
-
- jobConf.set("mapred.jar", jarFile);
- jobConf.setJar(jarFile);
-
- jobConf.setClass("mapred.input.format.class", SequenceFileInputFormat.class, InputFormat.class);
- jobConf.set("mapred.input.dir", StringUtils.escapeString(prefsFilePath.toString()));
-
- jobConf.setClass("mapred.mapper.class", IdentityMapper.class, Mapper.class);
- jobConf.setClass("mapred.mapoutput.key.class", ItemItemWritable.class, Object.class);
- jobConf.setClass("mapred.mapoutput.value.class", FloatWritable.class, Object.class);
-
- jobConf.setClass("mapred.reducer.class", SlopeOneDiffsToAveragesReducer.class, Reducer.class);
- jobConf.setClass("mapred.output.key.class", ItemItemWritable.class, Object.class);
- jobConf.setClass("mapred.output.value.class", FloatWritable.class, Object.class);
-
- jobConf.setClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class);
- jobConf.set("mapred.output.dir", StringUtils.escapeString(outputPathPath.toString()));
-
- return jobConf;
- }
-
}
\ No newline at end of file
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java?rev=887326&r1=887325&r2=887326&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java Fri Dec 4 19:15:31 2009
@@ -21,7 +21,8 @@
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
-public class DefaultOptionCreator {
+public final class DefaultOptionCreator {
+
private DefaultOptionCreator() {
}
@@ -29,8 +30,8 @@
* Returns a default command line option for convergence delta specification.
*/
public static DefaultOptionBuilder convergenceOption(
- final DefaultOptionBuilder obuilder, final ArgumentBuilder abuilder) {
- return obuilder.withLongName("convergencedelta")
+ DefaultOptionBuilder obuilder, ArgumentBuilder abuilder) {
+ return obuilder.withLongName("convergenceDelta")
.withRequired(true).withShortName("v").withArgument(
abuilder.withName("convergenceDelta").withMinimum(1).withMaximum(1)
.create()).withDescription("The convergence delta value.");
@@ -39,8 +40,8 @@
/**
* Returns a default command line option for output directory specification.
*/
- public static DefaultOptionBuilder outputOption(final DefaultOptionBuilder obuilder,
- final ArgumentBuilder abuilder) {
+ public static DefaultOptionBuilder outputOption(DefaultOptionBuilder obuilder,
+ ArgumentBuilder abuilder) {
return obuilder.withLongName("output").withRequired(true)
.withShortName("o").withArgument(
abuilder.withName("output").withMinimum(1).withMaximum(1).create())
@@ -50,8 +51,8 @@
/**
* Returns a default command line option for input directory specification.
*/
- public static DefaultOptionBuilder inputOption(final DefaultOptionBuilder obuilder,
- final ArgumentBuilder abuilder) {
+ public static DefaultOptionBuilder inputOption(DefaultOptionBuilder obuilder,
+ ArgumentBuilder abuilder) {
return obuilder
.withLongName("input")
.withRequired(true)
@@ -59,7 +60,7 @@
.withArgument(
abuilder.withName("input").withMinimum(1).withMaximum(1).create())
.withDescription(
- "The Path for input Vectors. Must be a SequenceFile of Writable, Vector.");
+ "Path to job input directory");
}
/**
@@ -74,7 +75,7 @@
.withArgument(
abuilder.withName("k").withMinimum(1).withMaximum(1).create())
.withDescription(
- "The k in k-Means. If specified, then a random selection of k Vectors will be chosen as the Centroid and written to the clusters output path.")
+ "The k in k-Means. k random Vectors will be chosen as the Centroid and written to the clusters output path.")
.withShortName("k");
}