Posted to commits@mahout.apache.org by sr...@apache.org on 2009/12/04 20:15:31 UTC

svn commit: r887326 - in /lucene/mahout/trunk/core/src/main/java/org/apache/mahout: cf/taste/hadoop/ cf/taste/hadoop/item/ cf/taste/hadoop/pseudo/ cf/taste/hadoop/slopeone/ common/commandline/

Author: srowen
Date: Fri Dec  4 19:15:31 2009
New Revision: 887326

URL: http://svn.apache.org/viewvc?rev=887326&view=rev
Log:
Refactored command-line argument and JobConf handling in CF Hadoop jobs into a shared AbstractJob base class.

Added:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java
Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java?rev=887326&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java Fri Dec  4 19:15:31 2009
@@ -0,0 +1,149 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop;
+
+import org.apache.commons.cli2.CommandLine;
+import org.apache.commons.cli2.Group;
+import org.apache.commons.cli2.Option;
+import org.apache.commons.cli2.OptionException;
+import org.apache.commons.cli2.builder.ArgumentBuilder;
+import org.apache.commons.cli2.builder.DefaultOptionBuilder;
+import org.apache.commons.cli2.builder.GroupBuilder;
+import org.apache.commons.cli2.commandline.Parser;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputFormat;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+public abstract class AbstractJob {
+
+  private static final Logger log = LoggerFactory.getLogger(AbstractJob.class);
+
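+  /**
+   * Builds a required command line option with the given long and short names,
+   * taking exactly one argument.
+   */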
+  protected static Option buildOption(String name, String shortName, String description) {
+    return new DefaultOptionBuilder().withLongName(name).withRequired(true)
+      .withShortName(shortName).withArgument(new ArgumentBuilder().withName(name).withMinimum(1)
+      .withMaximum(1).create()).withDescription(description).create();
+  }
+
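+  /**
+   * Parses the standard input/output/jarFile/help options plus any job-specific
+   * extra options. Returns the parsed values keyed by option name, or null if
+   * help was requested or the arguments could not be parsed; in either case
+   * usage has already been printed.
+   */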
+  protected static Map<String,Object> parseArguments(String[] args, Option... extraOpts) {
+
+    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
+    ArgumentBuilder abuilder = new ArgumentBuilder();
+
+    Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder).create();
+    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).create();
+    Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
+    // short name 'j' avoids clashing with job-specific options such as --dataModelFile ('m')
+    Option jarFileOpt = buildOption("jarFile", "j", "Implementation jar");
+
+    GroupBuilder gBuilder = new GroupBuilder().withName("Options");
+    gBuilder = gBuilder.withOption(inputOpt);
+    gBuilder = gBuilder.withOption(outputOpt);
+    gBuilder = gBuilder.withOption(helpOpt);
+    gBuilder = gBuilder.withOption(jarFileOpt);
+
+    for (Option opt : extraOpts) {
+      gBuilder = gBuilder.withOption(opt);
+    }
+
+    Group group = gBuilder.create();
+
+    CommandLine cmdLine;
+    try {
+      Parser parser = new Parser();
+      parser.setGroup(group);
+      cmdLine = parser.parse(args);
+    } catch (OptionException e) {
+      log.error(e.getMessage());
+      CommandLineUtil.printHelp(group);
+      return null;
+    }
+
+    if (cmdLine.hasOption(helpOpt)) {
+      CommandLineUtil.printHelp(group);
+      return null;
+    }
+
+    Map<String,Object> result = new HashMap<String,Object>();
+    result.put(inputOpt.getPreferredName(), cmdLine.getValue(inputOpt));
+    result.put(outputOpt.getPreferredName(), cmdLine.getValue(outputOpt));
+    result.put(helpOpt.getPreferredName(), cmdLine.getValue(helpOpt));
+    result.put(jarFileOpt.getPreferredName(), cmdLine.getValue(jarFileOpt));
+    for (Option opt : extraOpts) {
+      result.put(opt.getPreferredName(), cmdLine.getValue(opt));
+    }
+
+    return result;
+  }
+
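+  /**
+   * Creates a JobConf for the given input and output paths and implementation jar,
+   * wiring together the supplied input format, mapper, reducer and output format.
+   * Any existing data at the output path is deleted first.
+   */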
+  protected static JobConf prepareJobConf(String inputPath,
+                                          String outputPath,
+                                          String jarFile,
+                                          Class<? extends InputFormat> inputFormat,
+                                          Class<? extends Mapper> mapper,
+                                          Class<? extends Writable> mapperKey,
+                                          Class<? extends Writable> mapperValue,
+                                          Class<? extends Reducer> reducer,
+                                          Class<? extends Writable> reducerKey,
+                                          Class<? extends Writable> reducerValue,
+                                          Class<? extends OutputFormat> outputFormat) throws IOException {
+
+    JobConf jobConf = new JobConf();
+    FileSystem fs = FileSystem.get(jobConf);
+
+    Path inputPathPath = new Path(inputPath).makeQualified(fs);
+    Path outputPathPath = new Path(outputPath).makeQualified(fs);
+
+    if (fs.exists(outputPathPath)) {
+      fs.delete(outputPathPath, true);
+    }
+
+    jobConf.set("mapred.jar", jarFile);
+    jobConf.setJar(jarFile);
+
+    jobConf.setClass("mapred.input.format.class", inputFormat, InputFormat.class);
+    jobConf.set("mapred.input.dir", StringUtils.escapeString(inputPathPath.toString()));
+
+    jobConf.setClass("mapred.mapper.class", mapper, Mapper.class);
+    jobConf.setClass("mapred.mapoutput.key.class", mapperKey, Writable.class);
+    jobConf.setClass("mapred.mapoutput.value.class", mapperValue, Writable.class);
+
+    jobConf.setClass("mapred.reducer.class", reducer, Reducer.class);
+    jobConf.setClass("mapred.output.key.class", reducerKey, Writable.class);
+    jobConf.setClass("mapred.output.value.class", reducerValue, Writable.class);
+
+    jobConf.setClass("mapred.output.format.class", outputFormat, OutputFormat.class);
+    jobConf.set("mapred.output.dir", StringUtils.escapeString(outputPathPath.toString()));
+
+    return jobConf;
+  }
+
+}
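
A minimal sketch of how a job can be written against these helpers. MyExampleJob
and its --exampleArg option are hypothetical placeholders for illustration, not
part of this commit; the identity mapper/reducer wiring simply copies text input
to text output:

    import org.apache.commons.cli2.Option;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.TextInputFormat;
    import org.apache.hadoop.mapred.TextOutputFormat;
    import org.apache.hadoop.mapred.lib.IdentityMapper;
    import org.apache.hadoop.mapred.lib.IdentityReducer;
    import org.apache.mahout.cf.taste.hadoop.AbstractJob;

    import java.util.Map;

    public final class MyExampleJob extends AbstractJob {

      private MyExampleJob() {
      }

      public static void main(String[] args) throws Exception {
        // One job-specific option on top of the standard input/output/jarFile options
        Option exampleOpt = buildOption("exampleArg", "e", "An example job-specific argument");

        Map<String,Object> parsedArgs = parseArguments(args, exampleOpt);
        if (parsedArgs == null) {
          return; // help was requested, or parsing failed and usage was printed
        }

        JobConf jobConf = prepareJobConf(parsedArgs.get("input").toString(),
                                         parsedArgs.get("output").toString(),
                                         parsedArgs.get("jarFile").toString(),
                                         TextInputFormat.class,
                                         IdentityMapper.class,
                                         LongWritable.class,
                                         Text.class,
                                         IdentityReducer.class,
                                         LongWritable.class,
                                         Text.class,
                                         TextOutputFormat.class);
        jobConf.set("exampleArg", parsedArgs.get("exampleArg").toString());
        JobClient.runJob(jobConf);
      }
    }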

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java?rev=887326&r1=887325&r2=887326&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java Fri Dec  4 19:15:31 2009
@@ -17,7 +17,9 @@
 
 package org.apache.mahout.cf.taste.hadoop.item;
 
-public final class RecommenderJob {
+import org.apache.mahout.cf.taste.hadoop.AbstractJob;
+
+public final class RecommenderJob extends AbstractJob {
 
   public static void main(String[] args) {
     

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java?rev=887326&r1=887325&r2=887326&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java Fri Dec  4 19:15:31 2009
@@ -17,35 +17,18 @@
 
 package org.apache.mahout.cf.taste.hadoop.pseudo;
 
-import org.apache.commons.cli2.CommandLine;
-import org.apache.commons.cli2.Group;
 import org.apache.commons.cli2.Option;
-import org.apache.commons.cli2.OptionException;
-import org.apache.commons.cli2.builder.ArgumentBuilder;
-import org.apache.commons.cli2.builder.DefaultOptionBuilder;
-import org.apache.commons.cli2.builder.GroupBuilder;
-import org.apache.commons.cli2.commandline.Parser;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.mapred.JobClient;
 import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputFormat;
-import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.TextInputFormat;
 import org.apache.hadoop.mapred.TextOutputFormat;
 import org.apache.hadoop.mapred.lib.IdentityReducer;
-import org.apache.hadoop.util.StringUtils;
+import org.apache.mahout.cf.taste.hadoop.AbstractJob;
 import org.apache.mahout.cf.taste.hadoop.RecommendedItemsWritable;
 import org.apache.mahout.cf.taste.recommender.Recommender;
-import org.apache.mahout.common.CommandLineUtil;
-import org.apache.mahout.common.commandline.DefaultOptionCreator;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
-import java.io.IOException;
+import java.util.Map;
 
 /**
  * <p>This job runs a "pseudo-distributed" recommendation process on Hadoop.
@@ -73,7 +56,7 @@
  *
  * {@code
  * --recommenderClassName org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender
- * --userRec 10 --userIdFile path/to/users.txt --dataModelFile path/to/data.csv
+ * --numRecommendations 10 --input path/to/users.txt --dataModelFile path/to/data.csv
  * --output path/to/reducerOutputDir --jarFile recommender.jar
  * }
  *
@@ -111,114 +94,48 @@
  * {@code
  * hadoop jar recommender.jar org.apache.mahout.cf.taste.hadoop.RecommenderJob \
  *   --recommenderClassName your.project.Recommender \
- *   --userRec 10 --userIdFile input/users.txt --dataModelFile input/input.csv \
+ *   --numRecommendations 10 --input input/users.txt --dataModelFile input/input.csv \
  *   --output output --jarFile recommender.jar
  * }
  */
-public final class RecommenderJob {
-
-  private static final Logger log = LoggerFactory.getLogger(RecommenderJob.class);
+public final class RecommenderJob extends AbstractJob {
 
   private RecommenderJob() {
   }
 
   public static void main(String[] args) throws Exception {
-    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
-    ArgumentBuilder abuilder = new ArgumentBuilder();
-    GroupBuilder gbuilder = new GroupBuilder();
-
-    Option recommendClassOpt = obuilder.withLongName("recommenderClassName").withRequired(true)
-      .withShortName("r").withArgument(abuilder.withName("recommenderClassName").withMinimum(1)
-      .withMaximum(1).create()).withDescription("Name of recommender class to use.").create();
-
-    Option userRecommendOpt = obuilder.withLongName("userRec").withRequired(true)
-      .withShortName("n").withArgument(abuilder.withName("userRec").withMinimum(1)
-      .withMaximum(1).create()).withDescription("Desired number of recommendations per user.").create();
-
-    Option userIDFileOpt = obuilder.withLongName("userIdFile").withRequired(true)
-      .withShortName("f").withArgument(abuilder.withName("userIdFile").withMinimum(1)
-      .withMaximum(1).create()).withDescription("File containing user ids.").create();
-
-    Option dataModelFileOpt = obuilder.withLongName("dataModelFile").withRequired(true)
-      .withShortName("m").withArgument(abuilder.withName("dataModelFile").withMinimum(1)
-      .withMaximum(1).create()).withDescription("File containing data model.").create();
-
-    Option jarFileOpt = obuilder.withLongName("jarFile").withRequired(true)
-      .withShortName("m").withArgument(abuilder.withName("jarFile").withMinimum(1)
-      .withMaximum(1).create()).withDescription("Implementation jar.").create();
-
-    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).create();
-    Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
-
-    Group group = gbuilder.withName("Options").withOption(recommendClassOpt).withOption(userRecommendOpt)
-      .withOption(userIDFileOpt).withOption(dataModelFileOpt).withOption(outputOpt)
-      .withOption(jarFileOpt).withOption(helpOpt).create();
-
-
-    try {
-      Parser parser = new Parser();
-      parser.setGroup(group);
-      CommandLine cmdLine = parser.parse(args);
-
-      if (cmdLine.hasOption(helpOpt)) {
-        CommandLineUtil.printHelp(group);
-        return;
-      }
-
-      String recommendClassName = cmdLine.getValue(recommendClassOpt).toString();
-      int recommendationsPerUser = Integer.parseInt(cmdLine.getValue(userRecommendOpt).toString());
-      String userIDFile = cmdLine.getValue(userIDFileOpt).toString();
-      String dataModelFile = cmdLine.getValue(dataModelFileOpt).toString();
-      String jarFile = cmdLine.getValue(jarFileOpt).toString();
-      String outputPath = cmdLine.getValue(outputOpt).toString();
-      JobConf jobConf =
-          buildJobConf(recommendClassName, recommendationsPerUser, userIDFile, dataModelFile, jarFile, outputPath);
-      JobClient.runJob(jobConf);
-    } catch (OptionException e) {
-      log.error(e.getMessage());
-      CommandLineUtil.printHelp(group);
-    }
-  }
-
-  public static JobConf buildJobConf(String recommendClassName,
-                                     int recommendationsPerUser,
-                                     String userIDFile,
-                                     String dataModelFile,
-                                     String jarFile,
-                                     String outputPath) throws IOException {
-
-    JobConf jobConf = new JobConf();
-    FileSystem fs = FileSystem.get(jobConf);
-
-    Path userIDFilePath = new Path(userIDFile).makeQualified(fs);
-    Path outputPathPath = new Path(outputPath).makeQualified(fs);
-
-    if (fs.exists(outputPathPath)) {
-      fs.delete(outputPathPath, true);
-    }
 
-    jobConf.set("mapred.jar", jarFile);
-    jobConf.setJar(jarFile);
+    Option recommendClassOpt = buildOption("recommenderClassName", "r", "Name of recommender class to instantiate");
+    Option numRecommendationsOpt = buildOption("numRecommendations", "n", "Number of recommendations per user");
+    Option dataModelFileOpt = buildOption("dataModelFile", "m", "File containing preference data");
+
+    Map<String,Object> parsedArgs = parseArguments(args, recommendClassOpt, numRecommendationsOpt, dataModelFileOpt);
+    if (parsedArgs == null) {
+      return; // help was requested, or parsing failed and usage was printed
+    }
+    String userIDFile = parsedArgs.get("input").toString();
+    String outputPath = parsedArgs.get("output").toString();
+    String jarFile = parsedArgs.get("jarFile").toString();
+
+    String recommendClassName = parsedArgs.get("recommenderClassName").toString();
+    int recommendationsPerUser = Integer.parseInt(parsedArgs.get("numRecommendations").toString());
+    String dataModelFile = parsedArgs.get("dataModelFile").toString();
+
+    JobConf jobConf = prepareJobConf(userIDFile,
+                                     outputPath,
+                                     jarFile,
+                                     TextInputFormat.class,
+                                     RecommenderMapper.class,
+                                     LongWritable.class,
+                                     RecommendedItemsWritable.class,
+                                     IdentityReducer.class,
+                                     LongWritable.class,
+                                     RecommendedItemsWritable.class,
+                                     TextOutputFormat.class);
 
     jobConf.set(RecommenderMapper.RECOMMENDER_CLASS_NAME, recommendClassName);
     jobConf.set(RecommenderMapper.RECOMMENDATIONS_PER_USER, String.valueOf(recommendationsPerUser));
     jobConf.set(RecommenderMapper.DATA_MODEL_FILE, dataModelFile);
 
-    jobConf.setClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class);
-    jobConf.set("mapred.input.dir", StringUtils.escapeString(userIDFilePath.toString()));
-
-    jobConf.setClass("mapred.mapper.class", RecommenderMapper.class, Mapper.class);
-    jobConf.setClass("mapred.mapoutput.key.class", LongWritable.class, Object.class);
-    jobConf.setClass("mapred.mapoutput.value.class", RecommendedItemsWritable.class, Object.class);
-
-    jobConf.setClass("mapred.reducer.class", IdentityReducer.class, Reducer.class);
-    jobConf.setClass("mapred.output.key.class", LongWritable.class, Object.class);
-    jobConf.setClass("mapred.output.value.class", RecommendedItemsWritable.class, Object.class);
-
-    jobConf.setClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class);
-    jobConf.set("mapred.output.dir", StringUtils.escapeString(outputPathPath.toString()));
+    JobClient.runJob(jobConf);
 
-    return jobConf;
   }
 
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java?rev=887326&r1=887325&r2=887326&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java Fri Dec  4 19:15:31 2009
@@ -17,161 +17,61 @@
 
 package org.apache.mahout.cf.taste.hadoop.slopeone;
 
-import org.apache.commons.cli2.CommandLine;
-import org.apache.commons.cli2.Group;
-import org.apache.commons.cli2.Option;
-import org.apache.commons.cli2.OptionException;
-import org.apache.commons.cli2.builder.ArgumentBuilder;
-import org.apache.commons.cli2.builder.DefaultOptionBuilder;
-import org.apache.commons.cli2.builder.GroupBuilder;
-import org.apache.commons.cli2.commandline.Parser;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.FloatWritable;
 import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.mapred.JobClient;
 import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputFormat;
-import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.SequenceFileInputFormat;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
 import org.apache.hadoop.mapred.TextInputFormat;
 import org.apache.hadoop.mapred.TextOutputFormat;
 import org.apache.hadoop.mapred.lib.IdentityMapper;
-import org.apache.hadoop.util.StringUtils;
+import org.apache.mahout.cf.taste.hadoop.AbstractJob;
 import org.apache.mahout.cf.taste.hadoop.ItemItemWritable;
 import org.apache.mahout.cf.taste.hadoop.ItemPrefWritable;
 import org.apache.mahout.cf.taste.hadoop.ToItemPrefsMapper;
-import org.apache.mahout.common.CommandLineUtil;
-import org.apache.mahout.common.commandline.DefaultOptionCreator;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
-import java.io.IOException;
+import java.util.Map;
 
-public final class SlopeOneAverageDiffsJob {
-
-  /** Logger for this class. */
-  private static final Logger log = LoggerFactory.getLogger(SlopeOneAverageDiffsJob.class);
+public final class SlopeOneAverageDiffsJob extends AbstractJob {
 
   private SlopeOneAverageDiffsJob() {
   }
 
   public static void main(String[] args) throws Exception {
-    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
-    ArgumentBuilder abuilder = new ArgumentBuilder();
-    GroupBuilder gbuilder = new GroupBuilder();
-
-    Option inputOpt = obuilder.withLongName("input").withRequired(true).withShortName("i")
-      .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
-      .withDescription("The Path for input preferences file.").create();
-    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).create();
-    Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
-
-    Option jarFileOpt = obuilder.withLongName("jarFile").withRequired(true)
-      .withShortName("m").withArgument(abuilder.withName("jarFile").withMinimum(1)
-      .withMaximum(1).create()).withDescription("Implementation jar.").create();
-
-    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)
-        .withOption(jarFileOpt).withOption(helpOpt).create();
-
-
-    CommandLine cmdLine;
-    try {
-      Parser parser = new Parser();
-      parser.setGroup(group);
-      cmdLine = parser.parse(args);
-    } catch (OptionException e) {
-      log.error(e.getMessage());
-      CommandLineUtil.printHelp(group);
-      return;
-    }
-
-    if (cmdLine.hasOption(helpOpt)) {
-      CommandLineUtil.printHelp(group);
-      return;
-    }
 
-    String prefsFile = cmdLine.getValue(inputOpt).toString();
-    String outputPath = cmdLine.getValue(outputOpt).toString();
+    Map<String,Object> parsedArgs = parseArguments(args);
+    if (parsedArgs == null) {
+      return; // help was requested, or parsing failed and usage was printed
+    }
+
+    String prefsFile = parsedArgs.get("input").toString();
+    String outputPath = parsedArgs.get("output").toString();
+    String jarFile = parsedArgs.get("jarFile").toString();
     String averagesOutputPath = outputPath + "/averages";
-    String jarFile = cmdLine.getValue(jarFileOpt).toString();
 
-    JobConf prefsToDiffsJobConf = buildPrefsToDiffsJobConf(prefsFile, averagesOutputPath, jarFile);
+    JobConf prefsToDiffsJobConf = prepareJobConf(prefsFile,
+                                                 averagesOutputPath,
+                                                 jarFile,
+                                                 TextInputFormat.class,
+                                                 ToItemPrefsMapper.class,
+                                                 LongWritable.class,
+                                                 ItemPrefWritable.class,
+                                                 SlopeOnePrefsToDiffsReducer.class,
+                                                 ItemItemWritable.class,
+                                                 FloatWritable.class,
+                                                 SequenceFileOutputFormat.class);
     JobClient.runJob(prefsToDiffsJobConf);
 
-    JobConf diffsToAveragesJobConf = buildDiffsToAveragesJobConf(averagesOutputPath, outputPath, jarFile);
+    JobConf diffsToAveragesJobConf = prepareJobConf(averagesOutputPath,
+                                                    outputPath,
+                                                    jarFile,
+                                                    SequenceFileInputFormat.class,
+                                                    IdentityMapper.class,
+                                                    ItemItemWritable.class,
+                                                    FloatWritable.class,
+                                                    SlopeOneDiffsToAveragesReducer.class,
+                                                    ItemItemWritable.class,
+                                                    FloatWritable.class,
+                                                    TextOutputFormat.class);
     JobClient.runJob(diffsToAveragesJobConf);
   }
 
-  private static JobConf buildPrefsToDiffsJobConf(String inputPath,
-                                                  String outputPath,
-                                                  String jarFile) throws IOException {
-
-    JobConf jobConf = new JobConf();
-    FileSystem fs = FileSystem.get(jobConf);
-
-    Path prefsFilePath = new Path(inputPath).makeQualified(fs);
-    Path outputPathPath = new Path(outputPath).makeQualified(fs);
-
-    if (fs.exists(outputPathPath)) {
-      fs.delete(outputPathPath, true);
-    }
-
-    jobConf.set("mapred.jar", jarFile);
-    jobConf.setJar(jarFile);
-
-    jobConf.setClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class);
-    jobConf.set("mapred.input.dir", StringUtils.escapeString(prefsFilePath.toString()));
-
-    jobConf.setClass("mapred.mapper.class", ToItemPrefsMapper.class, Mapper.class);
-    jobConf.setClass("mapred.mapoutput.key.class", LongWritable.class, Object.class);
-    jobConf.setClass("mapred.mapoutput.value.class", ItemPrefWritable.class, Object.class);
-
-    jobConf.setClass("mapred.reducer.class", SlopeOnePrefsToDiffsReducer.class, Reducer.class);
-    jobConf.setClass("mapred.output.key.class", ItemItemWritable.class, Object.class);
-    jobConf.setClass("mapred.output.value.class", FloatWritable.class, Object.class);
-
-    jobConf.setClass("mapred.output.format.class", SequenceFileOutputFormat.class, OutputFormat.class);
-    jobConf.set("mapred.output.dir", StringUtils.escapeString(outputPathPath.toString()));
-
-    return jobConf;
-  }
-
-  private static JobConf buildDiffsToAveragesJobConf(String inputPath,
-                                                     String outputPath,
-                                                     String jarFile) throws IOException {
-
-    JobConf jobConf = new JobConf();
-    FileSystem fs = FileSystem.get(jobConf);
-
-    Path prefsFilePath = new Path(inputPath).makeQualified(fs);
-    Path outputPathPath = new Path(outputPath).makeQualified(fs);
-
-    if (fs.exists(outputPathPath)) {
-      fs.delete(outputPathPath, true);
-    }
-
-    jobConf.set("mapred.jar", jarFile);
-    jobConf.setJar(jarFile);
-
-    jobConf.setClass("mapred.input.format.class", SequenceFileInputFormat.class, InputFormat.class);
-    jobConf.set("mapred.input.dir", StringUtils.escapeString(prefsFilePath.toString()));
-
-    jobConf.setClass("mapred.mapper.class", IdentityMapper.class, Mapper.class);
-    jobConf.setClass("mapred.mapoutput.key.class", ItemItemWritable.class, Object.class);
-    jobConf.setClass("mapred.mapoutput.value.class", FloatWritable.class, Object.class);
-
-    jobConf.setClass("mapred.reducer.class", SlopeOneDiffsToAveragesReducer.class, Reducer.class);
-    jobConf.setClass("mapred.output.key.class", ItemItemWritable.class, Object.class);
-    jobConf.setClass("mapred.output.value.class", FloatWritable.class, Object.class);
-
-    jobConf.setClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class);
-    jobConf.set("mapred.output.dir", StringUtils.escapeString(outputPathPath.toString()));
-
-    return jobConf;
-  }
-
 }
\ No newline at end of file

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java?rev=887326&r1=887325&r2=887326&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java Fri Dec  4 19:15:31 2009
@@ -21,7 +21,8 @@
 import org.apache.commons.cli2.builder.ArgumentBuilder;
 import org.apache.commons.cli2.builder.DefaultOptionBuilder;
 
-public class DefaultOptionCreator {
+public final class DefaultOptionCreator {
+
   private DefaultOptionCreator() {
   }
 
@@ -29,8 +30,8 @@
    * Returns a default command line option for convergence delta specification.
    */
   public static DefaultOptionBuilder convergenceOption(
-      final DefaultOptionBuilder obuilder, final ArgumentBuilder abuilder) {
-    return obuilder.withLongName("convergencedelta")
+      DefaultOptionBuilder obuilder, ArgumentBuilder abuilder) {
+    return obuilder.withLongName("convergenceDelta")
         .withRequired(true).withShortName("v").withArgument(
             abuilder.withName("convergenceDelta").withMinimum(1).withMaximum(1)
                 .create()).withDescription("The convergence delta value.");
@@ -39,8 +40,8 @@
   /**
    * Returns a default command line option for output directory specification.
    */
-  public static DefaultOptionBuilder outputOption(final DefaultOptionBuilder obuilder,
-      final ArgumentBuilder abuilder) {
+  public static DefaultOptionBuilder outputOption(DefaultOptionBuilder obuilder,
+      ArgumentBuilder abuilder) {
     return obuilder.withLongName("output").withRequired(true)
         .withShortName("o").withArgument(
             abuilder.withName("output").withMinimum(1).withMaximum(1).create())
@@ -50,8 +51,8 @@
   /**
    * Returns a default command line option for input directory specification.
    */
-  public static DefaultOptionBuilder inputOption(final DefaultOptionBuilder obuilder,
-      final ArgumentBuilder abuilder) {
+  public static DefaultOptionBuilder inputOption(DefaultOptionBuilder obuilder,
+      ArgumentBuilder abuilder) {
     return obuilder
         .withLongName("input")
         .withRequired(true)
@@ -59,7 +60,7 @@
         .withArgument(
             abuilder.withName("input").withMinimum(1).withMaximum(1).create())
         .withDescription(
-            "The Path for input Vectors. Must be a SequenceFile of Writable, Vector.");
+            "Path to job input directory");
   }
 
   /**
@@ -74,7 +75,7 @@
         .withArgument(
             abuilder.withName("k").withMinimum(1).withMaximum(1).create())
         .withDescription(
-            "The k in k-Means.  If specified, then a random selection of k Vectors will be chosen as the Centroid and written to the clusters output path.")
+            "The k in k-Means. k random Vectors will be chosen as the Centroid and written to the clusters output path.")
         .withShortName("k");
   }