You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by gs...@apache.org on 2012/02/13 16:14:43 UTC

svn commit: r1243557 - in /mahout/trunk: core/src/main/java/org/apache/mahout/clustering/ core/src/main/java/org/apache/mahout/common/ examples/src/main/java/org/apache/mahout/cf/taste/example/email/ examples/src/main/java/org/apache/mahout/classifier/...

Author: gsingers
Date: Mon Feb 13 15:14:42 2012
New Revision: 1243557

URL: http://svn.apache.org/viewvc?rev=1243557&view=rev
Log:
MAHOUT-947: add in support for multiple options

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/WeightedPropertyVectorWritable.java
    mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToPrefsDriver.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/email/PrepEmailVectorsDriver.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/RowIdJob.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/WeightedPropertyVectorWritable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/WeightedPropertyVectorWritable.java?rev=1243557&r1=1243556&r2=1243557&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/WeightedPropertyVectorWritable.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/WeightedPropertyVectorWritable.java Mon Feb 13 15:14:42 2012
@@ -79,7 +79,7 @@ public class WeightedPropertyVectorWrita
   @Override
   public String toString() {
     Vector vector = getVector();
-    StringBuilder bldr = new StringBuilder("wt: ").append(getWeight());
+    StringBuilder bldr = new StringBuilder("wt: ").append(getWeight()).append(" ");
     if (properties != null && !properties.isEmpty()) {
       for (Map.Entry<Text, Text> entry : properties.entrySet()) {
         bldr.append(entry.getKey().toString()).append(": ").append(entry.getValue().toString()).append(' ');

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java?rev=1243557&r1=1243556&r2=1243557&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java Mon Feb 13 15:14:42 2012
@@ -94,17 +94,17 @@ public abstract class AbstractJob extend
   private Option outputOption;
 
   /** input path, populated by {@link #parseArguments(String[])} */
-  private Path inputPath;
-  private File inputFile;//the input represented as a file
+  protected Path inputPath;
+  protected File inputFile;//the input represented as a file
 
   /** output path, populated by {@link #parseArguments(String[]) */
-  private Path outputPath;
-  private File outputFile;//the output represented as a file
+  protected Path outputPath;
+  protected File outputFile;//the output represented as a file
 
   /** temp path, populated by {@link #parseArguments(String[]) */
-  private Path tempPath;
+  protected Path tempPath;
 
-  private Map<String, List<String>> argMap;
+  protected Map<String, List<String>> argMap;
 
   /** internal list of options that have been added */
   private final List<Option> options;
@@ -308,10 +308,22 @@ public abstract class AbstractJob extend
    *  argument values can be retrieved using {@code get(optionName)}. The
    *  names used for keys are the option name parameter prefixed by '--'.
    *
+   * @see #parseArguments(String[], boolean, boolean)  -- passes in false, false for the optional args.
    *
    */
   public Map<String, List<String>> parseArguments(String[] args) throws IOException {
+    return parseArguments(args, false, false);
+  }
 
+  /**
+   *
+   * @param args  The args to parse
+   * @param inputOptional if true, then the input option, if set, need not be present.  If false and input is an option and there is no input, then throw an error
+   * @param outputOptional if true, then the output option, if set, need not be present.  If false and output is an option and there is no output, then throw an error
+   * @return the args parsed into a map.
+   * @throws IOException
+   */
+  public Map<String, List<String>> parseArguments(String[] args, boolean inputOptional, boolean outputOptional) throws IOException{
     Option helpOpt = addOption(DefaultOptionCreator.helpOption());
     addOption("tempDir", null, "Intermediate output directory", "temp");
     addOption("startPhase", null, "First phase to run", "0");
@@ -344,7 +356,7 @@ public abstract class AbstractJob extend
     }
 
     try {
-      parseDirectories(cmdLine);
+      parseDirectories(cmdLine, inputOptional, outputOptional);
     } catch (IllegalArgumentException e) {
       log.error(e.getMessage());
       CommandLineUtil.printHelpWithGenericOptions(group);
@@ -424,7 +436,7 @@ public abstract class AbstractJob extend
    *   specified or outputOption is present and neither {@code --output}
    *   nor {@code -Dmapred.output.dir} are specified.
    */
-  protected void parseDirectories(CommandLine cmdLine) {
+  protected void parseDirectories(CommandLine cmdLine, boolean inputOptional, boolean outputOptional) {
 
     Configuration conf = getConf();
 
@@ -444,9 +456,9 @@ public abstract class AbstractJob extend
       this.outputPath = new Path(conf.get("mapred.output.dir"));
     }
 
-    Preconditions.checkArgument(inputOption == null || inputPath != null,
+    Preconditions.checkArgument(inputOptional == true || inputOption == null || inputPath != null,
         "No input specified or -Dmapred.input.dir must be provided to specify input directory");
-    Preconditions.checkArgument(outputOption == null || outputPath != null,
+    Preconditions.checkArgument(outputOptional == true || outputOption == null || outputPath != null,
         "No output specified:  or -Dmapred.output.dir must be provided to specify output directory");
   }
 

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToPrefsDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToPrefsDriver.java?rev=1243557&r1=1243556&r2=1243557&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToPrefsDriver.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToPrefsDriver.java Mon Feb 13 15:14:42 2012
@@ -87,18 +87,18 @@ public final class MailToPrefsDriver ext
     addOption("refs", "r", "The position in the input text (value) where the reference ids are located, starting from zero (0).", "1");
     addOption(buildOption("useCounts", "u", "If set, then use the number of times the user has interacted with a thread as an indication of their preference.  Otherwise, use boolean preferences.",
             false, false, "true"));
-    Map<String, String> parsedArgs = parseArguments(args);
+    Map<String, List<String>> parsedArgs = parseArguments(args);
 
     Path input = getInputPath();
     Path output = getOutputPath();
-    int chunkSize = Integer.parseInt(parsedArgs.get("--chunkSize"));
-    String separator = parsedArgs.get("--separator");
+    int chunkSize = Integer.parseInt(getOption("chunkSize"));
+    String separator = getOption("separator");
     Configuration conf = getConf();
     if (conf == null) {
       setConf(new Configuration());
       conf = getConf();
     }
-    boolean useCounts = hasOption("--useCounts");
+    boolean useCounts = hasOption("useCounts");
     AtomicInteger currentPhase = new AtomicInteger();
     int[] msgDim = new int[1];
     //TODO: mod this to not do so many passes over the data.  Dictionary creation could probably be a chain mapper
@@ -170,8 +170,8 @@ public final class MailToPrefsDriver ext
       conf.set(EmailUtility.MSG_ID_DIMENSION, String.valueOf(msgDim[0]));
       conf.set(EmailUtility.FROM_PREFIX, "fromIds-dictionary-");
       conf.set(EmailUtility.MSG_IDS_PREFIX, "msgIds-dictionary-");
-      conf.set(EmailUtility.FROM_INDEX, parsedArgs.get("--from"));
-      conf.set(EmailUtility.REFS_INDEX, parsedArgs.get("--refs"));
+      conf.set(EmailUtility.FROM_INDEX, getOption("from"));
+      conf.set(EmailUtility.REFS_INDEX, getOption("refs"));
       conf.set(EmailUtility.SEPARATOR, separator);
       conf.set(MailToRecReducer.USE_COUNTS_PREFERENCE, String.valueOf(useCounts));
       int j = 0;

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/email/PrepEmailVectorsDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/email/PrepEmailVectorsDriver.java?rev=1243557&r1=1243556&r2=1243557&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/email/PrepEmailVectorsDriver.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/email/PrepEmailVectorsDriver.java Mon Feb 13 15:14:42 2012
@@ -29,6 +29,7 @@ import org.apache.mahout.common.HadoopUt
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.apache.mahout.math.VectorWritable;
 
+import java.util.List;
 import java.util.Map;
 
 /**
@@ -52,7 +53,7 @@ public class PrepEmailVectorsDriver exte
     addOption(DefaultOptionCreator.overwriteOption().create());
     addOption("maxItemsPerLabel", "mipl", "The maximum number of items per label.  Can be useful for making the training sets the same size", String.valueOf(100000));
     addOption(buildOption("useListName", "ul", "Use the name of the list as part of the label.  If not set, then just use the project name", false, false, "false"));
-    Map<String,String> parsedArgs = parseArguments(args);
+    Map<String,List<String>> parsedArgs = parseArguments(args);
     if (parsedArgs == null) {
       return -1;
     }
@@ -64,8 +65,8 @@ public class PrepEmailVectorsDriver exte
     }
     Job convertJob = prepareJob(input, output, SequenceFileInputFormat.class, PrepEmailMapper.class,
             Text.class, VectorWritable.class, PrepEmailReducer.class, Text.class, VectorWritable.class, SequenceFileOutputFormat.class);
-    convertJob.getConfiguration().set(ITEMS_PER_CLASS, parsedArgs.get("--maxItemsPerLabel"));
-    convertJob.getConfiguration().set(USE_LIST_NAME, String.valueOf(parsedArgs.containsKey("--useListName")));
+    convertJob.getConfiguration().set(ITEMS_PER_CLASS, getOption("maxItemsPerLabel"));
+    convertJob.getConfiguration().set(USE_LIST_NAME, String.valueOf(hasOption("useListName")));
 
     boolean succeeded = convertJob.waitForCompletion(true);
     return succeeded ? 0 : -1;

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java?rev=1243557&r1=1243556&r2=1243557&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java Mon Feb 13 15:14:42 2012
@@ -17,6 +17,7 @@
 
 package org.apache.mahout.clustering.syntheticcontrol.canopy;
 
+import java.util.List;
 import java.util.Map;
 
 import org.apache.hadoop.conf.Configuration;
@@ -101,7 +102,7 @@ public final class Job extends AbstractJ
     addOption(DefaultOptionCreator.t2Option().create());
     addOption(DefaultOptionCreator.overwriteOption().create());
 
-    Map<String, String> argMap = parseArguments(args);
+    Map<String, List<String>> argMap = parseArguments(args);
     if (argMap == null) {
       return -1;
     }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java?rev=1243557&r1=1243556&r2=1243557&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java Mon Feb 13 15:14:42 2012
@@ -17,6 +17,7 @@
 
 package org.apache.mahout.clustering.syntheticcontrol.dirichlet;
 
+import java.util.List;
 import java.util.Map;
 
 import org.apache.commons.cli2.builder.ArgumentBuilder;
@@ -87,7 +88,7 @@ public final class Job extends AbstractJ
     addOption(DefaultOptionCreator.emitMostLikelyOption().create());
     addOption(DefaultOptionCreator.thresholdOption().create());
 
-    Map<String, String> argMap = parseArguments(args);
+    Map<String, List<String>> argMap = parseArguments(args);
     if (argMap == null) {
       return -1;
     }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java?rev=1243557&r1=1243556&r2=1243557&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java Mon Feb 13 15:14:42 2012
@@ -18,6 +18,7 @@
 package org.apache.mahout.clustering.syntheticcontrol.fuzzykmeans;
 
 import java.io.IOException;
+import java.util.List;
 import java.util.Map;
 
 import org.apache.commons.cli2.builder.ArgumentBuilder;
@@ -79,7 +80,7 @@ public final class Job extends AbstractJ
     addOption(M_OPTION, M_OPTION,
         "coefficient normalization factor, must be greater than 1", true);
 
-    Map<String, String> argMap = parseArguments(args);
+    Map<String, List<String>> argMap = parseArguments(args);
     if (argMap == null) {
       return -1;
     }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java?rev=1243557&r1=1243556&r2=1243557&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java Mon Feb 13 15:14:42 2012
@@ -18,6 +18,7 @@
 package org.apache.mahout.clustering.syntheticcontrol.kmeans;
 
 import java.io.IOException;
+import java.util.List;
 import java.util.Map;
 
 import org.apache.hadoop.conf.Configuration;
@@ -74,7 +75,7 @@ public final class Job extends AbstractJ
     addOption(DefaultOptionCreator.maxIterationsOption().create());
     addOption(DefaultOptionCreator.overwriteOption().create());
 
-    Map<String, String> argMap = parseArguments(args);
+    Map<String, List<String>> argMap = parseArguments(args);
     if (argMap == null) {
       return -1;
     }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java?rev=1243557&r1=1243556&r2=1243557&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java Mon Feb 13 15:14:42 2012
@@ -17,6 +17,7 @@
 
 package org.apache.mahout.clustering.syntheticcontrol.meanshift;
 
+import java.util.List;
 import java.util.Map;
 
 import org.apache.hadoop.conf.Configuration;
@@ -73,7 +74,7 @@ public final class Job extends AbstractJ
     addOption(DefaultOptionCreator.t2Option().create());
     addOption(DefaultOptionCreator.clusteringOption().create());
     
-    Map<String,String> argMap = parseArguments(args);
+    Map<String,List<String>> argMap = parseArguments(args);
     if (argMap == null) {
       return -1;
     }

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java?rev=1243557&r1=1243556&r2=1243557&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java Mon Feb 13 15:14:42 2012
@@ -139,13 +139,13 @@ public final class ConfusionMatrixDumper
     addOption(DefaultOptionCreator.overwriteOption().create());
     addFlag("html", null, "Create complete HTML page");
     addFlag("text", null, "Dump simple text");
-    Map<String,String> parsedArgs = parseArguments(args);
+    Map<String,List<String>> parsedArgs = parseArguments(args);
     if (parsedArgs == null) {
       return -1;
     }
     
     Path inputPath = getInputPath();
-    String outputFile = parsedArgs.containsKey("--output") ? parsedArgs.get("--output") : null;
+    String outputFile = hasOption("output") ? getOption("output") : null;
     boolean text = parsedArgs.containsKey("--text");
     boolean wrapHtml = parsedArgs.containsKey("--html");
     PrintStream out = getPrintStream(outputFile);

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java?rev=1243557&r1=1243556&r2=1243557&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java Mon Feb 13 15:14:42 2012
@@ -22,6 +22,7 @@ import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.io.PrintStream;
+import java.util.List;
 import java.util.Map;
 
 import org.apache.hadoop.conf.Configuration;
@@ -55,11 +56,11 @@ public final class MatrixDumper extends 
     
     addInputOption();
     addOption("output", "o", "Output path", null); // AbstractJob output feature requires param
-    Map<String, String> parsedArgs = parseArguments(args);
+    Map<String, List<String>> parsedArgs = parseArguments(args);
     if (parsedArgs == null) {
       return -1;
     }
-    String outputFile = parsedArgs.containsKey("--output") ? parsedArgs.get("--output") : null;
+    String outputFile = hasOption("output") ? getOption("output") : null;
     exportCSV(getInputPath(), outputFile, false);
     return 0;
   }

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java?rev=1243557&r1=1243556&r2=1243557&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java Mon Feb 13 15:14:42 2012
@@ -50,33 +50,27 @@ public final class SequenceFileDumper ex
   @Override
   public int run(String[] args) throws Exception {
 
-    addOption("seqFile", "s", "The Sequence File to read in", false);
-    addOption("seqDirectory", "d", "A directory containing sequence files to read", false);
-    addOption(DefaultOptionCreator.outputOption().create());
+    addInputOption();
+    addOutputOption();
     addOption("substring", "b", "The number of chars to print out per value", false);
     addOption(buildOption("count", "c", "Report the count only", false, false, null));
     addOption("numItems", "n", "Output at most <n> key value pairs", false);
     addOption(buildOption("facets", "fa", "Output the counts per key.  Note, if there are a lot of unique keys, this can take up a fair amount of memory", false, false, null));
     addOption(buildOption("quiet", "q", "Print only file contents.", false, false, null));
 
-    if (parseArguments(args) == null) {
+    if (parseArguments(args, false, true) == null) {
       return -1;
     }
 
     Path[] pathArr= null;
     Configuration conf = new Configuration();
-
-    if (getOption("seqFile") != null) {
+    Path input = getInputPath();
+    FileSystem fs = input.getFileSystem(conf);
+    if (fs.getFileStatus(input).isDir()) {
+      pathArr = FileUtil.stat2Paths(fs.listStatus(input, new OutputFilesFilter()));
+    } else {
       pathArr = new Path[1];
-      pathArr[0] = new Path(getOption("seqFile"));
-    } else if (getOption("seqDirectory") != null) {
-      Path dirPath = new Path(getOption("seqDirectory"));
-      FileSystem fs = dirPath.getFileSystem(conf);
-      pathArr = FileUtil.stat2Paths(fs.listStatus(dirPath, new OutputFilesFilter()));
-    }
-    if (pathArr == null) {
-      System.out.println("Must specify --seqFile (-s) or --seqDirectory (-d)!");      
-      return -1;
+      pathArr[0] = input;
     }
 
 
@@ -91,8 +85,9 @@ public final class SequenceFileDumper ex
     }
     try {
       for (Path path : pathArr) {
-        if (!hasOption("quiet"))
+        if (!hasOption("quiet")){
           writer.append("Input Path: ").append(String.valueOf(path)).append('\n');
+        }
 
         int sub = Integer.MAX_VALUE;
         if (hasOption("substring")) {

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java?rev=1243557&r1=1243556&r2=1243557&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java Mon Feb 13 15:14:42 2012
@@ -81,7 +81,6 @@ public final class ClusterDumper extends
   private long maxPointsPerCluster = Long.MAX_VALUE;
   private String termDictionary;
   private String dictionaryFormat;
-  private String outputFile;
   private int subString = Integer.MAX_VALUE;
   private int numTopFeatures = 10;
   private Map<Integer, List<WeightedVectorWritable>> clusterIdToPoints;
@@ -104,8 +103,8 @@ public final class ClusterDumper extends
 
   @Override
   public int run(String[] args) throws Exception {
-    addOption(SEQ_FILE_DIR_OPTION, "s", "The directory containing Sequence Files for the Clusters", true);
-    addOption(OUTPUT_OPTION, "o", "Optional output directory. Default is to output to the console.");
+    addInputOption();
+    addOutputOption();
     addOption(OUTPUT_FORMAT_OPT, "of", "The optional output format to write the results as.  Options: TEXT, CSV or GRAPH_ML", "TEXT");
     addOption(SUBSTRING_OPTION, "b", "The number of chars of the asFormatString() to print");
     addOption(NUM_WORDS_OPTION, "n", "The number of top terms to print");
@@ -121,11 +120,11 @@ public final class ClusterDumper extends
       return -1;
     }
 
-    seqFileDir = new Path(getOption(SEQ_FILE_DIR_OPTION));
+    seqFileDir = getInputPath();
     if (hasOption(POINTS_DIR_OPTION)) {
       pointsDir = new Path(getOption(POINTS_DIR_OPTION));
     }
-    outputFile = getOption(OUTPUT_OPTION);
+    outputFile = getOutputFile();
     if (hasOption(SUBSTRING_OPTION)) {
       int sub = Integer.parseInt(getOption(SUBSTRING_OPTION));
       if (sub >= 0) {
@@ -174,12 +173,12 @@ public final class ClusterDumper extends
       writer = new OutputStreamWriter(System.out);
     } else {
       shouldClose = true;
-      if (outputFile.startsWith("s3n://")) {
-        Path p = new Path(this.outputFile);
+      if (outputFile.getName().startsWith("s3n://")) {
+        Path p = outputPath;
         FileSystem fs = FileSystem.get(p.toUri(), conf);
         writer = new OutputStreamWriter(fs.create(p), Charsets.UTF_8);
       } else {
-        writer = Files.newWriter(new File(this.outputFile), Charsets.UTF_8);
+        writer = Files.newWriter(this.outputFile, Charsets.UTF_8);
       }
     }
     ClusterWriter clusterWriter = createClusterWriter(writer, dictionary);
@@ -248,13 +247,6 @@ public final class ClusterDumper extends
     }
   }
 
-  public String getOutputFile() {
-    return outputFile;
-  }
-
-  public void setOutputFile(String outputFile) {
-    this.outputFile = outputFile;
-  }
 
   public int getSubString() {
     return subString;

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/RowIdJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/RowIdJob.java?rev=1243557&r1=1243556&r2=1243557&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/RowIdJob.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/RowIdJob.java Mon Feb 13 15:14:42 2012
@@ -34,6 +34,7 @@ import org.apache.mahout.math.VectorWrit
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.util.List;
 import java.util.Map;
 
 public class RowIdJob extends AbstractJob {
@@ -45,7 +46,7 @@ public class RowIdJob extends AbstractJo
     addInputOption();
     addOutputOption();
 
-    Map<String,String> parsedArgs = parseArguments(args);
+    Map<String,List<String>> parsedArgs = parseArguments(args);
     if (parsedArgs == null) {
       return -1;
     }

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java?rev=1243557&r1=1243556&r2=1243557&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java Mon Feb 13 15:14:42 2012
@@ -92,7 +92,7 @@ public final class VectorDumper extends 
     addOption(buildOption("filter", "fi", "Only dump out those vectors whose name matches the filter." +
             "  Multiple items may be specified by repeating the argument.", true, 1, 100, false, null));
 
-    if (parseArguments(args) == null) {
+    if (parseArguments(args, false, true) == null) {
       return -1;
     }