You are viewing a plain text version of this content. The canonical link to the original message was not preserved in this plain-text copy.
Posted to commits@mahout.apache.org by is...@apache.org on 2009/10/15 17:27:45 UTC

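svn commit: r825516 - in /lucene/mahout/trunk: core/src/main/java/org/apache/mahout/cf/taste/hadoop/ core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/ core/src/main/java/org/apache/mahout/clustering/dirichlet/ core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/ core/src/main/java/org/apache/mahout/clustering/meanshift/ core/src/main/java/org/apache/mahout/common/commandline/ examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/ examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/ examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/ examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/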

Author: isabel
Date: Thu Oct 15 15:27:40 2009
New Revision: 825516

URL: http://svn.apache.org/viewvc?rev=825516&view=rev
Log:
MAHOUT-138 - final changes for command line parsing in clustering
examples.

Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOneDiffsToAveragesJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOnePrefsToDiffsJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputDriver.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputDriver.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderJob.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderJob.java Thu Oct 15 15:27:40 2009
@@ -87,7 +87,7 @@
     Option dataModelFileOpt = obuilder.withLongName("dataModelFile").withRequired(true)
       .withShortName("m").withArgument(abuilder.withName("dataModelFile").withMinimum(1)
       .withMaximum(1).create()).withDescription("File containing data model.").create();
-    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder);
+    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).create();
     Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
 
     Group group = gbuilder.withName("Options").withOption(recommendClassOpt).withOption(userRecommendOpt)

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOneDiffsToAveragesJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOneDiffsToAveragesJob.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOneDiffsToAveragesJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOneDiffsToAveragesJob.java Thu Oct 15 15:27:40 2009
@@ -60,7 +60,7 @@
     Option inputOpt = obuilder.withLongName("input").withRequired(true).withShortName("i")
       .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
       .withDescription("The Path for input preferences file.").create();
-    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder);
+    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).create();
     Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
 
     Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOnePrefsToDiffsJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOnePrefsToDiffsJob.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOnePrefsToDiffsJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOnePrefsToDiffsJob.java Thu Oct 15 15:27:40 2009
@@ -62,7 +62,7 @@
     .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
     .withDescription("The Path for input preferences file.").create();
 
-    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder);
+    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).create();
     Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
 
     Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java Thu Oct 15 15:27:40 2009
@@ -47,8 +47,8 @@
     ArgumentBuilder abuilder = new ArgumentBuilder();
     GroupBuilder gbuilder = new GroupBuilder();
 
-    Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder);
-    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder);
+    Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder).create();
+    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).create();
     Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
 
     Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java Thu Oct 15 15:27:40 2009
@@ -68,10 +68,10 @@
     ArgumentBuilder abuilder = new ArgumentBuilder();
     GroupBuilder gbuilder = new GroupBuilder();
 
-    Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder);
-    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder);
-    Option maxIterOpt = DefaultOptionCreator.maxIterOption(obuilder, abuilder);
-    Option topicsOpt = DefaultOptionCreator.kOption(obuilder, abuilder);
+    Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder).create();
+    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).create();
+    Option maxIterOpt = DefaultOptionCreator.maxIterOption(obuilder, abuilder).create();
+    Option topicsOpt = DefaultOptionCreator.kOption(obuilder, abuilder).create();
     Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
 
     Option mOpt = obuilder.withLongName("alpha").withRequired(true).withShortName("m").

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletJob.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletJob.java Thu Oct 15 15:27:40 2009
@@ -47,10 +47,10 @@
     ArgumentBuilder abuilder = new ArgumentBuilder();
     GroupBuilder gbuilder = new GroupBuilder();
 
-    Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder);
-    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder);
-    Option maxIterOpt = DefaultOptionCreator.maxIterOption(obuilder, abuilder);
-    Option topicsOpt = DefaultOptionCreator.kOption(obuilder, abuilder);
+    Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder).create();
+    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).create();
+    Option maxIterOpt = DefaultOptionCreator.maxIterOption(obuilder, abuilder).create();
+    Option topicsOpt = DefaultOptionCreator.kOption(obuilder, abuilder).create();
     Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
 
     Option mOpt = obuilder.withLongName("alpha").withRequired(true).withShortName("m").

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java Thu Oct 15 15:27:40 2009
@@ -49,11 +49,11 @@
     ArgumentBuilder abuilder = new ArgumentBuilder();
     GroupBuilder gbuilder = new GroupBuilder();
 
-    Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder);
-    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder);
-    Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption(obuilder, abuilder);
-    Option measureClassOpt = DefaultOptionCreator.distanceOption(obuilder, abuilder);
-    Option maxIterOpt = DefaultOptionCreator.maxIterOption(obuilder, abuilder);
+    Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder).create();
+    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).create();
+    Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption(obuilder, abuilder).create();
+    Option measureClassOpt = DefaultOptionCreator.distanceOption(obuilder, abuilder).create();
+    Option maxIterOpt = DefaultOptionCreator.maxIterOption(obuilder, abuilder).create();
     Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
 
     Option clustersOpt = obuilder.withLongName("clusters").withRequired(true).withShortName("c").

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java Thu Oct 15 15:27:40 2009
@@ -53,9 +53,9 @@
     ArgumentBuilder abuilder = new ArgumentBuilder();
     GroupBuilder gbuilder = new GroupBuilder();
 
-    Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder);
-    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder);
-    Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption(obuilder, abuilder);
+    Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder).create();
+    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).create();
+    Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption(obuilder, abuilder).create();
     Option helpOpt = DefaultOptionCreator.helpOption(obuilder);    
 
     Option modelOpt = obuilder.withLongName("distanceClass").withRequired(true).withShortName("d").

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java Thu Oct 15 15:27:40 2009
@@ -50,10 +50,10 @@
     ArgumentBuilder abuilder = new ArgumentBuilder();
     GroupBuilder gbuilder = new GroupBuilder();
 
-    Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder);
-    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder);
-    Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption(obuilder, abuilder);
-    Option maxIterOpt = DefaultOptionCreator.maxIterOption(obuilder, abuilder);
+    Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder).create();
+    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).create();
+    Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption(obuilder, abuilder).create();
+    Option maxIterOpt = DefaultOptionCreator.maxIterOption(obuilder, abuilder).create();
     Option helpOpt = DefaultOptionCreator.helpOption(obuilder);    
 
     Option modelOpt = obuilder.withLongName("distanceClass").withRequired(true).withShortName("d").

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java Thu Oct 15 15:27:40 2009
@@ -28,92 +28,84 @@
   /**
    * Returns a default command line option for convergence delta specification.
    */
-  public static Option convergenceOption(
+  public static DefaultOptionBuilder convergenceOption(
       final DefaultOptionBuilder obuilder, final ArgumentBuilder abuilder) {
-    Option convergenceDeltaOpt = obuilder.withLongName("convergencedelta")
+    return obuilder.withLongName("convergencedelta")
         .withRequired(true).withShortName("v").withArgument(
             abuilder.withName("convergenceDelta").withMinimum(1).withMaximum(1)
-                .create()).withDescription("The convergence delta value.")
-        .create();
-    return convergenceDeltaOpt;
+                .create()).withDescription("The convergence delta value.");
   }
 
   /**
    * Returns a default command line option for output directory specification.
    */
-  public static Option outputOption(final DefaultOptionBuilder obuilder,
+  public static DefaultOptionBuilder outputOption(final DefaultOptionBuilder obuilder,
       final ArgumentBuilder abuilder) {
-    Option outputOpt = obuilder.withLongName("output").withRequired(true)
+    return obuilder.withLongName("output").withRequired(true)
         .withShortName("o").withArgument(
             abuilder.withName("output").withMinimum(1).withMaximum(1).create())
-        .withDescription("The directory pathname for output.").create();
-    return outputOpt;
+        .withDescription("The directory pathname for output.");
   }
 
   /**
    * Returns a default command line option for input directory specification.
    */
-  public static Option inputOption(final DefaultOptionBuilder obuilder,
+  public static DefaultOptionBuilder inputOption(final DefaultOptionBuilder obuilder,
       final ArgumentBuilder abuilder) {
-    Option inputOpt = obuilder
+    return obuilder
         .withLongName("input")
         .withRequired(true)
         .withShortName("i")
         .withArgument(
             abuilder.withName("input").withMinimum(1).withMaximum(1).create())
         .withDescription(
-            "The Path for input Vectors. Must be a SequenceFile of Writable, Vector.")
-        .create();
-    return inputOpt;
+            "The Path for input Vectors. Must be a SequenceFile of Writable, Vector.");
   }
 
   /**
    * Returns a default command line option for specification of numbers of
    * clusters to create.
    */
-  public static Option kOption(DefaultOptionBuilder obuilder,
+  public static DefaultOptionBuilder kOption(DefaultOptionBuilder obuilder,
       ArgumentBuilder abuilder) {
-    Option clustersOpt = obuilder
+    return obuilder
         .withLongName("k")
-        .withRequired(false)
+        .withRequired(true)
         .withArgument(
             abuilder.withName("k").withMinimum(1).withMaximum(1).create())
         .withDescription(
             "The k in k-Means.  If specified, then a random selection of k Vectors will be chosen as the Centroid and written to the clusters output path.")
-        .withShortName("k").create();
-    return clustersOpt;
+        .withShortName("k");
   }
 
   /**
    * Returns a default command line option for specification of max number of
    * iterations.
    */
-  public static Option maxIterOption(DefaultOptionBuilder obuilder,
+  public static DefaultOptionBuilder maxIterOption(DefaultOptionBuilder obuilder,
       ArgumentBuilder abuilder) {
-    Option maxIterOpt = obuilder
+    return obuilder
         .withLongName("maxIter")
         .withRequired(true)
         .withShortName("x")
         .withArgument(
             abuilder.withName("maxIter").withMinimum(1).withMaximum(1).create())
-        .withDescription("The maximum number of iterations.").create();
-    return maxIterOpt;
+        .withDescription("The maximum number of iterations.");
   }
 
   /**
    * Returns a default command line option for specification of distance measure
    * class to use.
    */
-  public static Option distanceOption(DefaultOptionBuilder obuilder,
+  public static DefaultOptionBuilder distanceOption(DefaultOptionBuilder obuilder,
       ArgumentBuilder abuilder) {
-    Option measureClassOpt = obuilder
+    return obuilder
         .withLongName("measure")
         .withRequired(true)
         .withShortName("d")
         .withArgument(
             abuilder.withName("measure").withMinimum(1).withMaximum(1).create())
-        .withDescription("The classname of the DistanceMeasure.").create();
-    return measureClassOpt;
+        .withDescription("The classname of the DistanceMeasure.");
   }
 
   /**

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java Thu Oct 15 15:27:40 2009
@@ -17,6 +17,14 @@
 
 package org.apache.mahout.clustering.syntheticcontrol.canopy;
 
+import org.apache.commons.cli2.CommandLine;
+import org.apache.commons.cli2.Group;
+import org.apache.commons.cli2.Option;
+import org.apache.commons.cli2.OptionException;
+import org.apache.commons.cli2.builder.ArgumentBuilder;
+import org.apache.commons.cli2.builder.DefaultOptionBuilder;
+import org.apache.commons.cli2.builder.GroupBuilder;
+import org.apache.commons.cli2.commandline.Parser;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.FileInputFormat;
@@ -25,20 +33,53 @@
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.log4j.Logger;
+import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.apache.mahout.matrix.Vector;
 
 import java.io.IOException;
 
 public class InputDriver {
+  /**Logger for this class.*/
+  private static final Logger LOG = Logger.getLogger(InputDriver.class);
+
   private InputDriver() {
   }
 
   public static void main(String[] args) throws IOException, ClassNotFoundException {
-    String input = args[0];
-    String output = args[1];
-    String vectorClassName = args[2];
-    Class<? extends Vector> vectorClass = (Class<? extends Vector>) Class.forName(vectorClassName);
-    runJob(input, output, vectorClass);
+    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
+    ArgumentBuilder abuilder = new ArgumentBuilder();
+    GroupBuilder gbuilder = new GroupBuilder();
+
+    Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder).withRequired(false).create();
+    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).withRequired(false).create();
+    Option vectorOpt = obuilder.withLongName("vector").withRequired(false).withArgument(
+        abuilder.withName("v").withMinimum(1).withMaximum(1).create()).withDescription(
+        "The vector implementation to use.").withShortName("v").create();
+
+    Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
+
+    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(vectorOpt).withOption(helpOpt).create();
+
+    try {
+      Parser parser = new Parser();
+      parser.setGroup(group);
+      CommandLine cmdLine = parser.parse(args);
+      if (cmdLine.hasOption(helpOpt)) {
+        CommandLineUtil.printHelp(group);
+        return;
+      }
+
+      String input = cmdLine.getValue(inputOpt, "testdata").toString();
+      String output = cmdLine.getValue(outputOpt, "output").toString();
+      String vectorClassName = cmdLine.getValue(vectorOpt).toString();
+      Class<? extends Vector> vectorClass = (Class<? extends Vector>) Class.forName(vectorClassName);
+      runJob(input, output, vectorClass);
+    } catch (OptionException e) {
+      LOG.error("Exception parsing command line: ", e);
+      CommandLineUtil.printHelp(group);
+    }
   }
 
   public static void runJob(String input, String output, Class<? extends Vector> vectorClass) throws IOException {

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java Thu Oct 15 15:27:40 2009
@@ -19,32 +19,92 @@
 
 import java.io.IOException;
 
+import org.apache.commons.cli2.CommandLine;
+import org.apache.commons.cli2.Group;
+import org.apache.commons.cli2.Option;
+import org.apache.commons.cli2.OptionException;
+import org.apache.commons.cli2.builder.ArgumentBuilder;
+import org.apache.commons.cli2.builder.DefaultOptionBuilder;
+import org.apache.commons.cli2.builder.GroupBuilder;
+import org.apache.commons.cli2.commandline.Parser;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.JobClient;
 import org.apache.hadoop.mapred.JobConf;
+import org.apache.log4j.Logger;
 import org.apache.mahout.clustering.canopy.CanopyClusteringJob;
 import org.apache.mahout.clustering.syntheticcontrol.Constants;
+import org.apache.mahout.common.CommandLineUtil;
 import org.apache.mahout.matrix.Vector;
-import org.apache.mahout.matrix.SparseVector;
 
 public class Job {
+  /** Logger for this class.*/
+  private static final Logger LOG = Logger.getLogger(Job.class);
+
   private Job() {
   }
 
   public static void main(String[] args) throws IOException, ClassNotFoundException {
-    if (args.length == 5) {
-      String input = args[0];
-      String output = args[1];
-      String measureClassName = args[2];
-      double t1 = Double.parseDouble(args[3]);
-      double t2 = Double.parseDouble(args[4]);
-      String vectorClassName = args[5];
-      Class<? extends Vector> vectorClass = (Class<? extends Vector>) Class.forName(vectorClassName);
-      runJob(input, output, measureClassName, t1, t2, vectorClass);
-    } else
-      runJob("testdata", "output",
-          "org.apache.mahout.common.distance.EuclideanDistanceMeasure", 80, 55, SparseVector.class);
+      DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
+      ArgumentBuilder abuilder = new ArgumentBuilder();
+      GroupBuilder gbuilder = new GroupBuilder();
+
+      Option inputOpt = obuilder.withLongName("input").withRequired(false).withArgument(
+          abuilder.withName("input").withMinimum(1).withMaximum(1).create()).
+          withDescription("The Path for input Vectors. Must be a SequenceFile of Writable, Vector").withShortName("i").create();
+      Option outputOpt = obuilder.withLongName("output").withRequired(false).withArgument(
+          abuilder.withName("output").withMinimum(1).withMaximum(1).create()).
+          withDescription("The Path to put the output in").withShortName("o").create();
+
+      Option measureClassOpt = obuilder.withLongName("distance").withRequired(false).withArgument(
+          abuilder.withName("distance").withMinimum(1).withMaximum(1).create()).
+          withDescription("The Distance Measure to use.  Default is SquaredEuclidean").withShortName("m").create();
+      Option vectorClassOpt = obuilder.withLongName("vectorClass").withRequired(false).withArgument(
+          abuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create()).
+          withDescription("The Vector implementation class name.  Default is SparseVector.class").withShortName("v").create();
+
+      Option t1Opt = obuilder.withLongName("t1").withRequired(false).withArgument(
+          abuilder.withName("t1").withMinimum(1).withMaximum(1).create()).
+          withDescription("t1").withShortName("t1").create();
+      Option t2Opt = obuilder.withLongName("t2").withRequired(false).withArgument(
+          abuilder.withName("t2").withMinimum(1).withMaximum(1).create()).
+          withDescription("t2").withShortName("t2").create();
+
+
+      Option helpOpt = obuilder.withLongName("help").
+          withDescription("Print out help").withShortName("h").create();
+
+      Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)
+          .withOption(measureClassOpt).withOption(vectorClassOpt)
+          .withOption(t1Opt).withOption(t2Opt)
+          .withOption(helpOpt).create();
+
+
+      try {
+        Parser parser = new Parser();
+        parser.setGroup(group);
+        CommandLine cmdLine = parser.parse(args);
+
+        if (cmdLine.hasOption(helpOpt)) {
+          CommandLineUtil.printHelp(group);
+          return;
+        }
+
+        String input = cmdLine.getValue(inputOpt, "testdata").toString();
+        String output = cmdLine.getValue(outputOpt, "output").toString();
+        String measureClass = cmdLine.getValue(
+            measureClassOpt, "org.apache.mahout.common.distance.EuclideanDistanceMeasure").toString();
+
+        Class<? extends Vector> vectorClass = (Class<? extends Vector>) Class.forName(
+            cmdLine.getValue(vectorClassOpt, "org.apache.mahout.matrix.SparseVector").toString());
+        double t1 = Double.parseDouble(cmdLine.getValue(t1Opt, "80").toString());
+        double t2 = Double.parseDouble(cmdLine.getValue(t2Opt, "55").toString());
+
+        runJob(input, output, measureClass, t1, t2, vectorClass);
+      } catch (OptionException e) {
+        LOG.error("Exception", e);
+        CommandLineUtil.printHelp(group);
+      }
   }
 
   /**
@@ -58,14 +118,20 @@
    * resides in a directory named "testdata", and writes output to a directory
    * named "output".
    * 
-   * @param input the String denoting the input directory path
-   * @param output the String denoting the output directory path
-   * @param measureClassName the String class name of the DistanceMeasure to use
-   * @param t1 the canopy T1 threshold
-   * @param t2 the canopy T2 threshold
+   * @param input
+   *          the String denoting the input directory path
+   * @param output
+   *          the String denoting the output directory path
+   * @param measureClassName
+   *          the String class name of the DistanceMeasure to use
+   * @param t1
+   *          the canopy T1 threshold
+   * @param t2
+   *          the canopy T2 threshold
    */
   private static void runJob(String input, String output,
-      String measureClassName, double t1, double t2, Class<? extends Vector> vectorClass) throws IOException {
+      String measureClassName, double t1, double t2,
+      Class<? extends Vector> vectorClass) throws IOException {
     JobClient client = new JobClient();
     JobConf conf = new JobConf(Job.class);
 
@@ -74,10 +140,11 @@
     FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
     if (dfs.exists(outPath))
       dfs.delete(outPath, true);
-    final String directoryContainingConvertedInput = output + Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT;
+    final String directoryContainingConvertedInput = output
+        + Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT;
     InputDriver.runJob(input, directoryContainingConvertedInput, vectorClass);
-    CanopyClusteringJob.runJob(directoryContainingConvertedInput, output, measureClassName,
-        t1, t2, vectorClass);
+    CanopyClusteringJob.runJob(directoryContainingConvertedInput, output,
+        measureClassName, t1, t2, vectorClass);
   }
 
 }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java Thu Oct 15 15:27:40 2009
@@ -21,9 +21,18 @@
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.commons.cli2.CommandLine;
+import org.apache.commons.cli2.Group;
+import org.apache.commons.cli2.Option;
+import org.apache.commons.cli2.OptionException;
+import org.apache.commons.cli2.builder.ArgumentBuilder;
+import org.apache.commons.cli2.builder.DefaultOptionBuilder;
+import org.apache.commons.cli2.builder.GroupBuilder;
+import org.apache.commons.cli2.commandline.Parser;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.JobConf;
+import org.apache.log4j.Logger;
 import org.apache.mahout.clustering.dirichlet.DirichletCluster;
 import org.apache.mahout.clustering.dirichlet.DirichletDriver;
 import org.apache.mahout.clustering.dirichlet.DirichletJob;
@@ -31,37 +40,75 @@
 import org.apache.mahout.clustering.dirichlet.models.Model;
 import org.apache.mahout.clustering.kmeans.KMeansDriver;
 import org.apache.mahout.clustering.syntheticcontrol.canopy.InputDriver;
+import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.apache.mahout.matrix.Vector;
 import org.apache.mahout.matrix.SparseVector;
 
 import static org.apache.mahout.clustering.syntheticcontrol.Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT;
 
 public class Job {
-       
-    
+
+  /**Logger for this class.*/
+  private static final Logger LOG = Logger.getLogger(Job.class);
+
   private Job() {
   }
 
   public static void main(String[] args) throws IOException,
       ClassNotFoundException, InstantiationException, IllegalAccessException {
-    if (args.length == 7) {
-      String input = args[0];
-      String output = args[1];
-      String modelFactory = args[2];
-      int numClusters = Integer.parseInt(args[3]);
-      int maxIterations = Integer.parseInt(args[4]);
-      double alpha_0 = Double.parseDouble(args[5]);
-      int numReducers = Integer.parseInt(args[6]);
-      String vectorClassName = args[7];
+    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
+    ArgumentBuilder abuilder = new ArgumentBuilder();
+    GroupBuilder gbuilder = new GroupBuilder();
+    // Every option is optional; an omitted one falls back to the default given where it is read below.
+    Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder).withRequired(false).create();
+    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).withRequired(false).create();
+    Option maxIterOpt = DefaultOptionCreator.maxIterOption(obuilder, abuilder).withRequired(false).create();
+    Option topicsOpt = DefaultOptionCreator.kOption(obuilder, abuilder).withRequired(false).create();
+
+    Option redOpt = obuilder.withLongName("reducerNum").withRequired(false).withArgument(
+        abuilder.withName("r").withMinimum(1).withMaximum(1).create()).withDescription(
+        "The number of reducers to use.").withShortName("r").create();
+
+    Option vectorOpt = obuilder.withLongName("vector").withRequired(false).withArgument(
+        abuilder.withName("v").withMinimum(1).withMaximum(1).create()).withDescription(
+        "The vector implementation to use.").withShortName("v").create();
+
+    Option mOpt = obuilder.withLongName("alpha").withRequired(false).withShortName("m").
+        withArgument(abuilder.withName("alpha").withMinimum(1).withMaximum(1).create()).
+        withDescription("The alpha0 value for the DirichletDistribution.").create();
+
+    Option modelOpt = obuilder.withLongName("modelClass").withRequired(false).withShortName("d").
+        withArgument(abuilder.withName("modelClass").withMinimum(1).withMaximum(1).create()).
+        withDescription("The ModelDistribution class name.").create();
+    Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
+    // vectorOpt has to be registered as well: the parser only accepts options that belong to the group.
+    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(modelOpt).
+        withOption(maxIterOpt).withOption(mOpt).withOption(topicsOpt).withOption(redOpt).withOption(vectorOpt).
+        withOption(helpOpt).create();
+    try {
+      Parser parser = new Parser();
+      parser.setGroup(group);
+      CommandLine cmdLine = parser.parse(args);
+      if (cmdLine.hasOption(helpOpt)) {
+        CommandLineUtil.printHelp(group);
+        return;
+      }
+
+      String input = cmdLine.getValue(inputOpt, "testdata").toString();
+      String output = cmdLine.getValue(outputOpt, "output").toString();
+      String modelFactory = cmdLine.getValue(modelOpt, "org.apache.mahout.clustering.syntheticcontrol.dirichlet.NormalScModelDistribution").toString();
+      int numModels = Integer.parseInt(cmdLine.getValue(topicsOpt, "10").toString());
+      int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt, "5").toString());
+      double alpha_0 = Double.parseDouble(cmdLine.getValue(mOpt, "1.0").toString());
+      int numReducers = Integer.parseInt(cmdLine.getValue(redOpt, "1").toString());
+      String vectorClassName = cmdLine.getValue(vectorOpt, "org.apache.mahout.matrix.SparseVector").toString();
       Class<? extends Vector> vectorClass = (Class<? extends Vector>) Class.forName(vectorClassName);
-      runJob(input, output, modelFactory, numClusters, maxIterations, alpha_0,
-          numReducers, vectorClass);
-    } else
-      runJob(
-          "testdata",
-          "output",
-          "org.apache.mahout.clustering.syntheticcontrol.dirichlet.NormalScModelDistribution",
-          10, 5, 1.0, 1, SparseVector.class);
+      runJob(input, output, modelFactory, numModels, maxIterations, alpha_0, numReducers, vectorClass);
+    } catch (OptionException e) {
+      LOG.error("Exception parsing command line: ", e);
+      CommandLineUtil.printHelp(group);
+    }
   }
 
   /**

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java Thu Oct 15 15:27:40 2009
@@ -21,41 +21,93 @@
 
 import java.io.IOException;
 
+import org.apache.commons.cli2.CommandLine;
+import org.apache.commons.cli2.Group;
+import org.apache.commons.cli2.Option;
+import org.apache.commons.cli2.OptionException;
+import org.apache.commons.cli2.builder.ArgumentBuilder;
+import org.apache.commons.cli2.builder.DefaultOptionBuilder;
+import org.apache.commons.cli2.builder.GroupBuilder;
+import org.apache.commons.cli2.commandline.Parser;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.JobClient;
 import org.apache.hadoop.mapred.JobConf;
+import org.apache.log4j.Logger;
 import org.apache.mahout.clustering.canopy.CanopyClusteringJob;
 import org.apache.mahout.clustering.canopy.CanopyDriver;
 import org.apache.mahout.clustering.kmeans.KMeansDriver;
+import org.apache.mahout.clustering.kmeans.RandomSeedGenerator;
 import org.apache.mahout.clustering.syntheticcontrol.canopy.InputDriver;
+import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.HadoopUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
+import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
 import org.apache.mahout.matrix.SparseVector;
 import org.apache.mahout.matrix.Vector;
 
 public class Job {
 
+  /** Logger for this class.*/
+  private static final Logger LOG = Logger.getLogger(Job.class);
+
   private Job() {
   }
 
   public static void main(String[] args) throws IOException,
       ClassNotFoundException {
-    if (args.length == 8) {
-      String input = args[0];
-      String output = args[1];
-      String measureClass = args[2];
-      double t1 = Double.parseDouble(args[3]);
-      double t2 = Double.parseDouble(args[4]);
-      double convergenceDelta = Double.parseDouble(args[5]);
-      int maxIterations = Integer.parseInt(args[6]);
-      String vectorClassName = args[7];
-      Class<? extends Vector> vectorClass = (Class<? extends Vector>) Class
-          .forName(vectorClassName);
-      runJob(input, output, measureClass, t1, t2, convergenceDelta,
-          maxIterations, vectorClass);
-    } else
-      runJob("testdata", "output",
-          "org.apache.mahout.common.distance.EuclideanDistanceMeasure", 80, 55, 0.5, 10,
-          SparseVector.class);
+    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
+    ArgumentBuilder abuilder = new ArgumentBuilder();
+    GroupBuilder gbuilder = new GroupBuilder();
+    // Every option is optional; an omitted one falls back to the default given where it is read below.
+    Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder).withRequired(false).create();
+    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).withRequired(false).create();
+    Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption(obuilder, abuilder).withRequired(false).create();
+    Option maxIterationsOpt = DefaultOptionCreator.maxIterOption(obuilder, abuilder).withRequired(false).create();
+
+    Option measureClassOpt = obuilder.withLongName("distance").withRequired(false).withArgument(
+        abuilder.withName("distance").withMinimum(1).withMaximum(1).create()).withDescription(
+        "The Distance Measure to use.  Default is EuclideanDistanceMeasure").withShortName("m").create();
+    // Short names must be unique within the group, so t1/t2 get their own instead of reusing "m".
+    Option t1Opt = obuilder.withLongName("t1").withRequired(false).withArgument(
+        abuilder.withName("t1").withMinimum(1).withMaximum(1).create()).withDescription(
+        "The t1 value to use.").withShortName("t1").create();
+    Option t2Opt = obuilder.withLongName("t2").withRequired(false).withArgument(
+        abuilder.withName("t2").withMinimum(1).withMaximum(1).create()).withDescription(
+        "The t2 value to use.").withShortName("t2").create();
+    Option vectorClassOpt = obuilder.withLongName("vectorClass").withRequired(false).withArgument(
+        abuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create()).withDescription(
+        "The Vector implementation class name.  Default is SparseVector.class").withShortName("v").create();
+
+    Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
+
+    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)
+        .withOption(measureClassOpt).withOption(convergenceDeltaOpt).withOption(maxIterationsOpt)
+        .withOption(vectorClassOpt).withOption(t1Opt).withOption(t2Opt).withOption(helpOpt).create();
+    try {
+      Parser parser = new Parser();
+      parser.setGroup(group);
+      CommandLine cmdLine = parser.parse(args);
+
+      if (cmdLine.hasOption(helpOpt)) {
+        CommandLineUtil.printHelp(group);
+        return;
+      }
+      String input = cmdLine.getValue(inputOpt, "testdata").toString();
+      String output = cmdLine.getValue(outputOpt, "output").toString();
+      String measureClass = cmdLine.getValue(measureClassOpt, "org.apache.mahout.common.distance.EuclideanDistanceMeasure").toString();
+      double t1 = Double.parseDouble(cmdLine.getValue(t1Opt, "80").toString());
+      double t2 = Double.parseDouble(cmdLine.getValue(t2Opt, "55").toString());
+      double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt, "0.5").toString());
+      int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterationsOpt, "10").toString());
+      Class<? extends Vector> vectorClass = (Class<? extends Vector>) Class.forName(
+          cmdLine.getValue(vectorClassOpt, "org.apache.mahout.matrix.SparseVector").toString());
+
+      runJob(input, output, measureClass, t1, t2, convergenceDelta, maxIterations, vectorClass);
+    } catch (OptionException e) {
+      LOG.error("Exception", e);
+      CommandLineUtil.printHelp(group);
+    }
   }
 
   /**

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputDriver.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputDriver.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputDriver.java Thu Oct 15 15:27:40 2009
@@ -19,6 +19,14 @@
 
 import java.io.IOException;
 
+import org.apache.commons.cli2.CommandLine;
+import org.apache.commons.cli2.Group;
+import org.apache.commons.cli2.Option;
+import org.apache.commons.cli2.OptionException;
+import org.apache.commons.cli2.builder.ArgumentBuilder;
+import org.apache.commons.cli2.builder.DefaultOptionBuilder;
+import org.apache.commons.cli2.builder.GroupBuilder;
+import org.apache.commons.cli2.commandline.Parser;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.FileInputFormat;
@@ -27,14 +35,45 @@
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.log4j.Logger;
 import org.apache.mahout.clustering.meanshift.MeanShiftCanopy;
+import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
+import org.apache.mahout.matrix.Vector;
 
 public class InputDriver {
+  /**Logger for this class.*/
+  private static final Logger LOG = Logger.getLogger(InputDriver.class);
+
   private InputDriver() {
   }
 
   public static void main(String[] args) throws IOException {
-    runJob(args[0], args[1]);
+    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
+    ArgumentBuilder abuilder = new ArgumentBuilder();
+    GroupBuilder gbuilder = new GroupBuilder();
+    // Input and output are declared optional; defaults are supplied where the values are read below.
+    Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder).withRequired(false).create();
+    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).withRequired(false).create();
+    Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
+    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(helpOpt).create();
+    // Parse the arguments; an OptionException is logged and answered with the help for the option group.
+    try {
+      Parser parser = new Parser();
+      parser.setGroup(group);
+      CommandLine cmdLine = parser.parse(args);
+      if (cmdLine.hasOption(helpOpt)) {
+        CommandLineUtil.printHelp(group);
+        return;
+      }
+      // Fall back to "testdata" / "output" when the corresponding option was not given.
+      String input = cmdLine.getValue(inputOpt, "testdata").toString();
+      String output = cmdLine.getValue(outputOpt, "output").toString();
+      runJob(input, output);
+    } catch (OptionException e) {
+      LOG.error("Exception parsing command line: ", e);
+      CommandLineUtil.printHelp(group);
+    }
   }
 
   public static void runJob(String input, String output) throws IOException {

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java Thu Oct 15 15:27:40 2009
@@ -21,33 +21,83 @@
 
 import java.io.IOException;
 
+import org.apache.commons.cli2.CommandLine;
+import org.apache.commons.cli2.Group;
+import org.apache.commons.cli2.Option;
+import org.apache.commons.cli2.OptionException;
+import org.apache.commons.cli2.builder.ArgumentBuilder;
+import org.apache.commons.cli2.builder.DefaultOptionBuilder;
+import org.apache.commons.cli2.builder.GroupBuilder;
+import org.apache.commons.cli2.commandline.Parser;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.JobClient;
 import org.apache.hadoop.mapred.JobConf;
+import org.apache.log4j.Logger;
 import org.apache.mahout.clustering.meanshift.MeanShiftCanopyJob;
+import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
 
 public class Job {
-  static final String CLUSTERED_POINTS_OUTPUT_DIRECTORY = "/clusteredPoints";
+  /** Logger for this class. */
+  private static final Logger LOG = Logger.getLogger(Job.class);
+
+  private static final String CLUSTERED_POINTS_OUTPUT_DIRECTORY = "/clusteredPoints";
 
   private Job() {
   }
 
   public static void main(String[] args) throws IOException {
-    if (args.length == 7) {
-      String input = args[0];
-      String output = args[1];
-      String measureClassName = args[2];
-      double t1 = Double.parseDouble(args[3]);
-      double t2 = Double.parseDouble(args[4]);
-      double convergenceDelta = Double.parseDouble(args[5]);
-      int maxIterations = Integer.parseInt(args[6]);
+    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
+    ArgumentBuilder abuilder = new ArgumentBuilder();
+    GroupBuilder gbuilder = new GroupBuilder();
+    // All options are optional: each has a default where it is read below, so the example runs without arguments.
+    Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder).withRequired(false).create();
+    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).withRequired(false).create();
+    Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption(obuilder, abuilder).withRequired(false).create();
+    Option maxIterOpt = DefaultOptionCreator.maxIterOption(obuilder, abuilder).withRequired(false).create();
+    Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
+
+    Option modelOpt = obuilder.withLongName("distanceClass").withRequired(false).withShortName("d").
+        withArgument(abuilder.withName("distanceClass").withMinimum(1).withMaximum(1).create()).
+        withDescription("The distance measure class name.").create();
+
+
+    Option threshold1Opt = obuilder.withLongName("threshold_1").withRequired(false).withShortName("t1").
+        withArgument(abuilder.withName("threshold_1").withMinimum(1).withMaximum(1).create()).
+        withDescription("The T1 distance threshold.").create();
+
+    Option threshold2Opt = obuilder.withLongName("threshold_2").withRequired(false).withShortName("t2").
+        withArgument(abuilder.withName("threshold_2").withMinimum(1).withMaximum(1).create()).
+        withDescription("The T2 distance threshold.").create();
+
+    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(modelOpt).
+        withOption(helpOpt).withOption(convergenceDeltaOpt).withOption(threshold1Opt).withOption(maxIterOpt).
+        withOption(threshold2Opt).create();
+
+    try {
+      Parser parser = new Parser();
+      parser.setGroup(group);
+      CommandLine cmdLine = parser.parse(args);
+      if (cmdLine.hasOption(helpOpt)) {
+        CommandLineUtil.printHelp(group);
+        return;
+      }
+
+      String input = cmdLine.getValue(inputOpt, "testdata").toString();
+      String output = cmdLine.getValue(outputOpt, "output").toString();
+      String measureClassName = cmdLine.getValue(modelOpt, "org.apache.mahout.common.distance.EuclideanDistanceMeasure").toString();
+      double t1 = Double.parseDouble(cmdLine.getValue(threshold1Opt, "47.6").toString());
+      double t2 = Double.parseDouble(cmdLine.getValue(threshold2Opt, "1").toString());
+      double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt, "0.5").toString());
+      int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt, "10").toString());
       runJob(input, output, measureClassName, t1, t2, convergenceDelta,
           maxIterations);
-    } else
-      runJob("testdata", "output",
-          "org.apache.mahout.common.distance.EuclideanDistanceMeasure", 47.6, 1, 0.5, 10);
+    } catch (OptionException e) {
+      LOG.error("Exception parsing command line: ", e);
+      CommandLineUtil.printHelp(group);
+    }
   }
 
   /**

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputDriver.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputDriver.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputDriver.java Thu Oct 15 15:27:40 2009
@@ -19,6 +19,14 @@
 
 import java.io.IOException;
 
+import org.apache.commons.cli2.CommandLine;
+import org.apache.commons.cli2.Group;
+import org.apache.commons.cli2.Option;
+import org.apache.commons.cli2.OptionException;
+import org.apache.commons.cli2.builder.ArgumentBuilder;
+import org.apache.commons.cli2.builder.DefaultOptionBuilder;
+import org.apache.commons.cli2.builder.GroupBuilder;
+import org.apache.commons.cli2.commandline.Parser;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.FileInputFormat;
@@ -27,13 +35,43 @@
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.log4j.Logger;
+import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
 
 public class OutputDriver {
+  /** Logger for this class. */
+  private static final Logger LOG = Logger.getLogger(OutputDriver.class);
+
   private OutputDriver() {
   }
 
   public static void main(String[] args) throws IOException {
-    runJob(args[0], args[1]);
+    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
+    ArgumentBuilder abuilder = new ArgumentBuilder();
+    GroupBuilder gbuilder = new GroupBuilder();
+    // Input and output are declared optional; defaults are supplied where the values are read below.
+    Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder).withRequired(false).create();
+    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).withRequired(false).create();
+    Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
+    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(helpOpt).create();
+    // Parse the arguments; an OptionException is logged and answered with the help for the option group.
+    try {
+      Parser parser = new Parser();
+      parser.setGroup(group);
+      CommandLine cmdLine = parser.parse(args);
+      if (cmdLine.hasOption(helpOpt)) {
+        CommandLineUtil.printHelp(group);
+        return;
+      }
+      // Fall back to "testdata" / "output" when the corresponding option was not given.
+      String input = cmdLine.getValue(inputOpt, "testdata").toString();
+      String output = cmdLine.getValue(outputOpt, "output").toString();
+      runJob(input, output);
+    } catch (OptionException e) {
+      LOG.error("Exception parsing command line: ", e);
+      CommandLineUtil.printHelp(group);
+    }
   }
 
   public static void runJob(String input, String output) throws IOException {