You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by is...@apache.org on 2009/10/15 17:27:45 UTC
svn commit: r825516 - in /lucene/mahout/trunk:
core/src/main/java/org/apache/mahout/cf/taste/hadoop/
core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/
core/src/main/java/org/apache/mahout/clustering/dirichlet/
core/src/main/java/o...
Author: isabel
Date: Thu Oct 15 15:27:40 2009
New Revision: 825516
URL: http://svn.apache.org/viewvc?rev=825516&view=rev
Log:
MAHOUT-138 - final changes for command line parsing in clustering
examples.
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderJob.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOneDiffsToAveragesJob.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOnePrefsToDiffsJob.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletJob.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputDriver.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputDriver.java
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderJob.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommenderJob.java Thu Oct 15 15:27:40 2009
@@ -87,7 +87,7 @@
Option dataModelFileOpt = obuilder.withLongName("dataModelFile").withRequired(true)
.withShortName("m").withArgument(abuilder.withName("dataModelFile").withMinimum(1)
.withMaximum(1).create()).withDescription("File containing data model.").create();
- Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder);
+ Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).create();
Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
Group group = gbuilder.withName("Options").withOption(recommendClassOpt).withOption(userRecommendOpt)
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOneDiffsToAveragesJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOneDiffsToAveragesJob.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOneDiffsToAveragesJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOneDiffsToAveragesJob.java Thu Oct 15 15:27:40 2009
@@ -60,7 +60,7 @@
Option inputOpt = obuilder.withLongName("input").withRequired(true).withShortName("i")
.withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
.withDescription("The Path for input preferences file.").create();
- Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder);
+ Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).create();
Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOnePrefsToDiffsJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOnePrefsToDiffsJob.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOnePrefsToDiffsJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOnePrefsToDiffsJob.java Thu Oct 15 15:27:40 2009
@@ -62,7 +62,7 @@
.withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
.withDescription("The Path for input preferences file.").create();
- Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder);
+ Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).create();
Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java Thu Oct 15 15:27:40 2009
@@ -47,8 +47,8 @@
ArgumentBuilder abuilder = new ArgumentBuilder();
GroupBuilder gbuilder = new GroupBuilder();
- Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder);
- Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder);
+ Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder).create();
+ Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).create();
Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java Thu Oct 15 15:27:40 2009
@@ -68,10 +68,10 @@
ArgumentBuilder abuilder = new ArgumentBuilder();
GroupBuilder gbuilder = new GroupBuilder();
- Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder);
- Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder);
- Option maxIterOpt = DefaultOptionCreator.maxIterOption(obuilder, abuilder);
- Option topicsOpt = DefaultOptionCreator.kOption(obuilder, abuilder);
+ Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder).create();
+ Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).create();
+ Option maxIterOpt = DefaultOptionCreator.maxIterOption(obuilder, abuilder).create();
+ Option topicsOpt = DefaultOptionCreator.kOption(obuilder, abuilder).create();
Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
Option mOpt = obuilder.withLongName("alpha").withRequired(true).withShortName("m").
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletJob.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletJob.java Thu Oct 15 15:27:40 2009
@@ -47,10 +47,10 @@
ArgumentBuilder abuilder = new ArgumentBuilder();
GroupBuilder gbuilder = new GroupBuilder();
- Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder);
- Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder);
- Option maxIterOpt = DefaultOptionCreator.maxIterOption(obuilder, abuilder);
- Option topicsOpt = DefaultOptionCreator.kOption(obuilder, abuilder);
+ Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder).create();
+ Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).create();
+ Option maxIterOpt = DefaultOptionCreator.maxIterOption(obuilder, abuilder).create();
+ Option topicsOpt = DefaultOptionCreator.kOption(obuilder, abuilder).create();
Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
Option mOpt = obuilder.withLongName("alpha").withRequired(true).withShortName("m").
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java Thu Oct 15 15:27:40 2009
@@ -49,11 +49,11 @@
ArgumentBuilder abuilder = new ArgumentBuilder();
GroupBuilder gbuilder = new GroupBuilder();
- Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder);
- Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder);
- Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption(obuilder, abuilder);
- Option measureClassOpt = DefaultOptionCreator.distanceOption(obuilder, abuilder);
- Option maxIterOpt = DefaultOptionCreator.maxIterOption(obuilder, abuilder);
+ Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder).create();
+ Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).create();
+ Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption(obuilder, abuilder).create();
+ Option measureClassOpt = DefaultOptionCreator.distanceOption(obuilder, abuilder).create();
+ Option maxIterOpt = DefaultOptionCreator.maxIterOption(obuilder, abuilder).create();
Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
Option clustersOpt = obuilder.withLongName("clusters").withRequired(true).withShortName("c").
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java Thu Oct 15 15:27:40 2009
@@ -53,9 +53,9 @@
ArgumentBuilder abuilder = new ArgumentBuilder();
GroupBuilder gbuilder = new GroupBuilder();
- Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder);
- Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder);
- Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption(obuilder, abuilder);
+ Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder).create();
+ Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).create();
+ Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption(obuilder, abuilder).create();
Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
Option modelOpt = obuilder.withLongName("distanceClass").withRequired(true).withShortName("d").
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java Thu Oct 15 15:27:40 2009
@@ -50,10 +50,10 @@
ArgumentBuilder abuilder = new ArgumentBuilder();
GroupBuilder gbuilder = new GroupBuilder();
- Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder);
- Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder);
- Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption(obuilder, abuilder);
- Option maxIterOpt = DefaultOptionCreator.maxIterOption(obuilder, abuilder);
+ Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder).create();
+ Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).create();
+ Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption(obuilder, abuilder).create();
+ Option maxIterOpt = DefaultOptionCreator.maxIterOption(obuilder, abuilder).create();
Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
Option modelOpt = obuilder.withLongName("distanceClass").withRequired(true).withShortName("d").
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java Thu Oct 15 15:27:40 2009
@@ -28,92 +28,84 @@
/**
* Returns a default command line option for convergence delta specification.
*/
- public static Option convergenceOption(
+ public static DefaultOptionBuilder convergenceOption(
final DefaultOptionBuilder obuilder, final ArgumentBuilder abuilder) {
- Option convergenceDeltaOpt = obuilder.withLongName("convergencedelta")
+ return obuilder.withLongName("convergencedelta")
.withRequired(true).withShortName("v").withArgument(
abuilder.withName("convergenceDelta").withMinimum(1).withMaximum(1)
- .create()).withDescription("The convergence delta value.")
- .create();
- return convergenceDeltaOpt;
+ .create()).withDescription("The convergence delta value.");
}
/**
* Returns a default command line option for output directory specification.
*/
- public static Option outputOption(final DefaultOptionBuilder obuilder,
+ public static DefaultOptionBuilder outputOption(final DefaultOptionBuilder obuilder,
final ArgumentBuilder abuilder) {
- Option outputOpt = obuilder.withLongName("output").withRequired(true)
+ return obuilder.withLongName("output").withRequired(true)
.withShortName("o").withArgument(
abuilder.withName("output").withMinimum(1).withMaximum(1).create())
- .withDescription("The directory pathname for output.").create();
- return outputOpt;
+ .withDescription("The directory pathname for output.");
}
/**
* Returns a default command line option for input directory specification.
*/
- public static Option inputOption(final DefaultOptionBuilder obuilder,
+ public static DefaultOptionBuilder inputOption(final DefaultOptionBuilder obuilder,
final ArgumentBuilder abuilder) {
- Option inputOpt = obuilder
+ return obuilder
.withLongName("input")
.withRequired(true)
.withShortName("i")
.withArgument(
abuilder.withName("input").withMinimum(1).withMaximum(1).create())
.withDescription(
- "The Path for input Vectors. Must be a SequenceFile of Writable, Vector.")
- .create();
- return inputOpt;
+ "The Path for input Vectors. Must be a SequenceFile of Writable, Vector.");
}
/**
* Returns a default command line option for specification of numbers of
* clusters to create.
*/
- public static Option kOption(DefaultOptionBuilder obuilder,
+ public static DefaultOptionBuilder kOption(DefaultOptionBuilder obuilder,
ArgumentBuilder abuilder) {
- Option clustersOpt = obuilder
+ return obuilder
.withLongName("k")
- .withRequired(false)
+ .withRequired(true)
.withArgument(
abuilder.withName("k").withMinimum(1).withMaximum(1).create())
.withDescription(
"The k in k-Means. If specified, then a random selection of k Vectors will be chosen as the Centroid and written to the clusters output path.")
- .withShortName("k").create();
- return clustersOpt;
+ .withShortName("k");
}
/**
* Returns a default command line option for specification of max number of
* iterations.
*/
- public static Option maxIterOption(DefaultOptionBuilder obuilder,
+ public static DefaultOptionBuilder maxIterOption(DefaultOptionBuilder obuilder,
ArgumentBuilder abuilder) {
- Option maxIterOpt = obuilder
+ return obuilder
.withLongName("maxIter")
.withRequired(true)
.withShortName("x")
.withArgument(
abuilder.withName("maxIter").withMinimum(1).withMaximum(1).create())
- .withDescription("The maximum number of iterations.").create();
- return maxIterOpt;
+ .withDescription("The maximum number of iterations.");
}
/**
* Returns a default command line option for specification of distance measure
* class to use.
*/
- public static Option distanceOption(DefaultOptionBuilder obuilder,
+ public static DefaultOptionBuilder distanceOption(DefaultOptionBuilder obuilder,
ArgumentBuilder abuilder) {
- Option measureClassOpt = obuilder
+ return obuilder
.withLongName("measure")
.withRequired(true)
.withShortName("d")
.withArgument(
abuilder.withName("measure").withMinimum(1).withMaximum(1).create())
- .withDescription("The classname of the DistanceMeasure.").create();
- return measureClassOpt;
+ .withDescription("The classname of the DistanceMeasure.");
}
/**
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputDriver.java Thu Oct 15 15:27:40 2009
@@ -17,6 +17,14 @@
package org.apache.mahout.clustering.syntheticcontrol.canopy;
+import org.apache.commons.cli2.CommandLine;
+import org.apache.commons.cli2.Group;
+import org.apache.commons.cli2.Option;
+import org.apache.commons.cli2.OptionException;
+import org.apache.commons.cli2.builder.ArgumentBuilder;
+import org.apache.commons.cli2.builder.DefaultOptionBuilder;
+import org.apache.commons.cli2.builder.GroupBuilder;
+import org.apache.commons.cli2.commandline.Parser;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
@@ -25,20 +33,53 @@
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.log4j.Logger;
+import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.matrix.Vector;
import java.io.IOException;
public class InputDriver {
+ /**Logger for this class.*/
+ private static final Logger LOG = Logger.getLogger(InputDriver.class);
+
private InputDriver() {
}
public static void main(String[] args) throws IOException, ClassNotFoundException {
- String input = args[0];
- String output = args[1];
- String vectorClassName = args[2];
- Class<? extends Vector> vectorClass = (Class<? extends Vector>) Class.forName(vectorClassName);
- runJob(input, output, vectorClass);
+ DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
+ ArgumentBuilder abuilder = new ArgumentBuilder();
+ GroupBuilder gbuilder = new GroupBuilder();
+
+ Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder).withRequired(false).create();
+ Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).withRequired(false).create();
+ Option vectorOpt = obuilder.withLongName("vector").withRequired(false).withArgument(
+ abuilder.withName("v").withMinimum(1).withMaximum(1).create()).withDescription(
+ "The vector implementation to use.").withShortName("v").create();
+
+ Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
+
+ Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(vectorOpt).withOption(helpOpt).create();
+
+ try {
+ Parser parser = new Parser();
+ parser.setGroup(group);
+ CommandLine cmdLine = parser.parse(args);
+ if (cmdLine.hasOption(helpOpt)) {
+ CommandLineUtil.printHelp(group);
+ return;
+ }
+
+ String input = cmdLine.getValue(inputOpt, "testdata").toString();
+ String output = cmdLine.getValue(outputOpt, "output").toString();
+ String vectorClassName = cmdLine.getValue(vectorOpt).toString();
+ Class<? extends Vector> vectorClass = (Class<? extends Vector>) Class.forName(vectorClassName);
+ runJob(input, output, vectorClass);
+ } catch (OptionException e) {
+ LOG.error("Exception parsing command line: ", e);
+ CommandLineUtil.printHelp(group);
+ }
}
public static void runJob(String input, String output, Class<? extends Vector> vectorClass) throws IOException {
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java Thu Oct 15 15:27:40 2009
@@ -19,32 +19,92 @@
import java.io.IOException;
+import org.apache.commons.cli2.CommandLine;
+import org.apache.commons.cli2.Group;
+import org.apache.commons.cli2.Option;
+import org.apache.commons.cli2.OptionException;
+import org.apache.commons.cli2.builder.ArgumentBuilder;
+import org.apache.commons.cli2.builder.DefaultOptionBuilder;
+import org.apache.commons.cli2.builder.GroupBuilder;
+import org.apache.commons.cli2.commandline.Parser;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
+import org.apache.log4j.Logger;
import org.apache.mahout.clustering.canopy.CanopyClusteringJob;
import org.apache.mahout.clustering.syntheticcontrol.Constants;
+import org.apache.mahout.common.CommandLineUtil;
import org.apache.mahout.matrix.Vector;
-import org.apache.mahout.matrix.SparseVector;
public class Job {
+ /** Logger for this class.*/
+ private static final Logger LOG = Logger.getLogger(Job.class);
+
private Job() {
}
public static void main(String[] args) throws IOException, ClassNotFoundException {
- if (args.length == 5) {
- String input = args[0];
- String output = args[1];
- String measureClassName = args[2];
- double t1 = Double.parseDouble(args[3]);
- double t2 = Double.parseDouble(args[4]);
- String vectorClassName = args[5];
- Class<? extends Vector> vectorClass = (Class<? extends Vector>) Class.forName(vectorClassName);
- runJob(input, output, measureClassName, t1, t2, vectorClass);
- } else
- runJob("testdata", "output",
- "org.apache.mahout.common.distance.EuclideanDistanceMeasure", 80, 55, SparseVector.class);
+ DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
+ ArgumentBuilder abuilder = new ArgumentBuilder();
+ GroupBuilder gbuilder = new GroupBuilder();
+
+ Option inputOpt = obuilder.withLongName("input").withRequired(false).withArgument(
+ abuilder.withName("input").withMinimum(1).withMaximum(1).create()).
+ withDescription("The Path for input Vectors. Must be a SequenceFile of Writable, Vector").withShortName("i").create();
+ Option outputOpt = obuilder.withLongName("output").withRequired(false).withArgument(
+ abuilder.withName("output").withMinimum(1).withMaximum(1).create()).
+ withDescription("The Path to put the output in").withShortName("o").create();
+
+ Option measureClassOpt = obuilder.withLongName("distance").withRequired(false).withArgument(
+ abuilder.withName("distance").withMinimum(1).withMaximum(1).create()).
+ withDescription("The Distance Measure to use. Default is SquaredEuclidean").withShortName("m").create();
+ Option vectorClassOpt = obuilder.withLongName("vectorClass").withRequired(false).withArgument(
+ abuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create()).
+ withDescription("The Vector implementation class name. Default is SparseVector.class").withShortName("v").create();
+
+ Option t1Opt = obuilder.withLongName("t1").withRequired(false).withArgument(
+ abuilder.withName("t1").withMinimum(1).withMaximum(1).create()).
+ withDescription("t1").withShortName("t1").create();
+ Option t2Opt = obuilder.withLongName("t2").withRequired(false).withArgument(
+ abuilder.withName("t2").withMinimum(1).withMaximum(1).create()).
+ withDescription("t2").withShortName("t2").create();
+
+
+ Option helpOpt = obuilder.withLongName("help").
+ withDescription("Print out help").withShortName("h").create();
+
+ Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)
+ .withOption(measureClassOpt).withOption(vectorClassOpt)
+ .withOption(t1Opt).withOption(t2Opt)
+ .withOption(helpOpt).create();
+
+
+ try {
+ Parser parser = new Parser();
+ parser.setGroup(group);
+ CommandLine cmdLine = parser.parse(args);
+
+ if (cmdLine.hasOption(helpOpt)) {
+ CommandLineUtil.printHelp(group);
+ return;
+ }
+
+ String input = cmdLine.getValue(inputOpt, "testdata").toString();
+ String output = cmdLine.getValue(outputOpt, "output").toString();
+ String measureClass = cmdLine.getValue(
+ measureClassOpt, "org.apache.mahout.common.distance.EuclideanDistanceMeasure").toString();
+
+ Class<? extends Vector> vectorClass = (Class<? extends Vector>) Class.forName(
+ cmdLine.getValue(vectorClassOpt, "org.apache.mahout.matrix.SparseVector").toString());
+ double t1 = Double.parseDouble(cmdLine.getValue(t1Opt, "80").toString());
+ double t2 = Double.parseDouble(cmdLine.getValue(t2Opt, "55").toString());
+
+ runJob(input, output, measureClass, t1, t2, vectorClass);
+ } catch (OptionException e) {
+ LOG.error("Exception", e);
+ CommandLineUtil.printHelp(group);
+ }
}
/**
@@ -58,14 +118,20 @@
* resides in a directory named "testdata", and writes output to a directory
* named "output".
*
- * @param input the String denoting the input directory path
- * @param output the String denoting the output directory path
- * @param measureClassName the String class name of the DistanceMeasure to use
- * @param t1 the canopy T1 threshold
- * @param t2 the canopy T2 threshold
+ * @param input
+ * the String denoting the input directory path
+ * @param output
+ * the String denoting the output directory path
+ * @param measureClassName
+ * the String class name of the DistanceMeasure to use
+ * @param t1
+ * the canopy T1 threshold
+ * @param t2
+ * the canopy T2 threshold
*/
private static void runJob(String input, String output,
- String measureClassName, double t1, double t2, Class<? extends Vector> vectorClass) throws IOException {
+ String measureClassName, double t1, double t2,
+ Class<? extends Vector> vectorClass) throws IOException {
JobClient client = new JobClient();
JobConf conf = new JobConf(Job.class);
@@ -74,10 +140,11 @@
FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
if (dfs.exists(outPath))
dfs.delete(outPath, true);
- final String directoryContainingConvertedInput = output + Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT;
+ final String directoryContainingConvertedInput = output
+ + Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT;
InputDriver.runJob(input, directoryContainingConvertedInput, vectorClass);
- CanopyClusteringJob.runJob(directoryContainingConvertedInput, output, measureClassName,
- t1, t2, vectorClass);
+ CanopyClusteringJob.runJob(directoryContainingConvertedInput, output,
+ measureClassName, t1, t2, vectorClass);
}
}
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java Thu Oct 15 15:27:40 2009
@@ -21,9 +21,18 @@
import java.util.ArrayList;
import java.util.List;
+import org.apache.commons.cli2.CommandLine;
+import org.apache.commons.cli2.Group;
+import org.apache.commons.cli2.Option;
+import org.apache.commons.cli2.OptionException;
+import org.apache.commons.cli2.builder.ArgumentBuilder;
+import org.apache.commons.cli2.builder.DefaultOptionBuilder;
+import org.apache.commons.cli2.builder.GroupBuilder;
+import org.apache.commons.cli2.commandline.Parser;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
+import org.apache.log4j.Logger;
import org.apache.mahout.clustering.dirichlet.DirichletCluster;
import org.apache.mahout.clustering.dirichlet.DirichletDriver;
import org.apache.mahout.clustering.dirichlet.DirichletJob;
@@ -31,37 +40,75 @@
import org.apache.mahout.clustering.dirichlet.models.Model;
import org.apache.mahout.clustering.kmeans.KMeansDriver;
import org.apache.mahout.clustering.syntheticcontrol.canopy.InputDriver;
+import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.matrix.Vector;
import org.apache.mahout.matrix.SparseVector;
import static org.apache.mahout.clustering.syntheticcontrol.Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT;
public class Job {
-
-
+
+ /** Log4j logger for this class; main uses it to report command-line parsing failures. */
+ private static final Logger LOG = Logger.getLogger(Job.class);
+
private Job() { // private and empty: the class cannot be instantiated from outside
}
public static void main(String[] args) throws IOException,
ClassNotFoundException, InstantiationException, IllegalAccessException {
- if (args.length == 7) {
- String input = args[0];
- String output = args[1];
- String modelFactory = args[2];
- int numClusters = Integer.parseInt(args[3]);
- int maxIterations = Integer.parseInt(args[4]);
- double alpha_0 = Double.parseDouble(args[5]);
- int numReducers = Integer.parseInt(args[6]);
- String vectorClassName = args[7];
+ DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
+ ArgumentBuilder abuilder = new ArgumentBuilder();
+ GroupBuilder gbuilder = new GroupBuilder();
+
+ Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder).withRequired(false).create();
+ Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).withRequired(false).create();
+ Option maxIterOpt = DefaultOptionCreator.maxIterOption(obuilder, abuilder).withRequired(false).create();
+ Option topicsOpt = DefaultOptionCreator.kOption(obuilder, abuilder).withRequired(false).create();
+
+ Option redOpt = obuilder.withLongName("reducerNum").withRequired(false).withArgument(
+ abuilder.withName("r").withMinimum(1).withMaximum(1).create()).withDescription(
+ "The number of reducers to use.").withShortName("r").create();
+
+ Option vectorOpt = obuilder.withLongName("vector").withRequired(false).withArgument(
+ abuilder.withName("v").withMinimum(1).withMaximum(1).create()).withDescription(
+ "The vector implementation to use.").withShortName("v").create();
+
+ Option mOpt = obuilder.withLongName("alpha").withRequired(false).withShortName("m").
+ withArgument(abuilder.withName("alpha").withMinimum(1).withMaximum(1).create()).
+ withDescription("The alpha0 value for the DirichletDistribution.").create();
+
+ Option modelOpt = obuilder.withLongName("modelClass").withRequired(false).withShortName("d").
+ withArgument(abuilder.withName("modelClass").withMinimum(1).withMaximum(1).create()).
+ withDescription("The ModelDistribution class name.").create();
+ Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
+
+ Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(modelOpt).
+ withOption(maxIterOpt).withOption(mOpt).withOption(topicsOpt).withOption(redOpt).withOption(helpOpt).create();
+
+ try {
+ Parser parser = new Parser();
+ parser.setGroup(group);
+ CommandLine cmdLine = parser.parse(args);
+ if (cmdLine.hasOption(helpOpt)) {
+ CommandLineUtil.printHelp(group);
+ return;
+ }
+
+ String input = cmdLine.getValue(inputOpt, "testdata").toString();
+ String output = cmdLine.getValue(outputOpt, "output").toString();
+ String modelFactory = cmdLine.getValue(modelOpt, "org.apache.mahout.clustering.syntheticcontrol.dirichlet.NormalScModelDistribution").toString();
+ int numModels = Integer.parseInt(cmdLine.getValue(topicsOpt, "10").toString());
+ int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt, "5").toString());
+ double alpha_0 = Double.parseDouble(cmdLine.getValue(mOpt, "1.0").toString());
+ int numReducers = Integer.parseInt(cmdLine.getValue(redOpt, "1").toString());
+ String vectorClassName = cmdLine.getValue(vectorOpt, "org.apache.mahout.matrix.SparseVector").toString();
Class<? extends Vector> vectorClass = (Class<? extends Vector>) Class.forName(vectorClassName);
- runJob(input, output, modelFactory, numClusters, maxIterations, alpha_0,
- numReducers, vectorClass);
- } else
- runJob(
- "testdata",
- "output",
- "org.apache.mahout.clustering.syntheticcontrol.dirichlet.NormalScModelDistribution",
- 10, 5, 1.0, 1, SparseVector.class);
+ runJob(input, output, modelFactory, numModels, maxIterations, alpha_0, numReducers, vectorClass);
+ } catch (OptionException e) {
+ LOG.error("Exception parsing command line: ", e);
+ CommandLineUtil.printHelp(group);
+ }
}
/**
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java Thu Oct 15 15:27:40 2009
@@ -21,41 +21,93 @@
import java.io.IOException;
+import org.apache.commons.cli2.CommandLine;
+import org.apache.commons.cli2.Group;
+import org.apache.commons.cli2.Option;
+import org.apache.commons.cli2.OptionException;
+import org.apache.commons.cli2.builder.ArgumentBuilder;
+import org.apache.commons.cli2.builder.DefaultOptionBuilder;
+import org.apache.commons.cli2.builder.GroupBuilder;
+import org.apache.commons.cli2.commandline.Parser;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
+import org.apache.log4j.Logger;
import org.apache.mahout.clustering.canopy.CanopyClusteringJob;
import org.apache.mahout.clustering.canopy.CanopyDriver;
import org.apache.mahout.clustering.kmeans.KMeansDriver;
+import org.apache.mahout.clustering.kmeans.RandomSeedGenerator;
import org.apache.mahout.clustering.syntheticcontrol.canopy.InputDriver;
+import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.HadoopUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
+import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
import org.apache.mahout.matrix.SparseVector;
import org.apache.mahout.matrix.Vector;
public class Job {
+ /** Log4j logger for this class; main uses it to report command-line parsing failures. */
+ private static final Logger LOG = Logger.getLogger(Job.class);
+
private Job() { // private and empty: the class cannot be instantiated from outside
}
public static void main(String[] args) throws IOException,
ClassNotFoundException {
- if (args.length == 8) {
- String input = args[0];
- String output = args[1];
- String measureClass = args[2];
- double t1 = Double.parseDouble(args[3]);
- double t2 = Double.parseDouble(args[4]);
- double convergenceDelta = Double.parseDouble(args[5]);
- int maxIterations = Integer.parseInt(args[6]);
- String vectorClassName = args[7];
- Class<? extends Vector> vectorClass = (Class<? extends Vector>) Class
- .forName(vectorClassName);
- runJob(input, output, measureClass, t1, t2, convergenceDelta,
- maxIterations, vectorClass);
- } else
- runJob("testdata", "output",
- "org.apache.mahout.common.distance.EuclideanDistanceMeasure", 80, 55, 0.5, 10,
- SparseVector.class);
+ DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
+ ArgumentBuilder abuilder = new ArgumentBuilder();
+ GroupBuilder gbuilder = new GroupBuilder();
+
+ Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder).withRequired(false).create();
+ Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).withRequired(false).create();
+ Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption(obuilder, abuilder).withRequired(false).create();
+ Option maxIterationsOpt = DefaultOptionCreator.maxIterOption(obuilder, abuilder).withRequired(false).create();
+
+ Option measureClassOpt = obuilder.withLongName("distance").withRequired(false).withArgument(
+ abuilder.withName("distance").withMinimum(1).withMaximum(1).create()).withDescription(
+ "The Distance Measure to use. Default is SquaredEuclidean").withShortName("m").create();
+
+ Option t1Opt = obuilder.withLongName("t1").withRequired(false).withArgument(
+ abuilder.withName("t1").withMinimum(1).withMaximum(1).create()).withDescription(
+ "The t1 value to use.").withShortName("m").create();
+ Option t2Opt = obuilder.withLongName("t2").withRequired(false).withArgument(
+ abuilder.withName("t2").withMinimum(1).withMaximum(1).create()).withDescription(
+ "The t2 value to use.").withShortName("m").create();
+ Option vectorClassOpt = obuilder.withLongName("vectorClass").withRequired(false).withArgument(
+ abuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create()).withDescription(
+ "The Vector implementation class name. Default is SparseVector.class").withShortName("v").create();
+
+ Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
+
+ Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)
+ .withOption(measureClassOpt).withOption(convergenceDeltaOpt).withOption(maxIterationsOpt)
+ .withOption(vectorClassOpt).withOption(t1Opt).withOption(t2Opt).withOption(helpOpt).create();
+ try {
+ Parser parser = new Parser();
+ parser.setGroup(group);
+ CommandLine cmdLine = parser.parse(args);
+
+ if (cmdLine.hasOption(helpOpt)) {
+ CommandLineUtil.printHelp(group);
+ return;
+ }
+ String input = cmdLine.getValue(inputOpt, "testdata").toString();
+ String output = cmdLine.getValue(outputOpt, "output").toString();
+ String measureClass = cmdLine.getValue(measureClassOpt, "org.apache.mahout.common.distance.EuclideanDistanceMeasure").toString();
+ double t1 = Double.parseDouble(cmdLine.getValue(t1Opt, "80").toString());
+ double t2 = Double.parseDouble(cmdLine.getValue(t2Opt, "55").toString());
+ double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt, "0.5").toString());
+ int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterationsOpt, 10).toString());
+ Class<? extends Vector> vectorClass = (Class<? extends Vector>) Class.forName(
+ cmdLine.getValue(vectorClassOpt, "org.apache.mahout.matrix.SparseVector").toString());
+
+ runJob(input, output, measureClass, t1, t2, convergenceDelta, maxIterations, vectorClass);
+ } catch (OptionException e) {
+ LOG.error("Exception", e);
+ CommandLineUtil.printHelp(group);
+ }
}
/**
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputDriver.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputDriver.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputDriver.java Thu Oct 15 15:27:40 2009
@@ -19,6 +19,14 @@
import java.io.IOException;
+import org.apache.commons.cli2.CommandLine;
+import org.apache.commons.cli2.Group;
+import org.apache.commons.cli2.Option;
+import org.apache.commons.cli2.OptionException;
+import org.apache.commons.cli2.builder.ArgumentBuilder;
+import org.apache.commons.cli2.builder.DefaultOptionBuilder;
+import org.apache.commons.cli2.builder.GroupBuilder;
+import org.apache.commons.cli2.commandline.Parser;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
@@ -27,14 +35,45 @@
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.log4j.Logger;
import org.apache.mahout.clustering.meanshift.MeanShiftCanopy;
+import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
+import org.apache.mahout.matrix.Vector;
public class InputDriver {
+ /** Log4j logger for this class; main uses it to report command-line parsing failures. */
+ private static final Logger LOG = Logger.getLogger(InputDriver.class);
+
private InputDriver() { // private and empty: the class cannot be instantiated from outside
}
public static void main(String[] args) throws IOException { // entry point: parse the options, then delegate to runJob
- runJob(args[0], args[1]);
+ DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
+ ArgumentBuilder abuilder = new ArgumentBuilder();
+ GroupBuilder gbuilder = new GroupBuilder();
+
+ Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder).withRequired(false).create(); // optional; default supplied below
+ Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).withRequired(false).create(); // optional; default supplied below
+ Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
+ Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(helpOpt).create();
+
+ try {
+ Parser parser = new Parser();
+ parser.setGroup(group);
+ CommandLine cmdLine = parser.parse(args);
+ if (cmdLine.hasOption(helpOpt)) { // help requested: print usage and skip the job
+ CommandLineUtil.printHelp(group);
+ return;
+ }
+
+ String input = cmdLine.getValue(inputOpt, "testdata").toString(); // "testdata" is used when the option is absent
+ String output = cmdLine.getValue(outputOpt, "output").toString(); // "output" is used when the option is absent
+ runJob(input, output);
+ } catch (OptionException e) { // parse failure: log it and show the usage text instead of running
+ LOG.error("Exception parsing command line: ", e);
+ CommandLineUtil.printHelp(group);
+ }
}
public static void runJob(String input, String output) throws IOException {
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java Thu Oct 15 15:27:40 2009
@@ -21,33 +21,83 @@
import java.io.IOException;
+import org.apache.commons.cli2.CommandLine;
+import org.apache.commons.cli2.Group;
+import org.apache.commons.cli2.Option;
+import org.apache.commons.cli2.OptionException;
+import org.apache.commons.cli2.builder.ArgumentBuilder;
+import org.apache.commons.cli2.builder.DefaultOptionBuilder;
+import org.apache.commons.cli2.builder.GroupBuilder;
+import org.apache.commons.cli2.commandline.Parser;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
+import org.apache.log4j.Logger;
import org.apache.mahout.clustering.meanshift.MeanShiftCanopyJob;
+import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
public class Job {
- static final String CLUSTERED_POINTS_OUTPUT_DIRECTORY = "/clusteredPoints";
+ /** Log4j logger for this class; main uses it to report command-line parsing failures. */
+ private static final Logger LOG = Logger.getLogger(Job.class);
+
+ private static final String CLUSTERED_POINTS_OUTPUT_DIRECTORY = "/clusteredPoints"; // now private; its use lies outside this hunk
private Job() { // private and empty: the class cannot be instantiated from outside
}
public static void main(String[] args) throws IOException {
- if (args.length == 7) {
- String input = args[0];
- String output = args[1];
- String measureClassName = args[2];
- double t1 = Double.parseDouble(args[3]);
- double t2 = Double.parseDouble(args[4]);
- double convergenceDelta = Double.parseDouble(args[5]);
- int maxIterations = Integer.parseInt(args[6]);
+ DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
+ ArgumentBuilder abuilder = new ArgumentBuilder();
+ GroupBuilder gbuilder = new GroupBuilder();
+
+ Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder).create();
+ Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).create();
+ Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption(obuilder, abuilder).create();
+ Option maxIterOpt = DefaultOptionCreator.maxIterOption(obuilder, abuilder).create();
+ Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
+
+ Option modelOpt = obuilder.withLongName("distanceClass").withRequired(true).withShortName("d").
+ withArgument(abuilder.withName("distanceClass").withMinimum(1).withMaximum(1).create()).
+ withDescription("The distance measure class name.").create();
+
+
+ Option threshold1Opt = obuilder.withLongName("threshold_1").withRequired(true).withShortName("t1").
+ withArgument(abuilder.withName("threshold_1").withMinimum(1).withMaximum(1).create()).
+ withDescription("The T1 distance threshold.").create();
+
+ Option threshold2Opt = obuilder.withLongName("threshold_2").withRequired(true).withShortName("t2").
+ withArgument(abuilder.withName("threshold_2").withMinimum(1).withMaximum(1).create()).
+ withDescription("The T1 distance threshold.").create();
+
+ Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(modelOpt).
+ withOption(helpOpt).withOption(convergenceDeltaOpt).withOption(threshold1Opt).withOption(maxIterOpt).
+ withOption(threshold2Opt).create();
+
+ try {
+ Parser parser = new Parser();
+ parser.setGroup(group);
+ CommandLine cmdLine = parser.parse(args);
+ if (cmdLine.hasOption(helpOpt)) {
+ CommandLineUtil.printHelp(group);
+ return;
+ }
+
+ String input = cmdLine.getValue(inputOpt, "testdata").toString();
+ String output = cmdLine.getValue(outputOpt, "output").toString();
+ String measureClassName = cmdLine.getValue(modelOpt, "org.apache.mahout.common.distance.EuclideanDistanceMeasure").toString();
+ double t1 = Double.parseDouble(cmdLine.getValue(threshold1Opt, "47.6").toString());
+ double t2 = Double.parseDouble(cmdLine.getValue(threshold2Opt, "1").toString());
+ double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt, "0.5").toString());
+ int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt, "10").toString());
runJob(input, output, measureClassName, t1, t2, convergenceDelta,
maxIterations);
- } else
- runJob("testdata", "output",
- "org.apache.mahout.common.distance.EuclideanDistanceMeasure", 47.6, 1, 0.5, 10);
+ } catch (OptionException e) {
+ LOG.error("Exception parsing command line: ", e);
+ CommandLineUtil.printHelp(group);
+ }
}
/**
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputDriver.java?rev=825516&r1=825515&r2=825516&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputDriver.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/OutputDriver.java Thu Oct 15 15:27:40 2009
@@ -19,6 +19,14 @@
import java.io.IOException;
+import org.apache.commons.cli2.CommandLine;
+import org.apache.commons.cli2.Group;
+import org.apache.commons.cli2.Option;
+import org.apache.commons.cli2.OptionException;
+import org.apache.commons.cli2.builder.ArgumentBuilder;
+import org.apache.commons.cli2.builder.DefaultOptionBuilder;
+import org.apache.commons.cli2.builder.GroupBuilder;
+import org.apache.commons.cli2.commandline.Parser;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
@@ -27,13 +35,43 @@
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.log4j.Logger;
+import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
public class OutputDriver {
+ /** Log4j logger for this class; main uses it to report command-line parsing failures. */
+ private static final Logger LOG = Logger.getLogger(OutputDriver.class);
+
private OutputDriver() { // private and empty: the class cannot be instantiated from outside
}
public static void main(String[] args) throws IOException { // entry point: parse the options, then delegate to runJob
- runJob(args[0], args[1]);
+ DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
+ ArgumentBuilder abuilder = new ArgumentBuilder();
+ GroupBuilder gbuilder = new GroupBuilder();
+
+ Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder).withRequired(false).create(); // optional; default supplied below
+ Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).withRequired(false).create(); // optional; default supplied below
+ Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
+ Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(helpOpt).create();
+
+ try {
+ Parser parser = new Parser();
+ parser.setGroup(group);
+ CommandLine cmdLine = parser.parse(args);
+ if (cmdLine.hasOption(helpOpt)) { // help requested: print usage and skip the job
+ CommandLineUtil.printHelp(group);
+ return;
+ }
+
+ String input = cmdLine.getValue(inputOpt, "testdata").toString(); // "testdata" is used when the option is absent
+ String output = cmdLine.getValue(outputOpt, "output").toString(); // "output" is used when the option is absent
+ runJob(input, output);
+ } catch (OptionException e) { // parse failure: log it and show the usage text instead of running
+ LOG.error("Exception parsing command line: ", e);
+ CommandLineUtil.printHelp(group);
+ }
}
public static void runJob(String input, String output) throws IOException {