You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by je...@apache.org on 2010/05/12 21:08:09 UTC
svn commit: r943637 - in /mahout/trunk:
core/src/main/java/org/apache/mahout/clustering/canopy/
core/src/main/java/org/apache/mahout/clustering/dirichlet/
core/src/main/java/org/apache/mahout/clustering/meanshift/
examples/src/main/java/org/apache/maho...
Author: jeastman
Date: Wed May 12 19:08:08 2010
New Revision: 943637
URL: http://svn.apache.org/viewvc?rev=943637&view=rev
Log:
- added -w options to DirichletDriver, CanopyDriver and MeanShiftCanopyDriver
- added optional output deletion to the drivers' main() methods, performed only if -w is set
- removed non-Hadoopable ClusterDumper calls from syntheticcontrol examples. All examples run again in Hadoop
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java?rev=943637&r1=943636&r2=943637&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java Wed May 12 19:08:08 2010
@@ -48,78 +48,81 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public final class CanopyDriver {
-
+
private static final Logger log = LoggerFactory.getLogger(CanopyDriver.class);
+
public static final String DEFAULT_CLUSTERED_POINTS_DIRECTORY = "clusteredPoints";
-
- private CanopyDriver() { }
-
+
+ private CanopyDriver() {
+ }
+
public static void main(String[] args) throws IOException {
DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
ArgumentBuilder abuilder = new ArgumentBuilder();
GroupBuilder gbuilder = new GroupBuilder();
-
+
Option inputOpt = obuilder.withLongName("input").withRequired(true).withArgument(
- abuilder.withName("input").withMinimum(1).withMaximum(1).create()).withDescription(
- "The Path for input Vectors. Must be a SequenceFile of Writable, Vector").withShortName("i").create();
-
+ abuilder.withName("input").withMinimum(1).withMaximum(1).create()).withDescription(
+ "The Path for input Vectors. Must be a SequenceFile of Writable, Vector").withShortName("i").create();
+
Option outputOpt = obuilder.withLongName("output").withRequired(true).withArgument(
- abuilder.withName("output").withMinimum(1).withMaximum(1).create()).withDescription(
- "The Path to put the output in").withShortName("o").create();
-
+ abuilder.withName("output").withMinimum(1).withMaximum(1).create()).withDescription("The Path to put the output in")
+ .withShortName("o").create();
+
+ Option overwriteOutput = obuilder.withLongName("overwrite").withRequired(false).withDescription(
+ "If set, overwrite the output directory").withShortName("w").create();
+
Option measureClassOpt = obuilder.withLongName("distance").withRequired(false).withArgument(
- abuilder.withName("distance").withMinimum(1).withMaximum(1).create()).withDescription(
- "The Distance Measure to use. Default is SquaredEuclidean").withShortName("m").create();
-
+ abuilder.withName("distance").withMinimum(1).withMaximum(1).create()).withDescription(
+ "The Distance Measure to use. Default is SquaredEuclidean").withShortName("m").create();
+
Option vectorClassOpt = obuilder.withLongName("vectorClass").withRequired(false).withArgument(
- abuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create()).withDescription(
- "The Vector implementation class name. Default is RandomAccessSparseVector.class").withShortName("v")
- .create();
+ abuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create()).withDescription(
+ "The Vector implementation class name. Default is RandomAccessSparseVector.class").withShortName("v").create();
Option t1Opt = obuilder.withLongName("t1").withRequired(true).withArgument(
- abuilder.withName("t1").withMinimum(1).withMaximum(1).create()).withDescription("t1").withShortName(
- "t1").create();
+ abuilder.withName("t1").withMinimum(1).withMaximum(1).create()).withDescription("t1").withShortName("t1").create();
Option t2Opt = obuilder.withLongName("t2").withRequired(true).withArgument(
- abuilder.withName("t2").withMinimum(1).withMaximum(1).create()).withDescription("t2").withShortName(
- "t2").create();
-
- Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
- .create();
-
- Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(
- measureClassOpt).withOption(vectorClassOpt).withOption(t1Opt).withOption(t2Opt).withOption(helpOpt)
- .create();
-
+ abuilder.withName("t2").withMinimum(1).withMaximum(1).create()).withDescription("t2").withShortName("t2").create();
+
+ Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h").create();
+
+ Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(overwriteOutput).withOption(
+ measureClassOpt).withOption(vectorClassOpt).withOption(t1Opt).withOption(t2Opt).withOption(helpOpt).create();
+
try {
Parser parser = new Parser();
parser.setGroup(group);
CommandLine cmdLine = parser.parse(args);
-
+
if (cmdLine.hasOption(helpOpt)) {
CommandLineUtil.printHelp(group);
return;
}
-
+
Path input = new Path(cmdLine.getValue(inputOpt).toString());
Path output = new Path(cmdLine.getValue(outputOpt).toString());
+ if (cmdLine.hasOption(overwriteOutput)) {
+ HadoopUtil.overwriteOutput(output);
+ }
String measureClass = SquaredEuclideanDistanceMeasure.class.getName();
if (cmdLine.hasOption(measureClassOpt)) {
measureClass = cmdLine.getValue(measureClassOpt).toString();
}
-
+
// Class<? extends Vector> vectorClass = cmdLine.hasOption(vectorClassOpt) == false ?
// RandomAccessSparseVector.class
// : (Class<? extends Vector>) Class.forName(cmdLine.getValue(vectorClassOpt).toString());
double t1 = Double.parseDouble(cmdLine.getValue(t1Opt).toString());
double t2 = Double.parseDouble(cmdLine.getValue(t2Opt).toString());
-
+
runJob(input, output, measureClass, t1, t2, false);
} catch (OptionException e) {
log.error("Exception", e);
CommandLineUtil.printHelp(group);
-
+
}
}
-
+
/**
* Run the job
*
@@ -136,39 +139,37 @@ public final class CanopyDriver {
* @param runClustering
* true if points are to be clustered after clusters are determined
*/
- public static void runJob(Path input, Path output,
- String measureClassName, double t1, double t2, boolean runClustering) throws IOException {
- log.info("Input: {} Out: {} "
- + "Measure: {} t1: {} t2: {}", new Object[] {input, output, measureClassName, t1, t2});
+ public static void runJob(Path input, Path output, String measureClassName, double t1, double t2, boolean runClustering)
+ throws IOException {
+ log.info("Input: {} Out: {} " + "Measure: {} t1: {} t2: {}", new Object[] { input, output, measureClassName, t1, t2 });
Configurable client = new JobClient();
JobConf conf = new JobConf(CanopyDriver.class);
conf.set(CanopyConfigKeys.DISTANCE_MEASURE_KEY, measureClassName);
conf.set(CanopyConfigKeys.T1_KEY, String.valueOf(t1));
conf.set(CanopyConfigKeys.T2_KEY, String.valueOf(t2));
-
+
conf.setInputFormat(SequenceFileInputFormat.class);
-
+
conf.setMapOutputKeyClass(Text.class);
conf.setMapOutputValueClass(VectorWritable.class);
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(Canopy.class);
-
+
FileInputFormat.setInputPaths(conf, input);
-
+
Path canopyOutputDir = new Path(output, Cluster.CLUSTERS_DIR + '0');
FileOutputFormat.setOutputPath(conf, canopyOutputDir);
-
+
conf.setMapperClass(CanopyMapper.class);
conf.setReducerClass(CanopyReducer.class);
conf.setNumReduceTasks(1);
conf.setOutputFormat(SequenceFileOutputFormat.class);
-
+
client.setConf(conf);
- HadoopUtil.overwriteOutput(output);
JobClient.runJob(conf);
-
- if (runClustering){
+
+ if (runClustering) {
runClustering(input, canopyOutputDir, output, measureClassName, t1, t2);
}
}
@@ -189,36 +190,32 @@ public final class CanopyDriver {
* @param t2
* the T2 distance threshold
*/
- public static void runClustering(Path points,
- Path canopies,
- Path output,
- String measureClassName,
- double t1,
- double t2) throws IOException {
+ public static void runClustering(Path points, Path canopies, Path output, String measureClassName, double t1, double t2)
+ throws IOException {
Configurable client = new JobClient();
JobConf conf = new JobConf(CanopyDriver.class);
-
+
conf.set(CanopyConfigKeys.DISTANCE_MEASURE_KEY, measureClassName);
conf.set(CanopyConfigKeys.T1_KEY, String.valueOf(t1));
conf.set(CanopyConfigKeys.T2_KEY, String.valueOf(t2));
conf.set(CanopyConfigKeys.CANOPY_PATH_KEY, canopies.toString());
-
+
conf.setInputFormat(SequenceFileInputFormat.class);
conf.setOutputKeyClass(IntWritable.class);
conf.setOutputValueClass(WeightedVectorWritable.class);
conf.setOutputFormat(SequenceFileOutputFormat.class);
-
+
FileInputFormat.setInputPaths(conf, points);
Path outPath = new Path(output, DEFAULT_CLUSTERED_POINTS_DIRECTORY);
FileOutputFormat.setOutputPath(conf, outPath);
-
+
conf.setMapperClass(ClusterMapper.class);
conf.setReducerClass(IdentityReducer.class);
conf.setNumReduceTasks(0);
-
+
client.setConf(conf);
- HadoopUtil.overwriteOutput(outPath);
+ HadoopUtil.overwriteOutput(outPath);
JobClient.runJob(conf);
}
-
+
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java?rev=943637&r1=943636&r2=943637&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java Wed May 12 19:08:08 2010
@@ -45,6 +45,7 @@ import org.apache.mahout.clustering.Clus
import org.apache.mahout.clustering.WeightedVectorWritable;
import org.apache.mahout.clustering.dirichlet.models.VectorModelDistribution;
import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
@@ -85,6 +86,9 @@ public class DirichletDriver {
Option topicsOpt = DefaultOptionCreator.kOption().create();
Option helpOpt = DefaultOptionCreator.helpOption();
+ Option overwriteOutput = obuilder.withLongName("overwrite").withRequired(false).withDescription(
+ "If set, overwrite the output directory").withShortName("w").create();
+
Option mOpt = obuilder.withLongName("alpha").withRequired(true).withShortName("m").withArgument(
abuilder.withName("alpha").withMinimum(1).withMaximum(1).create()).withDescription(
"The alpha0 value for the DirichletDistribution.").create();
@@ -116,9 +120,10 @@ public class DirichletDriver {
Option thresholdOpt = obuilder.withLongName("threshold").withRequired(false).withShortName("t").withArgument(
abuilder.withName("threshold").withMinimum(1).withMaximum(1).create()).withDescription("The pdf threshold").create();
- Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(modelOpt).withOption(
- prototypeOpt).withOption(sizeOpt).withOption(maxIterOpt).withOption(mOpt).withOption(topicsOpt).withOption(helpOpt)
- .withOption(numRedOpt).withOption(clusteringOpt).withOption(emitMostLikelyOpt).withOption(thresholdOpt).create();
+ Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(overwriteOutput).withOption(
+ modelOpt).withOption(prototypeOpt).withOption(sizeOpt).withOption(maxIterOpt).withOption(mOpt).withOption(topicsOpt)
+ .withOption(helpOpt).withOption(numRedOpt).withOption(clusteringOpt).withOption(emitMostLikelyOpt).withOption(thresholdOpt)
+ .create();
try {
Parser parser = new Parser();
@@ -131,6 +136,9 @@ public class DirichletDriver {
Path input = new Path(cmdLine.getValue(inputOpt).toString());
Path output = new Path(cmdLine.getValue(outputOpt).toString());
+ if (cmdLine.hasOption(overwriteOutput)) {
+ HadoopUtil.overwriteOutput(output);
+ }
String modelFactory = "org.apache.mahout.clustering.dirichlet.models.NormalModelDistribution";
if (cmdLine.hasOption(modelOpt)) {
modelFactory = cmdLine.getValue(modelOpt).toString();
@@ -241,9 +249,9 @@ public class DirichletDriver {
}
}
- private static void writeInitialState(Path output, Path stateIn, String modelFactory, String modelPrototype,
- int prototypeSize, int numModels, double alpha_0) throws ClassNotFoundException, InstantiationException,
- IllegalAccessException, IOException, SecurityException, NoSuchMethodException, InvocationTargetException {
+ private static void writeInitialState(Path output, Path stateIn, String modelFactory, String modelPrototype, int prototypeSize,
+ int numModels, double alpha_0) throws ClassNotFoundException, InstantiationException, IllegalAccessException, IOException,
+ SecurityException, NoSuchMethodException, InvocationTargetException {
DirichletState<VectorWritable> state = createState(modelFactory, modelPrototype, prototypeSize, numModels, alpha_0);
JobConf job = new JobConf(DirichletDriver.class);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java?rev=943637&r1=943636&r2=943637&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java Wed May 12 19:08:08 2010
@@ -43,6 +43,7 @@ import org.apache.mahout.clustering.Clus
import org.apache.mahout.clustering.WeightedVectorWritable;
import org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansDriver;
import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -68,6 +69,9 @@ public final class MeanShiftCanopyDriver
Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().create();
Option helpOpt = DefaultOptionCreator.helpOption();
Option maxIterOpt = DefaultOptionCreator.maxIterOption().create();
+ Option overwriteOutput = obuilder.withLongName("overwrite").withRequired(false).withDescription(
+ "If set, overwrite the output directory").withShortName("w").create();
+
Option inputIsCanopiesOpt = obuilder.withLongName("inputIsCanopies").withRequired(true).withShortName("i").withArgument(
abuilder.withName("inputIsCanopies").withMinimum(1).withMaximum(1).create()).withDescription(
"True if the input directory already contains MeanShiftCanopies").create();
@@ -87,9 +91,9 @@ public final class MeanShiftCanopyDriver
Option clusteringOpt = obuilder.withLongName("clustering").withRequired(false).withDescription(
"If true, run clustering after the iterations have taken place").withShortName("cl").create();
- Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(modelOpt).withOption(helpOpt)
- .withOption(convergenceDeltaOpt).withOption(threshold1Opt).withOption(threshold2Opt).withOption(clusteringOpt).withOption(
- maxIterOpt).withOption(inputIsCanopiesOpt).create();
+ Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(overwriteOutput).withOption(
+ modelOpt).withOption(helpOpt).withOption(convergenceDeltaOpt).withOption(threshold1Opt).withOption(threshold2Opt)
+ .withOption(clusteringOpt).withOption(maxIterOpt).withOption(inputIsCanopiesOpt).create();
try {
Parser parser = new Parser();
@@ -107,6 +111,9 @@ public final class MeanShiftCanopyDriver
Path input = new Path(cmdLine.getValue(inputOpt).toString());
Path output = new Path(cmdLine.getValue(outputOpt).toString());
String measureClassName = cmdLine.getValue(modelOpt).toString();
+ if (cmdLine.hasOption(overwriteOutput)) {
+ HadoopUtil.overwriteOutput(output);
+ }
double t1 = Double.parseDouble(cmdLine.getValue(threshold1Opt).toString());
double t2 = Double.parseDouble(cmdLine.getValue(threshold2Opt).toString());
double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt).toString());
@@ -281,8 +288,8 @@ public final class MeanShiftCanopyDriver
if (runClustering) {
// now cluster the points
- MeanShiftCanopyDriver.runClustering((inputIsCanopies ? input : new Path(output, Cluster.INITIAL_CLUSTERS_DIR)),
- clustersIn, new Path(output, Cluster.CLUSTERED_POINTS_DIR));
+ MeanShiftCanopyDriver.runClustering((inputIsCanopies ? input : new Path(output, Cluster.INITIAL_CLUSTERS_DIR)), clustersIn,
+ new Path(output, Cluster.CLUSTERED_POINTS_DIR));
}
}
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java?rev=943637&r1=943636&r2=943637&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java Wed May 12 19:08:08 2010
@@ -41,71 +41,68 @@ import org.slf4j.LoggerFactory;
public final class Job {
private static final Logger log = LoggerFactory.getLogger(Job.class);
-
- private Job() { }
-
+
+ private Job() {
+ }
+
public static void main(String[] args) throws Exception {
DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
ArgumentBuilder abuilder = new ArgumentBuilder();
GroupBuilder gbuilder = new GroupBuilder();
-
+
Option inputOpt = obuilder.withLongName("input").withRequired(false).withArgument(
- abuilder.withName("input").withMinimum(1).withMaximum(1).create()).withDescription(
- "The Path for input Vectors. Must be a SequenceFile of Writable, Vector").withShortName("i").create();
+ abuilder.withName("input").withMinimum(1).withMaximum(1).create()).withDescription(
+ "The Path for input Vectors. Must be a SequenceFile of Writable, Vector").withShortName("i").create();
Option outputOpt = obuilder.withLongName("output").withRequired(false).withArgument(
- abuilder.withName("output").withMinimum(1).withMaximum(1).create()).withDescription(
- "The Path to put the output in").withShortName("o").create();
-
+ abuilder.withName("output").withMinimum(1).withMaximum(1).create()).withDescription("The Path to put the output in")
+ .withShortName("o").create();
+
Option measureClassOpt = obuilder.withLongName("distance").withRequired(false).withArgument(
- abuilder.withName("distance").withMinimum(1).withMaximum(1).create()).withDescription(
- "The Distance Measure to use. Default is SquaredEuclidean").withShortName("m").create();
+ abuilder.withName("distance").withMinimum(1).withMaximum(1).create()).withDescription(
+ "The Distance Measure to use. Default is SquaredEuclidean").withShortName("m").create();
// Option vectorClassOpt = obuilder.withLongName("vectorClass").withRequired(false).withArgument(
// abuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create()).
// withDescription("The Vector implementation class name. Default is RandomAccessSparseVector.class")
// .withShortName("v").create();
-
+
Option t1Opt = obuilder.withLongName("t1").withRequired(false).withArgument(
- abuilder.withName("t1").withMinimum(1).withMaximum(1).create()).withDescription("t1").withShortName(
- "t1").create();
+ abuilder.withName("t1").withMinimum(1).withMaximum(1).create()).withDescription("t1").withShortName("t1").create();
Option t2Opt = obuilder.withLongName("t2").withRequired(false).withArgument(
- abuilder.withName("t2").withMinimum(1).withMaximum(1).create()).withDescription("t2").withShortName(
- "t2").create();
-
- Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
- .create();
-
- Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(
- measureClassOpt)// .withOption(vectorClassOpt)
+ abuilder.withName("t2").withMinimum(1).withMaximum(1).create()).withDescription("t2").withShortName("t2").create();
+
+ Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h").create();
+
+ Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(measureClassOpt)// .withOption(vectorClassOpt)
.withOption(t1Opt).withOption(t2Opt).withOption(helpOpt).create();
-
+
try {
Parser parser = new Parser();
parser.setGroup(group);
CommandLine cmdLine = parser.parse(args);
-
+
if (cmdLine.hasOption(helpOpt)) {
CommandLineUtil.printHelp(group);
return;
}
-
+
Path input = new Path(cmdLine.getValue(inputOpt, "testdata").toString());
Path output = new Path(cmdLine.getValue(outputOpt, "output").toString());
- String measureClass = cmdLine.getValue(measureClassOpt,
- "org.apache.mahout.common.distance.EuclideanDistanceMeasure").toString();
-
+ String measureClass = cmdLine.getValue(measureClassOpt, "org.apache.mahout.common.distance.EuclideanDistanceMeasure")
+ .toString();
+
// String className = cmdLine.getValue(vectorClassOpt,
// "org.apache.mahout.math.RandomAccessSparseVector").toString();
// Class<? extends Vector> vectorClass = Class.forName(className).asSubclass(Vector.class);
double t1 = Double.parseDouble(cmdLine.getValue(t1Opt, "80").toString());
double t2 = Double.parseDouble(cmdLine.getValue(t2Opt, "55").toString());
-
+
runJob(input, output, measureClass, t1, t2);
} catch (OptionException e) {
Job.log.error("Exception", e);
CommandLineUtil.printHelp(group);
}
}
-
+
/**
* Run the canopy clustering job on an input dataset using the given distance measure, t1 and t2 parameters.
* All output data will be written to the output directory, which will be initially deleted if it exists.
@@ -127,23 +124,18 @@ public final class Job {
* @throws IllegalAccessException
* @throws InstantiationException
*/
- private static void runJob(Path input, Path output, String measureClassName,
- double t1, double t2) throws IOException, InstantiationException, IllegalAccessException {
+ private static void runJob(Path input, Path output, String measureClassName, double t1, double t2) throws IOException,
+ InstantiationException, IllegalAccessException {
JobClient client = new JobClient();
JobConf conf = new JobConf(Job.class);
-
+
client.setConf(conf);
HadoopUtil.overwriteOutput(output);
Path directoryContainingConvertedInput = new Path(output, Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT);
- InputDriver.runJob(input, directoryContainingConvertedInput,
- "org.apache.mahout.math.RandomAccessSparseVector");
+ InputDriver.runJob(input, directoryContainingConvertedInput, "org.apache.mahout.math.RandomAccessSparseVector");
CanopyDriver.runJob(directoryContainingConvertedInput, output, measureClassName, t1, t2, true);
-
- ClusterDumper clusterDumper =
- new ClusterDumper(new Path(output, "clusters-0"), new Path(output, "clusteredPoints"));
- clusterDumper.printClusters(null);
}
-
+
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java?rev=943637&r1=943636&r2=943637&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java Wed May 12 19:08:08 2010
@@ -151,11 +151,6 @@ public class Job {
InputDriver.runJob(input, directoryContainingConvertedInput, vectorClassName);
DirichletDriver.runJob(directoryContainingConvertedInput, output, modelFactory,
vectorClassName, 60, numModels, maxIterations, alpha_0, numReducers, true, true, 0);
-
- ClusterDumper clusterDumper =
- new ClusterDumper(new Path(output, "clusters-5"), new Path(output, "clusteredPoints"));
- clusterDumper.printClusters(null);
-
}
/**
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java?rev=943637&r1=943636&r2=943637&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java Wed May 12 19:08:08 2010
@@ -147,9 +147,5 @@ public final class Job {
log.info("Running KMeans");
KMeansDriver.runJob(directoryContainingConvertedInput, new Path(output, Cluster.INITIAL_CLUSTERS_DIR), output, measureClass,
convergenceDelta, maxIterations, 1, true);
-
- ClusterDumper clusterDumper =
- new ClusterDumper(new Path(output, "clusters-10"), new Path(output, "clusteredPoints"));
- clusterDumper.printClusters(null);
}
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java?rev=943637&r1=943636&r2=943637&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java Wed May 12 19:08:08 2010
@@ -132,11 +132,6 @@ public final class Job {
InputDriver.runJob(input, directoryContainingConvertedInput);
MeanShiftCanopyDriver.runJob(directoryContainingConvertedInput, output, measureClassName, t1, t2,
convergenceDelta, maxIterations, true, true);
-
- ClusterDumper clusterDumper =
- new ClusterDumper(new Path(output, "clusters-10"), new Path(output, "clusteredPoints"));
- clusterDumper.printClusters(null);
-
}
}