You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by je...@apache.org on 2010/07/15 19:53:52 UTC
svn commit: r964507 [3/3] - in /mahout/trunk:
core/src/main/java/org/apache/mahout/clustering/canopy/
core/src/main/java/org/apache/mahout/clustering/dirichlet/
core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/
core/src/main/java/org/apache/...
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java?rev=964507&r1=964506&r2=964507&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java Thu Jul 15 17:53:51 2010
@@ -18,88 +18,88 @@
package org.apache.mahout.clustering.syntheticcontrol.kmeans;
import java.io.IOException;
+import java.util.Map;
-import org.apache.commons.cli2.CommandLine;
-import org.apache.commons.cli2.Group;
-import org.apache.commons.cli2.Option;
-import org.apache.commons.cli2.OptionException;
-import org.apache.commons.cli2.builder.ArgumentBuilder;
-import org.apache.commons.cli2.builder.DefaultOptionBuilder;
-import org.apache.commons.cli2.builder.GroupBuilder;
-import org.apache.commons.cli2.commandline.Parser;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.clustering.canopy.CanopyDriver;
import org.apache.mahout.clustering.kmeans.KMeansDriver;
+import org.apache.mahout.clustering.kmeans.RandomSeedGenerator;
import org.apache.mahout.clustering.syntheticcontrol.Constants;
import org.apache.mahout.clustering.syntheticcontrol.canopy.InputDriver;
-import org.apache.mahout.common.CommandLineUtil;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
+import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
+import org.apache.mahout.utils.clustering.ClusterDumper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-public final class Job {
+public final class Job extends KMeansDriver {
private static final Logger log = LoggerFactory.getLogger(Job.class);
private Job() {
+ super();
}
public static void main(String[] args) throws Exception {
- DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
- ArgumentBuilder abuilder = new ArgumentBuilder();
- GroupBuilder gbuilder = new GroupBuilder();
-
- Option inputOpt = DefaultOptionCreator.inputOption().withRequired(false).create();
- Option outputOpt = DefaultOptionCreator.outputOption().withRequired(false).create();
- Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().withRequired(false).create();
- Option maxIterationsOpt = DefaultOptionCreator.maxIterationsOption().withRequired(false).create();
-
- Option measureClassOpt = obuilder.withLongName("distance").withRequired(false).withArgument(
- abuilder.withName("distance").withMinimum(1).withMaximum(1).create()).withDescription(
- "The Distance Measure to use. Default is SquaredEuclidean").withShortName("m").create();
-
- Option t1Opt = obuilder.withLongName("t1").withRequired(false).withArgument(
- abuilder.withName("t1").withMinimum(1).withMaximum(1).create()).withDescription("The t1 value to use.").withShortName("m")
- .create();
- Option t2Opt = obuilder.withLongName("t2").withRequired(false).withArgument(
- abuilder.withName("t2").withMinimum(1).withMaximum(1).create()).withDescription("The t2 value to use.").withShortName("m")
- .create();
- Option vectorClassOpt = obuilder.withLongName("vectorClass").withRequired(false).withArgument(
- abuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create()).withDescription(
- "The Vector implementation class name. Default is RandomAccessSparseVector.class").withShortName("v").create();
-
- Option helpOpt = DefaultOptionCreator.helpOption();
-
- Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(measureClassOpt).withOption(
- convergenceDeltaOpt).withOption(maxIterationsOpt).withOption(vectorClassOpt).withOption(t1Opt).withOption(t2Opt)
- .withOption(helpOpt).create();
- try {
- Parser parser = new Parser();
- parser.setGroup(group);
- CommandLine cmdLine = parser.parse(args);
-
- if (cmdLine.hasOption(helpOpt)) {
- CommandLineUtil.printHelp(group);
- return;
- }
- Path input = new Path(cmdLine.getValue(inputOpt, "testdata").toString());
- Path output = new Path(cmdLine.getValue(outputOpt, "output").toString());
- String measureClass = cmdLine.getValue(measureClassOpt, "org.apache.mahout.common.distance.EuclideanDistanceMeasure")
- .toString();
- double t1 = Double.parseDouble(cmdLine.getValue(t1Opt, "80").toString());
- double t2 = Double.parseDouble(cmdLine.getValue(t2Opt, "55").toString());
- double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt, "0.5").toString());
- int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterationsOpt, 10).toString());
-
- runJob(input, output, measureClass, t1, t2, convergenceDelta, maxIterations);
- } catch (OptionException e) {
- log.error("Exception", e);
- CommandLineUtil.printHelp(group);
+ if (args.length > 0) {
+ log.info("Running with only user-supplied arguments");
+ new Job().run(args);
+ } else {
+ log.info("Running with default arguments");
+ Path output = new Path("output");
+ HadoopUtil.overwriteOutput(output);
+ new Job().job(new Path("testdata"), output, "org.apache.mahout.common.distance.EuclideanDistanceMeasure", 80, 55, 0.5, 10);
}
}
+ @Override
+ public int run(String[] args) throws Exception {
+
+ addInputOption();
+ addOutputOption();
+ addOption(DefaultOptionCreator.distanceMeasureOption().create());
+ addOption(DefaultOptionCreator.clustersInOption()
+ .withDescription("The input centroids, as Vectors. Must be a SequenceFile of Writable, Cluster/Canopy. "
+ + "If k is also specified, then a random set of vectors will be selected" + " and written out to this path first")
+ .create());
+ addOption(DefaultOptionCreator.numClustersOption()
+ .withDescription("The k in k-Means. If specified, then a random selection of k Vectors will be chosen"
+ + " as the Centroid and written to the clusters input path.").create());
+ addOption(DefaultOptionCreator.convergenceOption().create());
+ addOption(DefaultOptionCreator.maxIterationsOption().create());
+ addOption(DefaultOptionCreator.overwriteOption().create());
+ addOption(DefaultOptionCreator.numReducersOption().create());
+ addOption(DefaultOptionCreator.clusteringOption().create());
+
+ Map<String, String> argMap = parseArguments(args);
+ if (argMap == null) {
+ return -1;
+ }
+
+ Path input = getInputPath();
+ Path clusters = new Path(argMap.get(DefaultOptionCreator.CLUSTERS_IN_OPTION_KEY));
+ Path output = getOutputPath();
+ String measureClass = argMap.get(DefaultOptionCreator.DISTANCE_MEASURE_OPTION_KEY);
+ if (measureClass == null) {
+ measureClass = SquaredEuclideanDistanceMeasure.class.getName();
+ }
+ double convergenceDelta = Double.parseDouble(argMap.get(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION_KEY));
+ int numReduceTasks = Integer.parseInt(argMap.get(DefaultOptionCreator.MAX_REDUCERS_OPTION_KEY));
+ int maxIterations = Integer.parseInt(argMap.get(DefaultOptionCreator.MAX_ITERATIONS_OPTION_KEY));
+ if (argMap.containsKey(DefaultOptionCreator.OVERWRITE_OPTION_KEY)) {
+ HadoopUtil.overwriteOutput(output);
+ }
+ if (argMap.containsKey(DefaultOptionCreator.NUM_CLUSTERS_OPTION_KEY)) {
+ clusters = RandomSeedGenerator.buildRandom(input, clusters, Integer.parseInt(argMap
+ .get(DefaultOptionCreator.NUM_CLUSTERS_OPTION_KEY)));
+ }
+ boolean runClustering = argMap.containsKey(DefaultOptionCreator.CLUSTERING_OPTION_KEY);
+ runJob(input, clusters, output, measureClass, convergenceDelta, maxIterations, numReduceTasks, runClustering);
+ return 0;
+ }
+
/**
* Run the kmeans clustering job on an input dataset using the given distance measure, t1, t2 and iteration
* parameters. All output data will be written to the output directory, which will be initially deleted if
@@ -127,9 +127,8 @@ public final class Job {
* @throws ClassNotFoundException
* @throws InterruptedException
*/
- private static void runJob(Path input, Path output, String measureClass, double t1, double t2, double convergenceDelta,
- int maxIterations) throws IOException, InstantiationException, IllegalAccessException, InterruptedException,
- ClassNotFoundException {
+ private void job(Path input, Path output, String measureClass, double t1, double t2, double convergenceDelta, int maxIterations)
+ throws IOException, InstantiationException, IllegalAccessException, InterruptedException, ClassNotFoundException {
HadoopUtil.overwriteOutput(output);
Path directoryContainingConvertedInput = new Path(output, Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT);
@@ -138,7 +137,17 @@ public final class Job {
log.info("Running Canopy to get initial clusters");
CanopyDriver.runJob(directoryContainingConvertedInput, output, measureClass, t1, t2, false);
log.info("Running KMeans");
- KMeansDriver.runJob(directoryContainingConvertedInput, new Path(output, Cluster.INITIAL_CLUSTERS_DIR), output, measureClass,
- convergenceDelta, maxIterations, 1, true);
+ KMeansDriver.runJob(directoryContainingConvertedInput,
+ new Path(output, Cluster.INITIAL_CLUSTERS_DIR),
+ output,
+ measureClass,
+ convergenceDelta,
+ maxIterations,
+ 1,
+ true);
+ // run ClusterDumper
+ ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-" + maxIterations), new Path(output,
+ "clusteredPoints"));
+ clusterDumper.printClusters(null);
}
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java?rev=964507&r1=964506&r2=964507&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java Thu Jul 15 17:53:51 2010
@@ -18,85 +18,79 @@
package org.apache.mahout.clustering.syntheticcontrol.meanshift;
import java.io.IOException;
+import java.util.Map;
-import org.apache.commons.cli2.CommandLine;
-import org.apache.commons.cli2.Group;
-import org.apache.commons.cli2.Option;
-import org.apache.commons.cli2.OptionException;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
-import org.apache.commons.cli2.builder.GroupBuilder;
-import org.apache.commons.cli2.commandline.Parser;
-import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.JobClient;
import org.apache.mahout.clustering.meanshift.MeanShiftCanopyDriver;
import org.apache.mahout.clustering.syntheticcontrol.Constants;
-import org.apache.mahout.common.CommandLineUtil;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
+import org.apache.mahout.utils.clustering.ClusterDumper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-public final class Job {
+public final class Job extends MeanShiftCanopyDriver {
private static final Logger log = LoggerFactory.getLogger(Job.class);
private Job() {
+ super();
}
public static void main(String[] args) throws Exception {
- DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
- ArgumentBuilder abuilder = new ArgumentBuilder();
- GroupBuilder gbuilder = new GroupBuilder();
-
- Option inputOpt = DefaultOptionCreator.inputOption().withRequired(false).create();
- Option outputOpt = DefaultOptionCreator.outputOption().withRequired(false).create();
- Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().withRequired(false).create();
- Option maxIterOpt = DefaultOptionCreator.maxIterationsOption().withRequired(false).create();
- Option helpOpt = DefaultOptionCreator.helpOption();
-
- Option modelOpt = obuilder.withLongName("distanceClass").withRequired(false).withShortName("d").withArgument(
- abuilder.withName("distanceClass").withMinimum(1).withMaximum(1).create()).withDescription(
- "The distance measure class name.").create();
-
- Option threshold1Opt = obuilder.withLongName("threshold_1").withRequired(false).withShortName("t1").withArgument(
- abuilder.withName("threshold_1").withMinimum(1).withMaximum(1).create())
- .withDescription("The T1 distance threshold.").create();
-
- Option threshold2Opt = obuilder.withLongName("threshold_2").withRequired(false).withShortName("t2").withArgument(
- abuilder.withName("threshold_2").withMinimum(1).withMaximum(1).create())
- .withDescription("The T1 distance threshold.").create();
-
- Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)
- .withOption(modelOpt).withOption(helpOpt)
- .withOption(convergenceDeltaOpt).withOption(threshold1Opt).withOption(maxIterOpt)
- .withOption(threshold2Opt).create();
-
- try {
- Parser parser = new Parser();
- parser.setGroup(group);
- CommandLine cmdLine = parser.parse(args);
- if (cmdLine.hasOption(helpOpt)) {
- CommandLineUtil.printHelp(group);
- return;
- }
-
- Path input = new Path(cmdLine.getValue(inputOpt, "testdata").toString());
- Path output = new Path(cmdLine.getValue(outputOpt, "output").toString());
- String measureClassName =
- cmdLine.getValue(modelOpt, "org.apache.mahout.common.distance.EuclideanDistanceMeasure").toString();
- double t1 = Double.parseDouble(cmdLine.getValue(threshold1Opt, "47.6").toString());
- double t2 = Double.parseDouble(cmdLine.getValue(threshold2Opt, "1").toString());
- double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt, "0.5").toString());
- int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt, "10").toString());
- runJob(input, output, measureClassName, t1, t2, convergenceDelta, maxIterations);
- } catch (OptionException e) {
- log.error("Exception parsing command line: ", e);
- CommandLineUtil.printHelp(group);
+ if (args.length > 0) {
+ log.info("Running with only user-supplied arguments");
+ new Job().run(args);
+ } else {
+ log.info("Running with default arguments");
+ Path output = new Path("output");
+ HadoopUtil.overwriteOutput(output);
+ new Job().job(new Path("testdata"), output, "org.apache.mahout.common.distance.EuclideanDistanceMeasure", 47.6, 1, 0.5, 10);
}
}
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.util.Tool#run(java.lang.String[])
+ */
+ @Override
+ public int run(String[] args) throws Exception {
+ addInputOption();
+ addOutputOption();
+ addOption(DefaultOptionCreator.convergenceOption().create());
+ addOption(DefaultOptionCreator.maxIterationsOption().create());
+ addOption(DefaultOptionCreator.overwriteOption().create());
+ addOption(new DefaultOptionBuilder().withLongName(INPUT_IS_CANOPIES_OPTION).withRequired(false).withShortName("ic")
+ .withArgument(new ArgumentBuilder().withName(INPUT_IS_CANOPIES_OPTION).withMinimum(1).withMaximum(1).create())
+ .withDescription("If present, the input directory already contains MeanShiftCanopies").create());
+ addOption(DefaultOptionCreator.distanceMeasureOption().create());
+ addOption(DefaultOptionCreator.t1Option().create());
+ addOption(DefaultOptionCreator.t2Option().create());
+ addOption(DefaultOptionCreator.clusteringOption().create());
+
+ Map<String, String> argMap = parseArguments(args);
+ if (argMap == null) {
+ return -1;
+ }
+
+ Path input = getInputPath();
+ Path output = getOutputPath();
+ if (argMap.containsKey(DefaultOptionCreator.OVERWRITE_OPTION_KEY)) {
+ HadoopUtil.overwriteOutput(output);
+ }
+ String measureClass = argMap.get(DefaultOptionCreator.DISTANCE_MEASURE_OPTION_KEY);
+ double t1 = Double.parseDouble(argMap.get(DefaultOptionCreator.T1_OPTION_KEY));
+ double t2 = Double.parseDouble(argMap.get(DefaultOptionCreator.T2_OPTION_KEY));
+ boolean runClustering = argMap.containsKey(DefaultOptionCreator.CLUSTERING_OPTION_KEY);
+ double convergenceDelta = Double.parseDouble(argMap.get(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION_KEY));
+ int maxIterations = Integer.parseInt(argMap.get(DefaultOptionCreator.MAX_ITERATIONS_OPTION_KEY));
+ boolean inputIsCanopies = argMap.containsKey(INPUT_IS_CANOPIES_OPTION_KEY);
+
+ runJob(input, output, measureClass, t1, t2, convergenceDelta, maxIterations, inputIsCanopies, runClustering);
+ return 0;
+ }
+
/**
* Run the meanshift clustering job on an input dataset using the given distance measure, t1, t2 and
* iteration parameters. All output data will be written to the output directory, which will be initially
@@ -123,24 +117,31 @@ public final class Job {
* @throws InterruptedException
* @throws IllegalAccessException
* @throws InstantiationException
+ * @throws IllegalAccessException
+ * @throws InstantiationException
*/
- private static void runJob(Path input,
- Path output,
- String measureClassName,
- double t1,
- double t2,
- double convergenceDelta,
- int maxIterations) throws IOException, InterruptedException, ClassNotFoundException {
- JobClient client = new JobClient();
- Configuration conf = new Configuration();
-
- client.setConf(conf);
- HadoopUtil.overwriteOutput(output);
-
+ private void job(Path input,
+ Path output,
+ String measureClassName,
+ double t1,
+ double t2,
+ double convergenceDelta,
+ int maxIterations) throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
Path directoryContainingConvertedInput = new Path(output, Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT);
InputDriver.runJob(input, directoryContainingConvertedInput);
- MeanShiftCanopyDriver.runJob(directoryContainingConvertedInput, output, measureClassName, t1, t2,
- convergenceDelta, maxIterations, true, true);
+ MeanShiftCanopyDriver.runJob(directoryContainingConvertedInput,
+ output,
+ measureClassName,
+ t1,
+ t2,
+ convergenceDelta,
+ maxIterations,
+ true,
+ true);
+ // run ClusterDumper
+ ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-" + maxIterations), new Path(output,
+ "clusteredPoints"));
+ clusterDumper.printClusters(null);
}
}
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java?rev=964507&r1=964506&r2=964507&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java Thu Jul 15 17:53:51 2010
@@ -314,7 +314,7 @@ public final class ClusterDumper {
FileStatus[] children = fs.listStatus(pointsPathDir, new PathFilter() {
@Override
public boolean accept(Path path) {
- return !path.getName().endsWith(".crc");
+ return !(path.getName().endsWith(".crc") || path.getName().equals("_logs"));
}
});
Modified: mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java?rev=964507&r1=964506&r2=964507&view=diff
==============================================================================
--- mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java (original)
+++ mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java Thu Jul 15 17:53:51 2010
@@ -17,7 +17,6 @@
package org.apache.mahout.clustering;
-import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
@@ -42,7 +41,6 @@ import org.apache.mahout.clustering.fuzz
import org.apache.mahout.clustering.kmeans.KMeansDriver;
import org.apache.mahout.clustering.meanshift.MeanShiftCanopyDriver;
import org.apache.mahout.common.MahoutTestCase;
-import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.common.distance.CosineDistanceMeasure;
import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
import org.apache.mahout.math.NamedVector;
@@ -83,17 +81,6 @@ public class TestClusterDumper extends M
ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("testdata/file1"), fs, conf);
}
- private static void rmDir(File f) {
- if (f != null && f.exists()) {
- if (f.isDirectory()) {
- for (File g : f.listFiles()) {
- rmDir(g);
- }
- }
- f.delete();
- }
- }
-
private void getSampleData(String[] docs2) throws IOException {
sampleData = new ArrayList<VectorWritable>();
RAMDirectory directory = new RAMDirectory();
@@ -178,7 +165,7 @@ public class TestClusterDumper extends M
// now run the KMeans job
FuzzyKMeansDriver.runJob(getTestTempDirPath("testdata"), new Path(output, "clusters-0"), output,
EuclideanDistanceMeasure.class.getName(), 0.001, 10,
- 1, 1, (float) 1.1, true, true, 0);
+ 1, (float) 1.1, true, true, 0);
// run ClusterDumper
ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-3"),
new Path(output, "clusteredPoints"));
Modified: mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java?rev=964507&r1=964506&r2=964507&view=diff
==============================================================================
--- mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java (original)
+++ mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java Thu Jul 15 17:53:51 2010
@@ -168,7 +168,7 @@ public class TestCDbwEvaluator extends M
// now run the KMeans job
Path output = getTestTempDirPath("output");
FuzzyKMeansDriver.runJob(getTestTempDirPath("testdata"), new Path(output, "clusters-0"), output,
- EuclideanDistanceMeasure.class.getName(), 0.001, 10, 1, 1, 2, false, true, 0);
+ EuclideanDistanceMeasure.class.getName(), 0.001, 10, 1, 2, true, true, 0);
int numIterations = 2;
CDbwDriver.runJob(new Path(output, "clusters-4"), new Path(output, "clusteredPoints"), output,
EuclideanDistanceMeasure.class.getName(), numIterations, 1);