Posted to commits@mahout.apache.org by je...@apache.org on 2010/05/12 21:08:09 UTC

svn commit: r943637 - in /mahout/trunk: core/src/main/java/org/apache/mahout/clustering/canopy/ core/src/main/java/org/apache/mahout/clustering/dirichlet/ core/src/main/java/org/apache/mahout/clustering/meanshift/ examples/src/main/java/org/apache/maho...

Author: jeastman
Date: Wed May 12 19:08:08 2010
New Revision: 943637

URL: http://svn.apache.org/viewvc?rev=943637&view=rev
Log:
- added -w options to DirichletDriver, CanopyDriver and MeanShiftCanopyDriver (see the usage sketch below)
- added optional output deletion to each driver's main() iff -w is set
- removed non-Hadoopable ClusterDumper calls from the syntheticcontrol examples; all examples now run in Hadoop again
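
A minimal usage sketch (not part of this commit): invoking the CanopyDriver main() with the new --overwrite (-w) flag so any existing output directory is deleted before the job runs. The input/output paths and the t1/t2 values are made-up placeholders; only the option names come from the code changed in this commit.

    // Hypothetical driver invocation; paths and thresholds are placeholders.
    public class CanopyOverwriteExample {
      public static void main(String[] args) throws Exception {
        org.apache.mahout.clustering.canopy.CanopyDriver.main(new String[] {
            "--input", "testdata/vectors",   // SequenceFile of Writable, Vector
            "--output", "output/canopies",   // deleted up front because --overwrite is set
            "--overwrite",                   // new -w flag added in this commit
            "--t1", "80",
            "--t2", "55" });
      }
    }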

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java?rev=943637&r1=943636&r2=943637&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java Wed May 12 19:08:08 2010
@@ -48,78 +48,81 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 public final class CanopyDriver {
-  
+
   private static final Logger log = LoggerFactory.getLogger(CanopyDriver.class);
+
   public static final String DEFAULT_CLUSTERED_POINTS_DIRECTORY = "clusteredPoints";
-  
-  private CanopyDriver() { }
-  
+
+  private CanopyDriver() {
+  }
+
   public static void main(String[] args) throws IOException {
     DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
     ArgumentBuilder abuilder = new ArgumentBuilder();
     GroupBuilder gbuilder = new GroupBuilder();
-    
+
     Option inputOpt = obuilder.withLongName("input").withRequired(true).withArgument(
-      abuilder.withName("input").withMinimum(1).withMaximum(1).create()).withDescription(
-      "The Path for input Vectors. Must be a SequenceFile of Writable, Vector").withShortName("i").create();
-    
+        abuilder.withName("input").withMinimum(1).withMaximum(1).create()).withDescription(
+        "The Path for input Vectors. Must be a SequenceFile of Writable, Vector").withShortName("i").create();
+
     Option outputOpt = obuilder.withLongName("output").withRequired(true).withArgument(
-      abuilder.withName("output").withMinimum(1).withMaximum(1).create()).withDescription(
-      "The Path to put the output in").withShortName("o").create();
-    
+        abuilder.withName("output").withMinimum(1).withMaximum(1).create()).withDescription("The Path to put the output in")
+        .withShortName("o").create();
+
+    Option overwriteOutput = obuilder.withLongName("overwrite").withRequired(false).withDescription(
+        "If set, overwrite the output directory").withShortName("w").create();
+
     Option measureClassOpt = obuilder.withLongName("distance").withRequired(false).withArgument(
-      abuilder.withName("distance").withMinimum(1).withMaximum(1).create()).withDescription(
-      "The Distance Measure to use.  Default is SquaredEuclidean").withShortName("m").create();
-    
+        abuilder.withName("distance").withMinimum(1).withMaximum(1).create()).withDescription(
+        "The Distance Measure to use.  Default is SquaredEuclidean").withShortName("m").create();
+
     Option vectorClassOpt = obuilder.withLongName("vectorClass").withRequired(false).withArgument(
-      abuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create()).withDescription(
-      "The Vector implementation class name.  Default is RandomAccessSparseVector.class").withShortName("v")
-        .create();
+        abuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create()).withDescription(
+        "The Vector implementation class name.  Default is RandomAccessSparseVector.class").withShortName("v").create();
     Option t1Opt = obuilder.withLongName("t1").withRequired(true).withArgument(
-      abuilder.withName("t1").withMinimum(1).withMaximum(1).create()).withDescription("t1").withShortName(
-      "t1").create();
+        abuilder.withName("t1").withMinimum(1).withMaximum(1).create()).withDescription("t1").withShortName("t1").create();
     Option t2Opt = obuilder.withLongName("t2").withRequired(true).withArgument(
-      abuilder.withName("t2").withMinimum(1).withMaximum(1).create()).withDescription("t2").withShortName(
-      "t2").create();
-    
-    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
-        .create();
-    
-    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(
-      measureClassOpt).withOption(vectorClassOpt).withOption(t1Opt).withOption(t2Opt).withOption(helpOpt)
-        .create();
-    
+        abuilder.withName("t2").withMinimum(1).withMaximum(1).create()).withDescription("t2").withShortName("t2").create();
+
+    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h").create();
+
+    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(overwriteOutput).withOption(
+        measureClassOpt).withOption(vectorClassOpt).withOption(t1Opt).withOption(t2Opt).withOption(helpOpt).create();
+
     try {
       Parser parser = new Parser();
       parser.setGroup(group);
       CommandLine cmdLine = parser.parse(args);
-      
+
       if (cmdLine.hasOption(helpOpt)) {
         CommandLineUtil.printHelp(group);
         return;
       }
-      
+
       Path input = new Path(cmdLine.getValue(inputOpt).toString());
       Path output = new Path(cmdLine.getValue(outputOpt).toString());
+      if (cmdLine.hasOption(overwriteOutput)) {
+        HadoopUtil.overwriteOutput(output);
+      }
       String measureClass = SquaredEuclideanDistanceMeasure.class.getName();
       if (cmdLine.hasOption(measureClassOpt)) {
         measureClass = cmdLine.getValue(measureClassOpt).toString();
       }
-      
+
       // Class<? extends Vector> vectorClass = cmdLine.hasOption(vectorClassOpt) == false ?
       // RandomAccessSparseVector.class
       // : (Class<? extends Vector>) Class.forName(cmdLine.getValue(vectorClassOpt).toString());
       double t1 = Double.parseDouble(cmdLine.getValue(t1Opt).toString());
       double t2 = Double.parseDouble(cmdLine.getValue(t2Opt).toString());
-      
+
       runJob(input, output, measureClass, t1, t2, false);
     } catch (OptionException e) {
       log.error("Exception", e);
       CommandLineUtil.printHelp(group);
-      
+
     }
   }
-  
+
   /**
    * Run the job
    * 
@@ -136,39 +139,37 @@ public final class CanopyDriver {
    * @param runClustering 
    *          true if points are to be clustered after clusters are determined
    */
-  public static void runJob(Path input, Path output,
-                            String measureClassName, double t1, double t2, boolean runClustering) throws IOException {
-    log.info("Input: {} Out: {} " 
-      + "Measure: {} t1: {} t2: {}", new Object[] {input, output, measureClassName, t1, t2});
+  public static void runJob(Path input, Path output, String measureClassName, double t1, double t2, boolean runClustering)
+      throws IOException {
+    log.info("Input: {} Out: {} " + "Measure: {} t1: {} t2: {}", new Object[] { input, output, measureClassName, t1, t2 });
     Configurable client = new JobClient();
     JobConf conf = new JobConf(CanopyDriver.class);
     conf.set(CanopyConfigKeys.DISTANCE_MEASURE_KEY, measureClassName);
     conf.set(CanopyConfigKeys.T1_KEY, String.valueOf(t1));
     conf.set(CanopyConfigKeys.T2_KEY, String.valueOf(t2));
-    
+
     conf.setInputFormat(SequenceFileInputFormat.class);
-    
+
     conf.setMapOutputKeyClass(Text.class);
     conf.setMapOutputValueClass(VectorWritable.class);
     conf.setOutputKeyClass(Text.class);
     conf.setOutputValueClass(Canopy.class);
-    
+
     FileInputFormat.setInputPaths(conf, input);
-    
+
     Path canopyOutputDir = new Path(output, Cluster.CLUSTERS_DIR + '0');
     FileOutputFormat.setOutputPath(conf, canopyOutputDir);
-    
+
     conf.setMapperClass(CanopyMapper.class);
     conf.setReducerClass(CanopyReducer.class);
     conf.setNumReduceTasks(1);
     conf.setOutputFormat(SequenceFileOutputFormat.class);
-    
+
     client.setConf(conf);
-    HadoopUtil.overwriteOutput(output);
 
     JobClient.runJob(conf);
-    
-    if (runClustering){
+
+    if (runClustering) {
       runClustering(input, canopyOutputDir, output, measureClassName, t1, t2);
     }
   }
@@ -189,36 +190,32 @@ public final class CanopyDriver {
    * @param t2
    *          the T2 distance threshold
    */
-  public static void runClustering(Path points,
-                                   Path canopies,
-                                   Path output,
-                                   String measureClassName,
-                                   double t1,
-                                   double t2) throws IOException {
+  public static void runClustering(Path points, Path canopies, Path output, String measureClassName, double t1, double t2)
+      throws IOException {
     Configurable client = new JobClient();
     JobConf conf = new JobConf(CanopyDriver.class);
-    
+
     conf.set(CanopyConfigKeys.DISTANCE_MEASURE_KEY, measureClassName);
     conf.set(CanopyConfigKeys.T1_KEY, String.valueOf(t1));
     conf.set(CanopyConfigKeys.T2_KEY, String.valueOf(t2));
     conf.set(CanopyConfigKeys.CANOPY_PATH_KEY, canopies.toString());
-    
+
     conf.setInputFormat(SequenceFileInputFormat.class);
     conf.setOutputKeyClass(IntWritable.class);
     conf.setOutputValueClass(WeightedVectorWritable.class);
     conf.setOutputFormat(SequenceFileOutputFormat.class);
-    
+
     FileInputFormat.setInputPaths(conf, points);
     Path outPath = new Path(output, DEFAULT_CLUSTERED_POINTS_DIRECTORY);
     FileOutputFormat.setOutputPath(conf, outPath);
-    
+
     conf.setMapperClass(ClusterMapper.class);
     conf.setReducerClass(IdentityReducer.class);
     conf.setNumReduceTasks(0);
-    
+
     client.setConf(conf);
-    HadoopUtil.overwriteOutput(outPath);    
+    HadoopUtil.overwriteOutput(outPath);
     JobClient.runJob(conf);
   }
-  
+
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java?rev=943637&r1=943636&r2=943637&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java Wed May 12 19:08:08 2010
@@ -45,6 +45,7 @@ import org.apache.mahout.clustering.Clus
 import org.apache.mahout.clustering.WeightedVectorWritable;
 import org.apache.mahout.clustering.dirichlet.models.VectorModelDistribution;
 import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.HadoopUtil;
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
@@ -85,6 +86,9 @@ public class DirichletDriver {
     Option topicsOpt = DefaultOptionCreator.kOption().create();
     Option helpOpt = DefaultOptionCreator.helpOption();
 
+    Option overwriteOutput = obuilder.withLongName("overwrite").withRequired(false).withDescription(
+        "If set, overwrite the output directory").withShortName("w").create();
+
     Option mOpt = obuilder.withLongName("alpha").withRequired(true).withShortName("m").withArgument(
         abuilder.withName("alpha").withMinimum(1).withMaximum(1).create()).withDescription(
         "The alpha0 value for the DirichletDistribution.").create();
@@ -116,9 +120,10 @@ public class DirichletDriver {
     Option thresholdOpt = obuilder.withLongName("threshold").withRequired(false).withShortName("t").withArgument(
         abuilder.withName("threshold").withMinimum(1).withMaximum(1).create()).withDescription("The pdf threshold").create();
 
-    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(modelOpt).withOption(
-        prototypeOpt).withOption(sizeOpt).withOption(maxIterOpt).withOption(mOpt).withOption(topicsOpt).withOption(helpOpt)
-        .withOption(numRedOpt).withOption(clusteringOpt).withOption(emitMostLikelyOpt).withOption(thresholdOpt).create();
+    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(overwriteOutput).withOption(
+        modelOpt).withOption(prototypeOpt).withOption(sizeOpt).withOption(maxIterOpt).withOption(mOpt).withOption(topicsOpt)
+        .withOption(helpOpt).withOption(numRedOpt).withOption(clusteringOpt).withOption(emitMostLikelyOpt).withOption(thresholdOpt)
+        .create();
 
     try {
       Parser parser = new Parser();
@@ -131,6 +136,9 @@ public class DirichletDriver {
 
       Path input = new Path(cmdLine.getValue(inputOpt).toString());
       Path output = new Path(cmdLine.getValue(outputOpt).toString());
+      if (cmdLine.hasOption(overwriteOutput)) {
+        HadoopUtil.overwriteOutput(output);
+      }
       String modelFactory = "org.apache.mahout.clustering.dirichlet.models.NormalModelDistribution";
       if (cmdLine.hasOption(modelOpt)) {
         modelFactory = cmdLine.getValue(modelOpt).toString();
@@ -241,9 +249,9 @@ public class DirichletDriver {
     }
   }
 
-  private static void writeInitialState(Path output, Path stateIn, String modelFactory, String modelPrototype,
-      int prototypeSize, int numModels, double alpha_0) throws ClassNotFoundException, InstantiationException,
-      IllegalAccessException, IOException, SecurityException, NoSuchMethodException, InvocationTargetException {
+  private static void writeInitialState(Path output, Path stateIn, String modelFactory, String modelPrototype, int prototypeSize,
+      int numModels, double alpha_0) throws ClassNotFoundException, InstantiationException, IllegalAccessException, IOException,
+      SecurityException, NoSuchMethodException, InvocationTargetException {
 
     DirichletState<VectorWritable> state = createState(modelFactory, modelPrototype, prototypeSize, numModels, alpha_0);
     JobConf job = new JobConf(DirichletDriver.class);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java?rev=943637&r1=943636&r2=943637&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java Wed May 12 19:08:08 2010
@@ -43,6 +43,7 @@ import org.apache.mahout.clustering.Clus
 import org.apache.mahout.clustering.WeightedVectorWritable;
 import org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansDriver;
 import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.HadoopUtil;
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -68,6 +69,9 @@ public final class MeanShiftCanopyDriver
     Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().create();
     Option helpOpt = DefaultOptionCreator.helpOption();
     Option maxIterOpt = DefaultOptionCreator.maxIterOption().create();
+    Option overwriteOutput = obuilder.withLongName("overwrite").withRequired(false).withDescription(
+        "If set, overwrite the output directory").withShortName("w").create();
+
     Option inputIsCanopiesOpt = obuilder.withLongName("inputIsCanopies").withRequired(true).withShortName("i").withArgument(
         abuilder.withName("inputIsCanopies").withMinimum(1).withMaximum(1).create()).withDescription(
         "True if the input directory already contains MeanShiftCanopies").create();
@@ -87,9 +91,9 @@ public final class MeanShiftCanopyDriver
     Option clusteringOpt = obuilder.withLongName("clustering").withRequired(false).withDescription(
         "If true, run clustering after the iterations have taken place").withShortName("cl").create();
 
-    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(modelOpt).withOption(helpOpt)
-        .withOption(convergenceDeltaOpt).withOption(threshold1Opt).withOption(threshold2Opt).withOption(clusteringOpt).withOption(
-            maxIterOpt).withOption(inputIsCanopiesOpt).create();
+    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(overwriteOutput).withOption(
+        modelOpt).withOption(helpOpt).withOption(convergenceDeltaOpt).withOption(threshold1Opt).withOption(threshold2Opt)
+        .withOption(clusteringOpt).withOption(maxIterOpt).withOption(inputIsCanopiesOpt).create();
 
     try {
       Parser parser = new Parser();
@@ -107,6 +111,9 @@ public final class MeanShiftCanopyDriver
       Path input = new Path(cmdLine.getValue(inputOpt).toString());
       Path output = new Path(cmdLine.getValue(outputOpt).toString());
       String measureClassName = cmdLine.getValue(modelOpt).toString();
+      if (cmdLine.hasOption(overwriteOutput)) {
+        HadoopUtil.overwriteOutput(output);
+      }
       double t1 = Double.parseDouble(cmdLine.getValue(threshold1Opt).toString());
       double t2 = Double.parseDouble(cmdLine.getValue(threshold2Opt).toString());
       double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt).toString());
@@ -281,8 +288,8 @@ public final class MeanShiftCanopyDriver
 
     if (runClustering) {
       // now cluster the points
-      MeanShiftCanopyDriver.runClustering((inputIsCanopies ? input : new Path(output, Cluster.INITIAL_CLUSTERS_DIR)),
-                                          clustersIn, new Path(output, Cluster.CLUSTERED_POINTS_DIR));
+      MeanShiftCanopyDriver.runClustering((inputIsCanopies ? input : new Path(output, Cluster.INITIAL_CLUSTERS_DIR)), clustersIn,
+          new Path(output, Cluster.CLUSTERED_POINTS_DIR));
     }
   }
 }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java?rev=943637&r1=943636&r2=943637&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java Wed May 12 19:08:08 2010
@@ -41,71 +41,68 @@ import org.slf4j.LoggerFactory;
 public final class Job {
 
   private static final Logger log = LoggerFactory.getLogger(Job.class);
-  
-  private Job() { }
-  
+
+  private Job() {
+  }
+
   public static void main(String[] args) throws Exception {
     DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
     ArgumentBuilder abuilder = new ArgumentBuilder();
     GroupBuilder gbuilder = new GroupBuilder();
-    
+
     Option inputOpt = obuilder.withLongName("input").withRequired(false).withArgument(
-      abuilder.withName("input").withMinimum(1).withMaximum(1).create()).withDescription(
-      "The Path for input Vectors. Must be a SequenceFile of Writable, Vector").withShortName("i").create();
+        abuilder.withName("input").withMinimum(1).withMaximum(1).create()).withDescription(
+        "The Path for input Vectors. Must be a SequenceFile of Writable, Vector").withShortName("i").create();
     Option outputOpt = obuilder.withLongName("output").withRequired(false).withArgument(
-      abuilder.withName("output").withMinimum(1).withMaximum(1).create()).withDescription(
-      "The Path to put the output in").withShortName("o").create();
-    
+        abuilder.withName("output").withMinimum(1).withMaximum(1).create()).withDescription("The Path to put the output in")
+        .withShortName("o").create();
+
     Option measureClassOpt = obuilder.withLongName("distance").withRequired(false).withArgument(
-      abuilder.withName("distance").withMinimum(1).withMaximum(1).create()).withDescription(
-      "The Distance Measure to use.  Default is SquaredEuclidean").withShortName("m").create();
+        abuilder.withName("distance").withMinimum(1).withMaximum(1).create()).withDescription(
+        "The Distance Measure to use.  Default is SquaredEuclidean").withShortName("m").create();
     // Option vectorClassOpt = obuilder.withLongName("vectorClass").withRequired(false).withArgument(
     // abuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create()).
     // withDescription("The Vector implementation class name.  Default is RandomAccessSparseVector.class")
     // .withShortName("v").create();
-    
+
     Option t1Opt = obuilder.withLongName("t1").withRequired(false).withArgument(
-      abuilder.withName("t1").withMinimum(1).withMaximum(1).create()).withDescription("t1").withShortName(
-      "t1").create();
+        abuilder.withName("t1").withMinimum(1).withMaximum(1).create()).withDescription("t1").withShortName("t1").create();
     Option t2Opt = obuilder.withLongName("t2").withRequired(false).withArgument(
-      abuilder.withName("t2").withMinimum(1).withMaximum(1).create()).withDescription("t2").withShortName(
-      "t2").create();
-    
-    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
-        .create();
-    
-    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(
-      measureClassOpt)// .withOption(vectorClassOpt)
+        abuilder.withName("t2").withMinimum(1).withMaximum(1).create()).withDescription("t2").withShortName("t2").create();
+
+    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h").create();
+
+    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(measureClassOpt)// .withOption(vectorClassOpt)
         .withOption(t1Opt).withOption(t2Opt).withOption(helpOpt).create();
-    
+
     try {
       Parser parser = new Parser();
       parser.setGroup(group);
       CommandLine cmdLine = parser.parse(args);
-      
+
       if (cmdLine.hasOption(helpOpt)) {
         CommandLineUtil.printHelp(group);
         return;
       }
-      
+
       Path input = new Path(cmdLine.getValue(inputOpt, "testdata").toString());
       Path output = new Path(cmdLine.getValue(outputOpt, "output").toString());
-      String measureClass = cmdLine.getValue(measureClassOpt,
-        "org.apache.mahout.common.distance.EuclideanDistanceMeasure").toString();
-      
+      String measureClass = cmdLine.getValue(measureClassOpt, "org.apache.mahout.common.distance.EuclideanDistanceMeasure")
+          .toString();
+
       // String className = cmdLine.getValue(vectorClassOpt,
       // "org.apache.mahout.math.RandomAccessSparseVector").toString();
       // Class<? extends Vector> vectorClass = Class.forName(className).asSubclass(Vector.class);
       double t1 = Double.parseDouble(cmdLine.getValue(t1Opt, "80").toString());
       double t2 = Double.parseDouble(cmdLine.getValue(t2Opt, "55").toString());
-      
+
       runJob(input, output, measureClass, t1, t2);
     } catch (OptionException e) {
       Job.log.error("Exception", e);
       CommandLineUtil.printHelp(group);
     }
   }
-  
+
   /**
    * Run the canopy clustering job on an input dataset using the given distance measure, t1 and t2 parameters.
    * All output data will be written to the output directory, which will be initially deleted if it exists.
@@ -127,23 +124,18 @@ public final class Job {
    * @throws IllegalAccessException 
    * @throws InstantiationException 
    */
-  private static void runJob(Path input, Path output, String measureClassName,
-                             double t1, double t2) throws IOException, InstantiationException, IllegalAccessException {
+  private static void runJob(Path input, Path output, String measureClassName, double t1, double t2) throws IOException,
+      InstantiationException, IllegalAccessException {
     JobClient client = new JobClient();
     JobConf conf = new JobConf(Job.class);
-    
+
     client.setConf(conf);
     HadoopUtil.overwriteOutput(output);
 
     Path directoryContainingConvertedInput = new Path(output, Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT);
-    InputDriver.runJob(input, directoryContainingConvertedInput,
-      "org.apache.mahout.math.RandomAccessSparseVector");
+    InputDriver.runJob(input, directoryContainingConvertedInput, "org.apache.mahout.math.RandomAccessSparseVector");
     CanopyDriver.runJob(directoryContainingConvertedInput, output, measureClassName, t1, t2, true);
-    
-    ClusterDumper clusterDumper =
-        new ClusterDumper(new Path(output, "clusters-0"), new Path(output, "clusteredPoints"));
-    clusterDumper.printClusters(null);
 
   }
-  
+
 }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java?rev=943637&r1=943636&r2=943637&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java Wed May 12 19:08:08 2010
@@ -151,11 +151,6 @@ public class Job {
     InputDriver.runJob(input, directoryContainingConvertedInput, vectorClassName);
     DirichletDriver.runJob(directoryContainingConvertedInput, output, modelFactory,
       vectorClassName, 60, numModels, maxIterations, alpha_0, numReducers, true, true, 0);
-
-    ClusterDumper clusterDumper =
-        new ClusterDumper(new Path(output, "clusters-5"), new Path(output, "clusteredPoints"));
-    clusterDumper.printClusters(null);
-
   }
   
   /**

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java?rev=943637&r1=943636&r2=943637&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java Wed May 12 19:08:08 2010
@@ -147,9 +147,5 @@ public final class Job {
     log.info("Running KMeans");
     KMeansDriver.runJob(directoryContainingConvertedInput, new Path(output, Cluster.INITIAL_CLUSTERS_DIR), output, measureClass,
         convergenceDelta, maxIterations, 1, true);
-
-    ClusterDumper clusterDumper =
-        new ClusterDumper(new Path(output, "clusters-10"), new Path(output, "clusteredPoints"));
-    clusterDumper.printClusters(null);
   }
 }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java?rev=943637&r1=943636&r2=943637&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java Wed May 12 19:08:08 2010
@@ -132,11 +132,6 @@ public final class Job {
     InputDriver.runJob(input, directoryContainingConvertedInput);
     MeanShiftCanopyDriver.runJob(directoryContainingConvertedInput, output, measureClassName, t1, t2,
         convergenceDelta, maxIterations, true, true);
-
-    ClusterDumper clusterDumper =
-        new ClusterDumper(new Path(output, "clusters-10"), new Path(output, "clusteredPoints"));
-    clusterDumper.printClusters(null);
-
   }
 
 }