You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by je...@apache.org on 2010/07/15 19:53:52 UTC

svn commit: r964507 [3/3] - in /mahout/trunk: core/src/main/java/org/apache/mahout/clustering/canopy/ core/src/main/java/org/apache/mahout/clustering/dirichlet/ core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/ core/src/main/java/org/apache/...

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java?rev=964507&r1=964506&r2=964507&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java Thu Jul 15 17:53:51 2010
@@ -18,88 +18,88 @@
 package org.apache.mahout.clustering.syntheticcontrol.kmeans;
 
 import java.io.IOException;
+import java.util.Map;
 
-import org.apache.commons.cli2.CommandLine;
-import org.apache.commons.cli2.Group;
-import org.apache.commons.cli2.Option;
-import org.apache.commons.cli2.OptionException;
-import org.apache.commons.cli2.builder.ArgumentBuilder;
-import org.apache.commons.cli2.builder.DefaultOptionBuilder;
-import org.apache.commons.cli2.builder.GroupBuilder;
-import org.apache.commons.cli2.commandline.Parser;
 import org.apache.hadoop.fs.Path;
 import org.apache.mahout.clustering.Cluster;
 import org.apache.mahout.clustering.canopy.CanopyDriver;
 import org.apache.mahout.clustering.kmeans.KMeansDriver;
+import org.apache.mahout.clustering.kmeans.RandomSeedGenerator;
 import org.apache.mahout.clustering.syntheticcontrol.Constants;
 import org.apache.mahout.clustering.syntheticcontrol.canopy.InputDriver;
-import org.apache.mahout.common.CommandLineUtil;
 import org.apache.mahout.common.HadoopUtil;
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
+import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
+import org.apache.mahout.utils.clustering.ClusterDumper;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-public final class Job {
+public final class Job extends KMeansDriver {
 
   private static final Logger log = LoggerFactory.getLogger(Job.class);
 
   private Job() {
+    super();
   }
 
   public static void main(String[] args) throws Exception {
-    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
-    ArgumentBuilder abuilder = new ArgumentBuilder();
-    GroupBuilder gbuilder = new GroupBuilder();
-
-    Option inputOpt = DefaultOptionCreator.inputOption().withRequired(false).create();
-    Option outputOpt = DefaultOptionCreator.outputOption().withRequired(false).create();
-    Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().withRequired(false).create();
-    Option maxIterationsOpt = DefaultOptionCreator.maxIterationsOption().withRequired(false).create();
-
-    Option measureClassOpt = obuilder.withLongName("distance").withRequired(false).withArgument(
-        abuilder.withName("distance").withMinimum(1).withMaximum(1).create()).withDescription(
-        "The Distance Measure to use.  Default is SquaredEuclidean").withShortName("m").create();
-
-    Option t1Opt = obuilder.withLongName("t1").withRequired(false).withArgument(
-        abuilder.withName("t1").withMinimum(1).withMaximum(1).create()).withDescription("The t1 value to use.").withShortName("m")
-        .create();
-    Option t2Opt = obuilder.withLongName("t2").withRequired(false).withArgument(
-        abuilder.withName("t2").withMinimum(1).withMaximum(1).create()).withDescription("The t2 value to use.").withShortName("m")
-        .create();
-    Option vectorClassOpt = obuilder.withLongName("vectorClass").withRequired(false).withArgument(
-        abuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create()).withDescription(
-        "The Vector implementation class name.  Default is RandomAccessSparseVector.class").withShortName("v").create();
-
-    Option helpOpt = DefaultOptionCreator.helpOption();
-
-    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(measureClassOpt).withOption(
-        convergenceDeltaOpt).withOption(maxIterationsOpt).withOption(vectorClassOpt).withOption(t1Opt).withOption(t2Opt)
-        .withOption(helpOpt).create();
-    try {
-      Parser parser = new Parser();
-      parser.setGroup(group);
-      CommandLine cmdLine = parser.parse(args);
-
-      if (cmdLine.hasOption(helpOpt)) {
-        CommandLineUtil.printHelp(group);
-        return;
-      }
-      Path input = new Path(cmdLine.getValue(inputOpt, "testdata").toString());
-      Path output = new Path(cmdLine.getValue(outputOpt, "output").toString());
-      String measureClass = cmdLine.getValue(measureClassOpt, "org.apache.mahout.common.distance.EuclideanDistanceMeasure")
-          .toString();
-      double t1 = Double.parseDouble(cmdLine.getValue(t1Opt, "80").toString());
-      double t2 = Double.parseDouble(cmdLine.getValue(t2Opt, "55").toString());
-      double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt, "0.5").toString());
-      int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterationsOpt, 10).toString());
-
-      runJob(input, output, measureClass, t1, t2, convergenceDelta, maxIterations);
-    } catch (OptionException e) {
-      log.error("Exception", e);
-      CommandLineUtil.printHelp(group);
+    if (args.length > 0) {
+      log.info("Running with only user-supplied arguments");
+      new Job().run(args);
+    } else {
+      log.info("Running with default arguments");
+      Path output = new Path("output");
+      HadoopUtil.overwriteOutput(output);
+      new Job().job(new Path("testdata"), output, "org.apache.mahout.common.distance.EuclideanDistanceMeasure", 80, 55, 0.5, 10);
     }
   }
 
+  @Override
+  public int run(String[] args) throws Exception {
+
+    addInputOption();
+    addOutputOption();
+    addOption(DefaultOptionCreator.distanceMeasureOption().create());
+    addOption(DefaultOptionCreator.clustersInOption()
+        .withDescription("The input centroids, as Vectors.  Must be a SequenceFile of Writable, Cluster/Canopy.  "
+            + "If k is also specified, then a random set of vectors will be selected" + " and written out to this path first")
+        .create());
+    addOption(DefaultOptionCreator.numClustersOption()
+        .withDescription("The k in k-Means.  If specified, then a random selection of k Vectors will be chosen"
+            + " as the Centroid and written to the clusters input path.").create());
+    addOption(DefaultOptionCreator.convergenceOption().create());
+    addOption(DefaultOptionCreator.maxIterationsOption().create());
+    addOption(DefaultOptionCreator.overwriteOption().create());
+    addOption(DefaultOptionCreator.numReducersOption().create());
+    addOption(DefaultOptionCreator.clusteringOption().create());
+
+    Map<String, String> argMap = parseArguments(args);
+    if (argMap == null) {
+      return -1;
+    }
+
+    Path input = getInputPath();
+    Path clusters = new Path(argMap.get(DefaultOptionCreator.CLUSTERS_IN_OPTION_KEY));
+    Path output = getOutputPath();
+    String measureClass = argMap.get(DefaultOptionCreator.DISTANCE_MEASURE_OPTION_KEY);
+    if (measureClass == null) {
+      measureClass = SquaredEuclideanDistanceMeasure.class.getName();
+    }
+    double convergenceDelta = Double.parseDouble(argMap.get(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION_KEY));
+    int numReduceTasks = Integer.parseInt(argMap.get(DefaultOptionCreator.MAX_REDUCERS_OPTION_KEY));
+    int maxIterations = Integer.parseInt(argMap.get(DefaultOptionCreator.MAX_ITERATIONS_OPTION_KEY));
+    if (argMap.containsKey(DefaultOptionCreator.OVERWRITE_OPTION_KEY)) {
+      HadoopUtil.overwriteOutput(output);
+    }
+    if (argMap.containsKey(DefaultOptionCreator.NUM_CLUSTERS_OPTION_KEY)) {
+      clusters = RandomSeedGenerator.buildRandom(input, clusters, Integer.parseInt(argMap
+          .get(DefaultOptionCreator.NUM_CLUSTERS_OPTION_KEY)));
+    }
+    boolean runClustering = argMap.containsKey(DefaultOptionCreator.CLUSTERING_OPTION_KEY);
+    runJob(input, clusters, output, measureClass, convergenceDelta, maxIterations, numReduceTasks, runClustering);
+    return 0;
+  }
+
   /**
    * Run the kmeans clustering job on an input dataset using the given distance measure, t1, t2 and iteration
    * parameters. All output data will be written to the output directory, which will be initially deleted if
@@ -127,9 +127,8 @@ public final class Job {
    * @throws ClassNotFoundException 
    * @throws InterruptedException 
    */
-  private static void runJob(Path input, Path output, String measureClass, double t1, double t2, double convergenceDelta,
-      int maxIterations) throws IOException, InstantiationException, IllegalAccessException, InterruptedException,
-      ClassNotFoundException {
+  private void job(Path input, Path output, String measureClass, double t1, double t2, double convergenceDelta, int maxIterations)
+      throws IOException, InstantiationException, IllegalAccessException, InterruptedException, ClassNotFoundException {
     HadoopUtil.overwriteOutput(output);
 
     Path directoryContainingConvertedInput = new Path(output, Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT);
@@ -138,7 +137,17 @@ public final class Job {
     log.info("Running Canopy to get initial clusters");
     CanopyDriver.runJob(directoryContainingConvertedInput, output, measureClass, t1, t2, false);
     log.info("Running KMeans");
-    KMeansDriver.runJob(directoryContainingConvertedInput, new Path(output, Cluster.INITIAL_CLUSTERS_DIR), output, measureClass,
-        convergenceDelta, maxIterations, 1, true);
+    KMeansDriver.runJob(directoryContainingConvertedInput,
+                        new Path(output, Cluster.INITIAL_CLUSTERS_DIR),
+                        output,
+                        measureClass,
+                        convergenceDelta,
+                        maxIterations,
+                        1,
+                        true);
+    // run ClusterDumper
+    ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-" + maxIterations), new Path(output,
+                                                                                                            "clusteredPoints"));
+    clusterDumper.printClusters(null);
   }
 }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java?rev=964507&r1=964506&r2=964507&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java Thu Jul 15 17:53:51 2010
@@ -18,85 +18,79 @@
 package org.apache.mahout.clustering.syntheticcontrol.meanshift;
 
 import java.io.IOException;
+import java.util.Map;
 
-import org.apache.commons.cli2.CommandLine;
-import org.apache.commons.cli2.Group;
-import org.apache.commons.cli2.Option;
-import org.apache.commons.cli2.OptionException;
 import org.apache.commons.cli2.builder.ArgumentBuilder;
 import org.apache.commons.cli2.builder.DefaultOptionBuilder;
-import org.apache.commons.cli2.builder.GroupBuilder;
-import org.apache.commons.cli2.commandline.Parser;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.JobClient;
 import org.apache.mahout.clustering.meanshift.MeanShiftCanopyDriver;
 import org.apache.mahout.clustering.syntheticcontrol.Constants;
-import org.apache.mahout.common.CommandLineUtil;
 import org.apache.mahout.common.HadoopUtil;
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
+import org.apache.mahout.utils.clustering.ClusterDumper;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-public final class Job {
+public final class Job extends MeanShiftCanopyDriver {
 
   private static final Logger log = LoggerFactory.getLogger(Job.class);
 
   private Job() {
+    super();
   }
 
   public static void main(String[] args) throws Exception {
-    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
-    ArgumentBuilder abuilder = new ArgumentBuilder();
-    GroupBuilder gbuilder = new GroupBuilder();
-
-    Option inputOpt = DefaultOptionCreator.inputOption().withRequired(false).create();
-    Option outputOpt = DefaultOptionCreator.outputOption().withRequired(false).create();
-    Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().withRequired(false).create();
-    Option maxIterOpt = DefaultOptionCreator.maxIterationsOption().withRequired(false).create();
-    Option helpOpt = DefaultOptionCreator.helpOption();
-
-    Option modelOpt = obuilder.withLongName("distanceClass").withRequired(false).withShortName("d").withArgument(
-        abuilder.withName("distanceClass").withMinimum(1).withMaximum(1).create()).withDescription(
-        "The distance measure class name.").create();
-
-    Option threshold1Opt = obuilder.withLongName("threshold_1").withRequired(false).withShortName("t1").withArgument(
-        abuilder.withName("threshold_1").withMinimum(1).withMaximum(1).create())
-        .withDescription("The T1 distance threshold.").create();
-
-    Option threshold2Opt = obuilder.withLongName("threshold_2").withRequired(false).withShortName("t2").withArgument(
-        abuilder.withName("threshold_2").withMinimum(1).withMaximum(1).create())
-        .withDescription("The T1 distance threshold.").create();
-
-    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)
-        .withOption(modelOpt).withOption(helpOpt)
-        .withOption(convergenceDeltaOpt).withOption(threshold1Opt).withOption(maxIterOpt)
-        .withOption(threshold2Opt).create();
-
-    try {
-      Parser parser = new Parser();
-      parser.setGroup(group);
-      CommandLine cmdLine = parser.parse(args);
-      if (cmdLine.hasOption(helpOpt)) {
-        CommandLineUtil.printHelp(group);
-        return;
-      }
-
-      Path input = new Path(cmdLine.getValue(inputOpt, "testdata").toString());
-      Path output = new Path(cmdLine.getValue(outputOpt, "output").toString());
-      String measureClassName =
-          cmdLine.getValue(modelOpt, "org.apache.mahout.common.distance.EuclideanDistanceMeasure").toString();
-      double t1 = Double.parseDouble(cmdLine.getValue(threshold1Opt, "47.6").toString());
-      double t2 = Double.parseDouble(cmdLine.getValue(threshold2Opt, "1").toString());
-      double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt, "0.5").toString());
-      int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt, "10").toString());
-      runJob(input, output, measureClassName, t1, t2, convergenceDelta, maxIterations);
-    } catch (OptionException e) {
-      log.error("Exception parsing command line: ", e);
-      CommandLineUtil.printHelp(group);
+    if (args.length > 0) {
+      log.info("Running with only user-supplied arguments");
+      new Job().run(args);
+    } else {
+      log.info("Running with default arguments");
+      Path output = new Path("output");
+      HadoopUtil.overwriteOutput(output);
+      new Job().job(new Path("testdata"), output, "org.apache.mahout.common.distance.EuclideanDistanceMeasure", 47.6, 1, 0.5, 10);
     }
   }
 
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.util.Tool#run(java.lang.String[])
+   */
+  @Override
+  public int run(String[] args) throws Exception {
+    addInputOption();
+    addOutputOption();
+    addOption(DefaultOptionCreator.convergenceOption().create());
+    addOption(DefaultOptionCreator.maxIterationsOption().create());
+    addOption(DefaultOptionCreator.overwriteOption().create());
+    addOption(new DefaultOptionBuilder().withLongName(INPUT_IS_CANOPIES_OPTION).withRequired(false).withShortName("ic")
+        .withArgument(new ArgumentBuilder().withName(INPUT_IS_CANOPIES_OPTION).withMinimum(1).withMaximum(1).create())
+        .withDescription("If present, the input directory already contains MeanShiftCanopies").create());
+    addOption(DefaultOptionCreator.distanceMeasureOption().create());
+    addOption(DefaultOptionCreator.t1Option().create());
+    addOption(DefaultOptionCreator.t2Option().create());
+    addOption(DefaultOptionCreator.clusteringOption().create());
+
+    Map<String, String> argMap = parseArguments(args);
+    if (argMap == null) {
+      return -1;
+    }
+
+    Path input = getInputPath();
+    Path output = getOutputPath();
+    if (argMap.containsKey(DefaultOptionCreator.OVERWRITE_OPTION_KEY)) {
+      HadoopUtil.overwriteOutput(output);
+    }
+    String measureClass = argMap.get(DefaultOptionCreator.DISTANCE_MEASURE_OPTION_KEY);
+    double t1 = Double.parseDouble(argMap.get(DefaultOptionCreator.T1_OPTION_KEY));
+    double t2 = Double.parseDouble(argMap.get(DefaultOptionCreator.T2_OPTION_KEY));
+    boolean runClustering = argMap.containsKey(DefaultOptionCreator.CLUSTERING_OPTION_KEY);
+    double convergenceDelta = Double.parseDouble(argMap.get(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION_KEY));
+    int maxIterations = Integer.parseInt(argMap.get(DefaultOptionCreator.MAX_ITERATIONS_OPTION_KEY));
+    boolean inputIsCanopies = argMap.containsKey(INPUT_IS_CANOPIES_OPTION_KEY);
+
+    runJob(input, output, measureClass, t1, t2, convergenceDelta, maxIterations, inputIsCanopies, runClustering);
+    return 0;
+  }
+
   /**
    * Run the meanshift clustering job on an input dataset using the given distance measure, t1, t2 and
    * iteration parameters. All output data will be written to the output directory, which will be initially
@@ -123,24 +117,31 @@ public final class Job {
    * @throws InterruptedException 
    * @throws IllegalAccessException 
    * @throws InstantiationException 
+   * @throws IllegalAccessException 
+   * @throws InstantiationException 
    */
-  private static void runJob(Path input,
-                             Path output,
-                             String measureClassName,
-                             double t1,
-                             double t2,
-                             double convergenceDelta,
-                             int maxIterations) throws IOException, InterruptedException, ClassNotFoundException {
-    JobClient client = new JobClient();
-    Configuration conf = new Configuration();
-
-    client.setConf(conf);
-    HadoopUtil.overwriteOutput(output);
-
+  private void job(Path input,
+                   Path output,
+                   String measureClassName,
+                   double t1,
+                   double t2,
+                   double convergenceDelta,
+                   int maxIterations) throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
     Path directoryContainingConvertedInput = new Path(output, Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT);
     InputDriver.runJob(input, directoryContainingConvertedInput);
-    MeanShiftCanopyDriver.runJob(directoryContainingConvertedInput, output, measureClassName, t1, t2,
-        convergenceDelta, maxIterations, true, true);
+    MeanShiftCanopyDriver.runJob(directoryContainingConvertedInput,
+                                 output,
+                                 measureClassName,
+                                 t1,
+                                 t2,
+                                 convergenceDelta,
+                                 maxIterations,
+                                 true,
+                                 true);
+    // run ClusterDumper
+    ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-" + maxIterations), new Path(output,
+                                                                                                            "clusteredPoints"));
+    clusterDumper.printClusters(null);
   }
 
 }

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java?rev=964507&r1=964506&r2=964507&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java Thu Jul 15 17:53:51 2010
@@ -314,7 +314,7 @@ public final class ClusterDumper {
     FileStatus[] children = fs.listStatus(pointsPathDir, new PathFilter() {
       @Override
       public boolean accept(Path path) {
-        return !path.getName().endsWith(".crc");
+        return !(path.getName().endsWith(".crc") || path.getName().equals("_logs"));
       }
     });
 

Modified: mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java?rev=964507&r1=964506&r2=964507&view=diff
==============================================================================
--- mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java (original)
+++ mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java Thu Jul 15 17:53:51 2010
@@ -17,7 +17,6 @@
 
 package org.apache.mahout.clustering;
 
-import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Iterator;
@@ -42,7 +41,6 @@ import org.apache.mahout.clustering.fuzz
 import org.apache.mahout.clustering.kmeans.KMeansDriver;
 import org.apache.mahout.clustering.meanshift.MeanShiftCanopyDriver;
 import org.apache.mahout.common.MahoutTestCase;
-import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.common.distance.CosineDistanceMeasure;
 import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
 import org.apache.mahout.math.NamedVector;
@@ -83,17 +81,6 @@ public class TestClusterDumper extends M
     ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("testdata/file1"), fs, conf);
   }
 
-  private static void rmDir(File f) {
-    if (f != null && f.exists()) {
-      if (f.isDirectory()) {
-        for (File g : f.listFiles()) {
-          rmDir(g);
-        }
-      }
-      f.delete();
-    }
-  }
-
   private void getSampleData(String[] docs2) throws IOException {
     sampleData = new ArrayList<VectorWritable>();
     RAMDirectory directory = new RAMDirectory();
@@ -178,7 +165,7 @@ public class TestClusterDumper extends M
     // now run the KMeans job
     FuzzyKMeansDriver.runJob(getTestTempDirPath("testdata"), new Path(output, "clusters-0"), output,
                              EuclideanDistanceMeasure.class.getName(), 0.001, 10,
-        1, 1, (float) 1.1, true, true, 0);
+        1, (float) 1.1, true, true, 0);
     // run ClusterDumper
     ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-3"),
                                                     new Path(output, "clusteredPoints"));

Modified: mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java?rev=964507&r1=964506&r2=964507&view=diff
==============================================================================
--- mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java (original)
+++ mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java Thu Jul 15 17:53:51 2010
@@ -168,7 +168,7 @@ public class TestCDbwEvaluator extends M
     // now run the KMeans job
     Path output = getTestTempDirPath("output");
     FuzzyKMeansDriver.runJob(getTestTempDirPath("testdata"), new Path(output, "clusters-0"), output,
-                             EuclideanDistanceMeasure.class.getName(), 0.001, 10, 1, 1, 2, false, true, 0);
+                             EuclideanDistanceMeasure.class.getName(), 0.001, 10, 1, 2, true, true, 0);
     int numIterations = 2;
     CDbwDriver.runJob(new Path(output, "clusters-4"), new Path(output, "clusteredPoints"), output,
                       EuclideanDistanceMeasure.class.getName(), numIterations, 1);