You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sm...@apache.org on 2014/01/28 09:07:35 UTC

svn commit: r1561975 - in /mahout/trunk: core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/ core/src/main/java/org/apache/mahout/clustering/kmeans/ core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/ core/src/test/java/org/apache...

Author: smarthi
Date: Tue Jan 28 08:07:34 2014
New Revision: 1561975

URL: http://svn.apache.org/r1561975
Log:
MAHOUT-1310: Changed method signatures to remove unused DistanceMeasure parameter.

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java
    mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
    mahout/trunk/core/src/test/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReaderTest.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
    mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
    mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterEvaluator.java
    mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java?rev=1561975&r1=1561974&r2=1561975&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java Tue Jan 28 08:07:34 2014
@@ -110,7 +110,6 @@ public class FuzzyKMeansDriver extends A
         input,
         clusters,
         output,
-        measure,
         convergenceDelta,
         maxIterations,
         fuzziness,
@@ -124,32 +123,31 @@ public class FuzzyKMeansDriver extends A
   /**
    * Iterate over the input vectors to produce clusters and, if requested, use the
    * results of the final iteration to cluster the input vectors.
-   * 
+   *
    * @param input
    *          the directory pathname for input points
    * @param clustersIn
    *          the directory pathname for initial & computed clusters
    * @param output
-   *          the directory pathname for output points
+ *          the directory pathname for output points
    * @param convergenceDelta
-   *          the convergence delta value
+*          the convergence delta value
    * @param maxIterations
-   *          the maximum number of iterations
+*          the maximum number of iterations
    * @param m
-   *          the fuzzification factor, see
-   *          http://en.wikipedia.org/wiki/Data_clustering#Fuzzy_c-means_clustering
-   * @param runClustering 
-   *          true if points are to be clustered after iterations complete
+*          the fuzzification factor, see
+*          http://en.wikipedia.org/wiki/Data_clustering#Fuzzy_c-means_clustering
+   * @param runClustering
+*          true if points are to be clustered after iterations complete
    * @param emitMostLikely
-   *          a boolean if true emit only most likely cluster for each point
-   * @param threshold 
-   *          a double threshold value emits all clusters having greater pdf (emitMostLikely = false)
+*          a boolean if true emit only most likely cluster for each point
+   * @param threshold
+*          a double threshold value emits all clusters having greater pdf (emitMostLikely = false)
    * @param runSequential if true run in sequential execution mode
    */
   public static void run(Path input,
                          Path clustersIn,
                          Path output,
-                         DistanceMeasure measure,
                          double convergenceDelta,
                          int maxIterations,
                          float m,
@@ -162,7 +160,6 @@ public class FuzzyKMeansDriver extends A
                                      input,
                                      clustersIn,
                                      output,
-                                     measure,
                                      convergenceDelta,
                                      maxIterations,
                                      m,
@@ -172,7 +169,6 @@ public class FuzzyKMeansDriver extends A
       clusterData(conf, input,
                   clustersOut,
                   output,
-                  measure,
                   convergenceDelta,
                   m,
                   emitMostLikely,
@@ -189,27 +185,26 @@ public class FuzzyKMeansDriver extends A
    * @param clustersIn
    *          the directory pathname for initial & computed clusters
    * @param output
-   *          the directory pathname for output points
+ *          the directory pathname for output points
    * @param convergenceDelta
-   *          the convergence delta value
+*          the convergence delta value
    * @param maxIterations
-   *          the maximum number of iterations
+*          the maximum number of iterations
    * @param m
-   *          the fuzzification factor, see
-   *          http://en.wikipedia.org/wiki/Data_clustering#Fuzzy_c-means_clustering
-   * @param runClustering 
-   *          true if points are to be clustered after iterations complete
+*          the fuzzification factor, see
+*          http://en.wikipedia.org/wiki/Data_clustering#Fuzzy_c-means_clustering
+   * @param runClustering
+*          true if points are to be clustered after iterations complete
    * @param emitMostLikely
-   *          a boolean if true emit only most likely cluster for each point
-   * @param threshold 
-   *          a double threshold value emits all clusters having greater pdf (emitMostLikely = false)
+*          a boolean if true emit only most likely cluster for each point
+   * @param threshold
+*          a double threshold value emits all clusters having greater pdf (emitMostLikely = false)
    * @param runSequential if true run in sequential execution mode
    */
   public static void run(Configuration conf,
                          Path input,
                          Path clustersIn,
                          Path output,
-                         DistanceMeasure measure,
                          double convergenceDelta,
                          int maxIterations,
                          float m,
@@ -219,14 +214,13 @@ public class FuzzyKMeansDriver extends A
                          boolean runSequential)
     throws IOException, ClassNotFoundException, InterruptedException {
     Path clustersOut =
-        buildClusters(conf, input, clustersIn, output, measure, convergenceDelta, maxIterations, m, runSequential);
+        buildClusters(conf, input, clustersIn, output, convergenceDelta, maxIterations, m, runSequential);
     if (runClustering) {
       log.info("Clustering");
       clusterData(conf, 
                   input,
                   clustersOut,
                   output,
-                  measure,
                   convergenceDelta,
                   m,
                   emitMostLikely,
@@ -237,14 +231,13 @@ public class FuzzyKMeansDriver extends A
 
   /**
    * Iterate over the input vectors to produce cluster directories for each iteration
+   *
    * @param input
    *          the directory pathname for input points
    * @param clustersIn
    *          the file pathname for initial cluster centers
    * @param output
    *          the directory pathname for output points
-   * @param measure
-   *          the classname of the DistanceMeasure
    * @param convergenceDelta
    *          the convergence delta value
    * @param maxIterations
@@ -253,14 +246,13 @@ public class FuzzyKMeansDriver extends A
    *          the fuzzification factor, see
    *          http://en.wikipedia.org/wiki/Data_clustering#Fuzzy_c-means_clustering
    * @param runSequential if true run in sequential execution mode
-   * 
+   *
    * @return the Path of the final clusters directory
    */
   public static Path buildClusters(Configuration conf,
                                    Path input,
                                    Path clustersIn,
                                    Path output,
-                                   DistanceMeasure measure,
                                    double convergenceDelta,
                                    int maxIterations,
                                    float m,
@@ -293,28 +285,25 @@ public class FuzzyKMeansDriver extends A
 
   /**
    * Run the job using supplied arguments
-   * 
+   *
    * @param input
    *          the directory pathname for input points
    * @param clustersIn
    *          the directory pathname for input clusters
    * @param output
-   *          the directory pathname for output points
-   * @param measure
-   *          the classname of the DistanceMeasure
+ *          the directory pathname for output points
    * @param convergenceDelta
-   *          the convergence delta value
+*          the convergence delta value
    * @param emitMostLikely
-   *          a boolean if true emit only most likely cluster for each point
+*          a boolean if true emit only most likely cluster for each point
    * @param threshold
-   *          a double threshold value emits all clusters having greater pdf (emitMostLikely = false)
+*          a double threshold value emits all clusters having greater pdf (emitMostLikely = false)
    * @param runSequential if true run in sequential execution mode
    */
   public static void clusterData(Configuration conf,
                                  Path input,
                                  Path clustersIn,
                                  Path output,
-                                 DistanceMeasure measure,
                                  double convergenceDelta,
                                  float m,
                                  boolean emitMostLikely,

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java?rev=1561975&r1=1561974&r2=1561975&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java Tue Jan 28 08:07:34 2014
@@ -100,7 +100,7 @@ public class KMeansDriver extends Abstra
     if (hasOption(DefaultOptionCreator.OUTLIER_THRESHOLD)) {
       clusterClassificationThreshold = Double.parseDouble(getOption(DefaultOptionCreator.OUTLIER_THRESHOLD));
     }
-    run(getConf(), input, clusters, output, measure, convergenceDelta, maxIterations, runClustering,
+    run(getConf(), input, clusters, output, convergenceDelta, maxIterations, runClustering,
         clusterClassificationThreshold, runSequential);
     return 0;
   }
@@ -108,15 +108,13 @@ public class KMeansDriver extends Abstra
   /**
    * Iterate over the input vectors to produce clusters and, if requested, use the results of the final iteration to
    * cluster the input vectors.
-   * 
+   *
    * @param input
    *          the directory pathname for input points
    * @param clustersIn
    *          the directory pathname for initial & computed clusters
    * @param output
    *          the directory pathname for output points
-   * @param measure
-   *          the DistanceMeasure to use
    * @param convergenceDelta
    *          the convergence delta value
    * @param maxIterations
@@ -129,36 +127,33 @@ public class KMeansDriver extends Abstra
    * @param runSequential
    *          if true execute sequential algorithm
    */
-  public static void run(Configuration conf, Path input, Path clustersIn, Path output, DistanceMeasure measure,
-      double convergenceDelta, int maxIterations, boolean runClustering, double clusterClassificationThreshold,
-      boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
+  public static void run(Configuration conf, Path input, Path clustersIn, Path output,
+    double convergenceDelta, int maxIterations, boolean runClustering, double clusterClassificationThreshold,
+    boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
     
     // iterate until the clusters converge
     String delta = Double.toString(convergenceDelta);
     if (log.isInfoEnabled()) {
-      log.info("Input: {} Clusters In: {} Out: {} Distance: {}", input, clustersIn, output,
-               measure.getClass().getName());
+      log.info("Input: {} Clusters In: {} Out: {}", input, clustersIn, output);
       log.info("convergence: {} max Iterations: {}", convergenceDelta, maxIterations);
     }
-    Path clustersOut = buildClusters(conf, input, clustersIn, output, measure, maxIterations, delta, runSequential);
+    Path clustersOut = buildClusters(conf, input, clustersIn, output, maxIterations, delta, runSequential);
     if (runClustering) {
       log.info("Clustering data");
-      clusterData(conf, input, clustersOut, output, measure, clusterClassificationThreshold, runSequential);
+      clusterData(conf, input, clustersOut, output, clusterClassificationThreshold, runSequential);
     }
   }
   
   /**
    * Iterate over the input vectors to produce clusters and, if requested, use the results of the final iteration to
    * cluster the input vectors.
-   * 
+   *
    * @param input
    *          the directory pathname for input points
    * @param clustersIn
    *          the directory pathname for initial & computed clusters
    * @param output
    *          the directory pathname for output points
-   * @param measure
-   *          the DistanceMeasure to use
    * @param convergenceDelta
    *          the convergence delta value
    * @param maxIterations
@@ -166,21 +161,22 @@ public class KMeansDriver extends Abstra
    * @param runClustering
    *          true if points are to be clustered after iterations are completed
    * @param clusterClassificationThreshold
-   *          Is a clustering strictness / outlier removal parrameter. Its value should be between 0 and 1. Vectors
+   *          Is a clustering strictness / outlier removal parameter. Its value should be between 0 and 1. Vectors
    *          having pdf below this value will not be clustered.
    * @param runSequential
    *          if true execute sequential algorithm
    */
-  public static void run(Path input, Path clustersIn, Path output, DistanceMeasure measure, double convergenceDelta,
-      int maxIterations, boolean runClustering, double clusterClassificationThreshold, boolean runSequential)
+  public static void run(Path input, Path clustersIn, Path output, double convergenceDelta,
+    int maxIterations, boolean runClustering, double clusterClassificationThreshold, boolean runSequential)
     throws IOException, InterruptedException, ClassNotFoundException {
-    run(new Configuration(), input, clustersIn, output, measure, convergenceDelta, maxIterations, runClustering,
+    run(new Configuration(), input, clustersIn, output, convergenceDelta, maxIterations, runClustering,
         clusterClassificationThreshold, runSequential);
   }
   
   /**
    * Iterate over the input vectors to produce cluster directories for each iteration
    * 
+   *
    * @param conf
    *          the Configuration to use
    * @param input
@@ -189,20 +185,18 @@ public class KMeansDriver extends Abstra
    *          the directory pathname for initial & computed clusters
    * @param output
    *          the directory pathname for output points
-   * @param measure
-   *          the classname of the DistanceMeasure
    * @param maxIterations
    *          the maximum number of iterations
    * @param delta
    *          the convergence delta value
    * @param runSequential
    *          if true execute sequential algorithm
-   * 
+   *
    * @return the Path of the final clusters directory
    */
   public static Path buildClusters(Configuration conf, Path input, Path clustersIn, Path output,
-      DistanceMeasure measure, int maxIterations, String delta, boolean runSequential) throws IOException,
-      InterruptedException, ClassNotFoundException {
+    int maxIterations, String delta, boolean runSequential) throws IOException,
+    InterruptedException, ClassNotFoundException {
     
     double convergenceDelta = Double.parseDouble(delta);
     List<Cluster> clusters = Lists.newArrayList();
@@ -227,28 +221,26 @@ public class KMeansDriver extends Abstra
   
   /**
    * Run the job using supplied arguments
-   * 
+   *
    * @param input
    *          the directory pathname for input points
    * @param clustersIn
    *          the directory pathname for input clusters
    * @param output
    *          the directory pathname for output points
-   * @param measure
-   *          the classname of the DistanceMeasure
    * @param clusterClassificationThreshold
-   *          Is a clustering strictness / outlier removal parrameter. Its value should be between 0 and 1. Vectors
+   *          Is a clustering strictness / outlier removal parameter. Its value should be between 0 and 1. Vectors
    *          having pdf below this value will not be clustered.
    * @param runSequential
    *          if true execute sequential algorithm
    */
-  public static void clusterData(Configuration conf, Path input, Path clustersIn, Path output, DistanceMeasure measure,
-      double clusterClassificationThreshold, boolean runSequential) throws IOException, InterruptedException,
-      ClassNotFoundException {
+  public static void clusterData(Configuration conf, Path input, Path clustersIn, Path output,
+    double clusterClassificationThreshold, boolean runSequential) throws IOException, InterruptedException,
+    ClassNotFoundException {
     
     if (log.isInfoEnabled()) {
       log.info("Running Clustering");
-      log.info("Input: {} Clusters In: {} Out: {} Distance: {}", input, clustersIn, output, measure);
+      log.info("Input: {} Clusters In: {} Out: {}", input, clustersIn, output);
     }
     ClusterClassifier.writePolicy(new KMeansClusteringPolicy(), clustersIn);
     ClusterClassificationDriver.run(conf, input, output, new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY),

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java?rev=1561975&r1=1561974&r2=1561975&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java Tue Jan 28 08:07:34 2014
@@ -152,6 +152,7 @@ public class SpectralKMeansDriver extend
    * @param ssvd
    *          Flag to indicate the eigensolver to use
    * @param numReducers
+   *          Number of reducers
    * @param blockHeight
    * @param oversampling
    * @param poweriters
@@ -244,7 +245,7 @@ public class SpectralKMeansDriver extend
 
     // Run the KMeansDriver
     Path answer = new Path(output, "kmeans_out");
-    KMeansDriver.run(conf, data, initialclusters, answer, measure, convergenceDelta, maxIterations, true, 0.0, false);
+    KMeansDriver.run(conf, data, initialclusters, answer, convergenceDelta, maxIterations, true, 0.0, false);
 
     // Restore name to id mapping and read through the cluster assignments
     Path mappingPath = new Path(new Path(conf.get("hadoop.tmp.dir")), "generic_input_mapping");

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java?rev=1561975&r1=1561974&r2=1561975&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java Tue Jan 28 08:07:34 2014
@@ -349,8 +349,8 @@ public final class TestKmeansClustering 
 
     // now run the KMeans job
     Path kmeansOutput = new Path(outputPath, "kmeans");
-	KMeansDriver.run(getConfiguration(), pointsPath, new Path(outputPath, "clusters-0-final"), kmeansOutput, new EuclideanDistanceMeasure(),
-        0.001, 10, true, 0.0, false);
+	  KMeansDriver.run(getConfiguration(), pointsPath, new Path(outputPath, "clusters-0-final"), kmeansOutput,
+      0.001, 10, true, 0.0, false);
     
     // now compare the expected clusters with actual
     Path clusteredPointsPath = new Path(kmeansOutput, "clusteredPoints");

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReaderTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReaderTest.java?rev=1561975&r1=1561974&r2=1561975&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReaderTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReaderTest.java Tue Jan 28 08:07:34 2014
@@ -101,7 +101,7 @@ public final class ClusterCountReaderTes
     CanopyDriver.run(conf, pointsPath, outputPathForCanopy, measure, 4.0, 3.0, true, 0.0, true);
     Path clustersIn = new Path(outputPathForCanopy, new Path(Cluster.CLUSTERS_DIR + '0'
                                                                    + Cluster.FINAL_ITERATION_SUFFIX));
-    KMeansDriver.run(conf, pointsPath, clustersIn, outputPathForKMeans, measure, 1, 1, true, 0.0, true);
+    KMeansDriver.run(conf, pointsPath, clustersIn, outputPathForKMeans, 1, 1, true, 0.0, true);
   }
   
   private static void verifyThatNumberOfClustersIsCorrect(Configuration conf, Path clusteredPointsPath) {

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java?rev=1561975&r1=1561974&r2=1561975&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java Tue Jan 28 08:07:34 2014
@@ -102,7 +102,7 @@ public class DisplayFuzzyKMeans extends 
       ClassNotFoundException, InterruptedException {
     Path clustersIn = new Path(output, "random-seeds");
     RandomSeedGenerator.buildRandom(conf, samples, clustersIn, 3, measure);
-    FuzzyKMeansDriver.run(samples, clustersIn, output, measure, threshold, maxIterations, m, true, true, threshold,
+    FuzzyKMeansDriver.run(samples, clustersIn, output, threshold, maxIterations, m, true, true, threshold,
         true);
     
     loadClustersWritable(output);

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java?rev=1561975&r1=1561974&r2=1561975&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java Tue Jan 28 08:07:34 2014
@@ -93,7 +93,7 @@ public class DisplayKMeans extends Displ
     throws IOException, InterruptedException, ClassNotFoundException {
     Path clustersIn = new Path(output, "random-seeds");
     RandomSeedGenerator.buildRandom(conf, samples, clustersIn, numClusters, measure);
-    KMeansDriver.run(samples, clustersIn, output, measure, convergenceDelta, maxIterations, true, 0.0, true);
+    KMeansDriver.run(samples, clustersIn, output, convergenceDelta, maxIterations, true, 0.0, true);
     loadClustersWritable(output);
   }
   

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java?rev=1561975&r1=1561974&r2=1561975&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java Tue Jan 28 08:07:34 2014
@@ -133,14 +133,12 @@ public final class Job extends AbstractJ
     InputDriver.runJob(input, directoryContainingConvertedInput, "org.apache.mahout.math.RandomAccessSparseVector");
     log.info("Running Canopy to get initial clusters");
     Path canopyOutput = new Path(output, "canopies");
-    CanopyDriver
-        .run(new Configuration(), directoryContainingConvertedInput, canopyOutput, measure, t1, t2, false, 0.0, false);
+    CanopyDriver.run(new Configuration(), directoryContainingConvertedInput, canopyOutput, measure, t1, t2, false, 0.0, false);
     log.info("Running FuzzyKMeans");
     FuzzyKMeansDriver.run(directoryContainingConvertedInput, new Path(canopyOutput, "clusters-0-final"), output,
-        measure, convergenceDelta, maxIterations, fuzziness, true, true, 0.0, false);
+        convergenceDelta, maxIterations, fuzziness, true, true, 0.0, false);
     // run ClusterDumper
-    ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-*-final"), new Path(output,
-        "clusteredPoints"));
+    ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-*-final"), new Path(output, "clusteredPoints"));
     clusterDumper.printClusters(null);
   }
 }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java?rev=1561975&r1=1561974&r2=1561975&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java Tue Jan 28 08:07:34 2014
@@ -132,12 +132,11 @@ public final class Job extends AbstractJ
     Path clusters = new Path(output, "random-seeds");
     clusters = RandomSeedGenerator.buildRandom(conf, directoryContainingConvertedInput, clusters, k, measure);
     log.info("Running KMeans with k = {}", k);
-    KMeansDriver.run(conf, directoryContainingConvertedInput, clusters, output, measure, convergenceDelta,
+    KMeansDriver.run(conf, directoryContainingConvertedInput, clusters, output, convergenceDelta,
         maxIterations, true, 0.0, false);
     // run ClusterDumper
     Path outGlob = new Path(output, "clusters-*-final");
-    Path clusteredPoints = new Path(output,
-            "clusteredPoints");
+    Path clusteredPoints = new Path(output,"clusteredPoints");
     log.info("Dumping out clusters from clusters: {} and clusteredPoints: {}", outGlob, clusteredPoints);
     ClusterDumper clusterDumper = new ClusterDumper(outGlob, clusteredPoints);
     clusterDumper.printClusters(null);
@@ -179,7 +178,7 @@ public final class Job extends AbstractJ
         false);
     log.info("Running KMeans");
     KMeansDriver.run(conf, directoryContainingConvertedInput, new Path(canopyOutput, Cluster.INITIAL_CLUSTERS_DIR
-        + "-final"), output, measure, convergenceDelta, maxIterations, true, 0.0, false);
+        + "-final"), output, convergenceDelta, maxIterations, true, 0.0, false);
     // run ClusterDumper
     ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-*-final"), new Path(output,
         "clusteredPoints"));

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java?rev=1561975&r1=1561974&r2=1561975&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java Tue Jan 28 08:07:34 2014
@@ -201,7 +201,7 @@ public final class TestClusterDumper ext
     // now run the KMeans job
     Path kMeansOutput = new Path(output, "kmeans");
     KMeansDriver.run(conf, getTestTempDirPath("testdata"), new Path(output,
-        "clusters-0-final"), kMeansOutput, measure, 0.001, 10, true, 0.0, false);
+        "clusters-0-final"), kMeansOutput, 0.001, 10, true, 0.0, false);
     // run ClusterDumper
     ClusterDumper clusterDumper = new ClusterDumper(finalClusterPath(conf,
         output, 10), new Path(kMeansOutput, "clusteredPoints"));
@@ -219,7 +219,7 @@ public final class TestClusterDumper ext
     // now run the KMeans job
     Path kmeansOutput = new Path(output, "kmeans");
     KMeansDriver.run(conf, getTestTempDirPath("testdata"), new Path(output,
-        "clusters-0-final"), kmeansOutput, measure, 0.001, 10, true, 0.0, false);
+        "clusters-0-final"), kmeansOutput, 0.001, 10, true, 0.0, false);
     // run ClusterDumper
     ClusterDumper clusterDumper = new ClusterDumper(finalClusterPath(conf,
         output, 10), new Path(kmeansOutput, "clusteredPoints"));
@@ -238,7 +238,7 @@ public final class TestClusterDumper ext
     // now run the Fuzzy KMeans job
     Path kMeansOutput = new Path(output, "kmeans");
     FuzzyKMeansDriver.run(conf, getTestTempDirPath("testdata"), new Path(
-        output, "clusters-0-final"), kMeansOutput, measure, 0.001, 10, 1.1f, true,
+        output, "clusters-0-final"), kMeansOutput, 0.001, 10, 1.1f, true,
         true, 0, true);
     // run ClusterDumper
     ClusterDumper clusterDumper = new ClusterDumper(finalClusterPath(conf,

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterEvaluator.java?rev=1561975&r1=1561974&r2=1561975&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterEvaluator.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterEvaluator.java Tue Jan 28 08:07:34 2014
@@ -284,7 +284,7 @@ public final class TestClusterEvaluator 
     CanopyDriver.run(conf, testdata, output, measure, 3.1, 1.1, false, 0.0, true);
     // now run the KMeans job
     Path kmeansOutput = new Path(output, "kmeans");
-    KMeansDriver.run(testdata, new Path(output, "clusters-0-final"), kmeansOutput, measure, 0.001, 10, true, 0.0, true);
+    KMeansDriver.run(testdata, new Path(output, "clusters-0-final"), kmeansOutput, 0.001, 10, true, 0.0, true);
     int numIterations = 10;
     Path clustersIn = new Path(kmeansOutput, "clusters-2");
     RepresentativePointsDriver.run(conf, clustersIn, new Path(kmeansOutput, "clusteredPoints"), kmeansOutput, measure,
@@ -305,7 +305,7 @@ public final class TestClusterEvaluator 
     CanopyDriver.run(conf, testdata, output, measure, 3.1, 1.1, false, 0.0, true);
     Path fuzzyKMeansOutput = new Path(output, "fuzzyk");
     // now run the KMeans job
-    FuzzyKMeansDriver.run(testdata, new Path(output, "clusters-0-final"), fuzzyKMeansOutput, measure, 0.001, 10, 2,
+    FuzzyKMeansDriver.run(testdata, new Path(output, "clusters-0-final"), fuzzyKMeansOutput, 0.001, 10, 2,
         true, true, 0, true);
     int numIterations = 10;
     Path clustersIn = new Path(fuzzyKMeansOutput, "clusters-4");

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java?rev=1561975&r1=1561974&r2=1561975&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java Tue Jan 28 08:07:34 2014
@@ -124,7 +124,6 @@ public final class TestCDbwEvaluator ext
    *          double y-value of the sample mean
    * @param sd
    *          double standard deviation of the samples
-   * @throws Exception
    */
   private void generateSamples(int num, double mx, double my, double sd) {
     log.info("Generating {} samples m=[{}, {}] sd={}", num, mx, my, sd);
@@ -288,7 +287,7 @@ public final class TestCDbwEvaluator ext
     CanopyDriver.run(getConfiguration(), testdata, output, measure, 3.1, 2.1, false, 0.0, true);
     // now run the KMeans job
     Path kmeansOutput = new Path(output, "kmeans");
-    KMeansDriver.run(testdata, new Path(output, "clusters-0-final"), kmeansOutput, measure, 0.001, 10, true, 0.0, true);
+    KMeansDriver.run(testdata, new Path(output, "clusters-0-final"), kmeansOutput, 0.001, 10, true, 0.0, true);
     int numIterations = 10;
     Path clustersIn = new Path(kmeansOutput, "clusters-10-final");
     RepresentativePointsDriver.run(conf, clustersIn, new Path(kmeansOutput, "clusteredPoints"), kmeansOutput, measure,
@@ -310,7 +309,7 @@ public final class TestCDbwEvaluator ext
     CanopyDriver.run(getConfiguration(), testdata, output, measure, 3.1, 2.1, false, 0.0, true);
     Path fuzzyKMeansOutput = new Path(output, "fuzzyk");
     // now run the KMeans job
-    FuzzyKMeansDriver.run(testdata, new Path(output, "clusters-0-final"), fuzzyKMeansOutput, measure, 0.001, 10, 2,
+    FuzzyKMeansDriver.run(testdata, new Path(output, "clusters-0-final"), fuzzyKMeansOutput, 0.001, 10, 2,
         true, true, 0, true);
     int numIterations = 10;
     Path clustersIn = new Path(fuzzyKMeansOutput, "clusters-4");