You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by je...@apache.org on 2010/08/18 23:47:32 UTC

svn commit: r986960 [2/2] - in /mahout/trunk: core/src/main/java/org/apache/mahout/clustering/ core/src/main/java/org/apache/mahout/clustering/canopy/ core/src/main/java/org/apache/mahout/clustering/dirichlet/ core/src/main/java/org/apache/mahout/clust...

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java?rev=986960&r1=986959&r2=986960&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java Wed Aug 18 21:47:30 2010
@@ -39,6 +39,7 @@ import org.apache.mahout.clustering.Abst
 import org.apache.mahout.clustering.Cluster;
 import org.apache.mahout.clustering.WeightedVectorWritable;
 import org.apache.mahout.clustering.canopy.CanopyDriver;
+import org.apache.mahout.clustering.kmeans.KMeansConfigKeys;
 import org.apache.mahout.clustering.kmeans.OutputLogFilter;
 import org.apache.mahout.common.AbstractJob;
 import org.apache.mahout.common.HadoopUtil;
@@ -69,8 +70,8 @@ public class MeanShiftCanopyDriver exten
    *          the input pathname String
    * @param output
    *          the output pathname String
-   * @param measureClassName
-   *          the DistanceMeasure class name
+   * @param measure
+   *          the DistanceMeasure
    * @param t1
    *          the T1 distance threshold
    * @param t2
@@ -87,7 +88,7 @@ public class MeanShiftCanopyDriver exten
    */
   public static void runJob(Path input,
                             Path output,
-                            String measureClassName,
+                            DistanceMeasure measure,
                             double t1,
                             double t2,
                             double convergenceDelta,
@@ -98,7 +99,7 @@ public class MeanShiftCanopyDriver exten
       throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
     new MeanShiftCanopyDriver().job(input,
                                     output,
-                                    measureClassName,
+                                    measure,
                                     t1,
                                     t2,
                                     convergenceDelta,
@@ -142,10 +143,12 @@ public class MeanShiftCanopyDriver exten
     boolean inputIsCanopies = hasOption(INPUT_IS_CANOPIES_OPTION);
     boolean runSequential = (getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(
         DefaultOptionCreator.SEQUENTIAL_METHOD));
+    ClassLoader ccl = Thread.currentThread().getContextClassLoader();
+    DistanceMeasure measure = (DistanceMeasure) ((Class<?>) ccl.loadClass(measureClass)).newInstance();
 
     job(input,
         output,
-        measureClass,
+        measure,
         t1,
         t2,
         convergenceDelta,
@@ -215,8 +218,8 @@ public class MeanShiftCanopyDriver exten
    *          the input pathname String
    * @param output
    *          the output pathname String
-   * @param measureClassName
-   *          the DistanceMeasure class name
+   * @param measure
+   *          the DistanceMeasure
    * @param t1
    *          the T1 distance threshold
    * @param t2
@@ -233,7 +236,7 @@ public class MeanShiftCanopyDriver exten
    */
   public void job(Path input,
                   Path output,
-                  String measureClassName,
+                  DistanceMeasure measure,
                   double t1,
                   double t2,
                   double convergenceDelta,
@@ -246,11 +249,8 @@ public class MeanShiftCanopyDriver exten
     if (inputIsCanopies) {
       clustersIn = input;
     } else {
-      createCanopyFromVectors(input, clustersIn, runSequential);
+      createCanopyFromVectors(input, clustersIn, measure, runSequential);
     }
-    ClassLoader ccl = Thread.currentThread().getContextClassLoader();
-    Class<?> cl = ccl.loadClass(measureClassName);
-    DistanceMeasure measure = (DistanceMeasure) cl.newInstance();
 
     Path clustersOut =
         buildClusters(clustersIn, output, measure, t1, t2, convergenceDelta, maxIterations, runSequential);
@@ -263,20 +263,21 @@ public class MeanShiftCanopyDriver exten
     }
   }
 
-  public void createCanopyFromVectors(Path input, Path output, boolean runSequential)
+  public void createCanopyFromVectors(Path input, Path output, DistanceMeasure measure, boolean runSequential)
       throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
     if (runSequential) {
-      createCanopyFromVectorsSeq(input, output);
+      createCanopyFromVectorsSeq(input, output, measure);
     } else {
-      createCanopyFromVectorsMR(input, output);
+      createCanopyFromVectorsMR(input, output, measure);
     }
   }
 
   /**
    * @param input the Path to the input VectorWritable data
    * @param output the Path to the initial clusters directory
+   * @param measure the DistanceMeasure
    */
-  private void createCanopyFromVectorsSeq(Path input, Path output)
+  private void createCanopyFromVectorsSeq(Path input, Path output, DistanceMeasure measure)
       throws IOException, InstantiationException, IllegalAccessException {
     Configuration conf = new Configuration();
     FileSystem fs = FileSystem.get(input.toUri(), conf);
@@ -294,7 +295,7 @@ public class MeanShiftCanopyDriver exten
         Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
         VectorWritable vw = (VectorWritable) reader.getValueClass().newInstance();
         while (reader.next(key, vw)) {
-          writer.append(new Text(), new MeanShiftCanopy(vw.get(), id++));
+          writer.append(new Text(), new MeanShiftCanopy(vw.get(), id++, measure));
           vw = (VectorWritable) reader.getValueClass().newInstance();
         }
       } finally {
@@ -304,9 +305,10 @@ public class MeanShiftCanopyDriver exten
     }
   }
 
-  private void createCanopyFromVectorsMR(Path input, Path output)
+  private void createCanopyFromVectorsMR(Path input, Path output, DistanceMeasure measure)
       throws IOException, InterruptedException, ClassNotFoundException {
     Configuration conf = new Configuration();
+    conf.set(KMeansConfigKeys.DISTANCE_MEASURE_KEY, measure.getClass().getName());
     Job job = new Job(conf);
     job.setJarByClass(MeanShiftCanopyDriver.class);
     job.setOutputKeyClass(Text.class);

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterInterface.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterInterface.java?rev=986960&r1=986959&r2=986960&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterInterface.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterInterface.java Wed Aug 18 21:47:30 2010
@@ -29,6 +29,8 @@ import org.apache.mahout.clustering.diri
 import org.apache.mahout.clustering.dirichlet.models.SampledNormalModel;
 import org.apache.mahout.clustering.meanshift.MeanShiftCanopy;
 import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.common.distance.DistanceMeasure;
+import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
 import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.SequentialAccessSparseVector;
 import org.apache.mahout.math.Vector;
@@ -42,6 +44,7 @@ public class TestClusterInterface extend
 
   private static final Type MODEL_TYPE = new TypeToken<Model<Vector>>() {}.getType();
   private static final Type CLUSTER_TYPE = new TypeToken<DirichletCluster<Vector>>() {}.getType();
+  private static final DistanceMeasure measure = new ManhattanDistanceMeasure();
 
   public void testDirichletNormalModel() {
     double[] d = { 1.1, 2.2, 3.3 };
@@ -173,7 +176,7 @@ public class TestClusterInterface extend
   public void testCanopyAsFormatString() {
     double[] d = { 1.1, 2.2, 3.3 };
     Vector m = new DenseVector(d);
-    Cluster cluster = new Canopy(m, 123);
+    Cluster cluster = new Canopy(m, 123, measure);
     String formatString = cluster.asFormatString(null);
     System.out.println(formatString);
     assertEquals("format", "C-123{n=0 c=[1.100, 2.200, 3.300] r=[]}", formatString);
@@ -183,7 +186,7 @@ public class TestClusterInterface extend
     double[] d = { 1.1, 0.0, 3.3 };
     Vector m = new SequentialAccessSparseVector(3);
     m.assign(d);
-    Cluster cluster = new Canopy(m, 123);
+    Cluster cluster = new Canopy(m, 123, measure);
     String formatString = cluster.asFormatString(null);
     System.out.println(formatString);
     assertEquals("format", "C-123{n=0 c=[0:1.100, 2:3.300] r=[]}", formatString);
@@ -192,7 +195,7 @@ public class TestClusterInterface extend
   public void testCanopyAsFormatStringWithBindings() {
     double[] d = { 1.1, 2.2, 3.3 };
     Vector m = new DenseVector(d);
-    Cluster cluster = new Canopy(m, 123);
+    Cluster cluster = new Canopy(m, 123, measure);
     String[] bindings = { "fee", null, null };
     String formatString = cluster.asFormatString(bindings);
     System.out.println(formatString);
@@ -203,7 +206,7 @@ public class TestClusterInterface extend
     double[] d = { 1.1, 0.0, 3.3 };
     Vector m = new SequentialAccessSparseVector(3);
     m.assign(d);
-    Cluster cluster = new Canopy(m, 123);
+    Cluster cluster = new Canopy(m, 123, measure);
     String formatString = cluster.asFormatString(null);
     System.out.println(formatString);
     assertEquals("format", "C-123{n=0 c=[0:1.100, 2:3.300] r=[]}", formatString);
@@ -212,7 +215,7 @@ public class TestClusterInterface extend
   public void testClusterAsFormatString() {
     double[] d = { 1.1, 2.2, 3.3 };
     Vector m = new DenseVector(d);
-    Cluster cluster = new org.apache.mahout.clustering.kmeans.Cluster(m, 123);
+    Cluster cluster = new org.apache.mahout.clustering.kmeans.Cluster(m, 123, measure);
     String formatString = cluster.asFormatString(null);
     System.out.println(formatString);
     assertEquals("format", "CL-123{n=0 c=[1.100, 2.200, 3.300] r=[]}", formatString);
@@ -222,7 +225,7 @@ public class TestClusterInterface extend
     double[] d = { 1.1, 0.0, 3.3 };
     Vector m = new SequentialAccessSparseVector(3);
     m.assign(d);
-    Cluster cluster = new org.apache.mahout.clustering.kmeans.Cluster(m, 123);
+    Cluster cluster = new org.apache.mahout.clustering.kmeans.Cluster(m, 123, measure);
     String formatString = cluster.asFormatString(null);
     System.out.println(formatString);
     assertEquals("format", "CL-123{n=0 c=[0:1.100, 2:3.300] r=[]}", formatString);
@@ -231,7 +234,7 @@ public class TestClusterInterface extend
   public void testClusterAsFormatStringWithBindings() {
     double[] d = { 1.1, 2.2, 3.3 };
     Vector m = new DenseVector(d);
-    Cluster cluster = new org.apache.mahout.clustering.kmeans.Cluster(m, 123);
+    Cluster cluster = new org.apache.mahout.clustering.kmeans.Cluster(m, 123, measure);
     String[] bindings = { "fee", null, "foo" };
     String formatString = cluster.asFormatString(bindings);
     System.out.println(formatString);
@@ -242,7 +245,7 @@ public class TestClusterInterface extend
     double[] d = { 1.1, 0.0, 3.3 };
     Vector m = new SequentialAccessSparseVector(3);
     m.assign(d);
-    Cluster cluster = new org.apache.mahout.clustering.kmeans.Cluster(m, 123);
+    Cluster cluster = new org.apache.mahout.clustering.kmeans.Cluster(m, 123, measure);
     String formatString = cluster.asFormatString(null);
     System.out.println(formatString);
     assertEquals("format", "CL-123{n=0 c=[0:1.100, 2:3.300] r=[]}", formatString);
@@ -251,7 +254,7 @@ public class TestClusterInterface extend
   public void testMSCanopyAsFormatString() {
     double[] d = { 1.1, 2.2, 3.3 };
     Vector m = new DenseVector(d);
-    Cluster cluster = new MeanShiftCanopy(m, 123);
+    Cluster cluster = new MeanShiftCanopy(m, 123, measure);
     String formatString = cluster.asFormatString(null);
     System.out.println(formatString);
     assertEquals("format", "MSC-123{n=0 c=[1.100, 2.200, 3.300] r=[]}", formatString);
@@ -261,7 +264,7 @@ public class TestClusterInterface extend
     double[] d = { 1.1, 0.0, 3.3 };
     Vector m = new SequentialAccessSparseVector(3);
     m.assign(d);
-    Cluster cluster = new MeanShiftCanopy(m, 123);
+    Cluster cluster = new MeanShiftCanopy(m, 123, measure);
     String formatString = cluster.asFormatString(null);
     System.out.println(formatString);
     assertEquals("format", "MSC-123{n=0 c=[0:1.100, 2:3.300] r=[]}", formatString);
@@ -270,7 +273,7 @@ public class TestClusterInterface extend
   public void testMSCanopyAsFormatStringWithBindings() {
     double[] d = { 1.1, 2.2, 3.3 };
     Vector m = new DenseVector(d);
-    Cluster cluster = new MeanShiftCanopy(m, 123);
+    Cluster cluster = new MeanShiftCanopy(m, 123, measure);
     String[] bindings = { "fee", null, "foo" };
     String formatString = cluster.asFormatString(bindings);
     System.out.println(formatString);
@@ -281,7 +284,7 @@ public class TestClusterInterface extend
     double[] d = { 1.1, 0.0, 3.3 };
     Vector m = new SequentialAccessSparseVector(3);
     m.assign(d);
-    Cluster cluster = new MeanShiftCanopy(m, 123);
+    Cluster cluster = new MeanShiftCanopy(m, 123, measure);
     String[] bindings = { "fee", null, "foo" };
     String formatString = cluster.asFormatString(bindings);
     System.out.println(formatString);

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java?rev=986960&r1=986959&r2=986960&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java Wed Aug 18 21:47:30 2010
@@ -288,7 +288,7 @@ public class TestCanopyCreation extends 
     ClusteringTestUtils.writePointsToFile(points, getTestTempFilePath("testdata/file2"), fs, config);
     // now run the Canopy Driver
     Path output = getTestTempDirPath("output");
-    CanopyDriver.runJob(getTestTempDirPath("testdata"), output, ManhattanDistanceMeasure.class.getName(), 3.1, 2.1, false, false);
+    CanopyDriver.runJob(getTestTempDirPath("testdata"), output, manhattanDistanceMeasure, 3.1, 2.1, false, false);
 
     // verify output from sequence file
     Path path = new Path(output, "clusters-0/part-r-00000");
@@ -319,7 +319,7 @@ public class TestCanopyCreation extends 
     ClusteringTestUtils.writePointsToFile(points, getTestTempFilePath("testdata/file2"), fs, job);
     // now run the Canopy Driver
     Path output = getTestTempDirPath("output");
-    CanopyDriver.runJob(getTestTempDirPath("testdata"), output, EuclideanDistanceMeasure.class.getName(), 3.1, 2.1, false, false);
+    CanopyDriver.runJob(getTestTempDirPath("testdata"), output, euclideanDistanceMeasure, 3.1, 2.1, false, false);
 
     // verify output from sequence file
     Path path = new Path(output, "clusters-0/part-r-00000");
@@ -354,7 +354,7 @@ public class TestCanopyCreation extends 
     List<Canopy> canopies = new ArrayList<Canopy>();
     int nextCanopyId = 0;
     for (Vector centroid : manhattanCentroids) {
-      canopies.add(new Canopy(centroid, nextCanopyId++));
+      canopies.add(new Canopy(centroid, nextCanopyId++, manhattanDistanceMeasure));
     }
     mapper.config(canopies);
     List<VectorWritable> points = getPointsWritable();
@@ -389,7 +389,7 @@ public class TestCanopyCreation extends 
     List<Canopy> canopies = new ArrayList<Canopy>();
     int nextCanopyId = 0;
     for (Vector centroid : euclideanCentroids) {
-      canopies.add(new Canopy(centroid, nextCanopyId++));
+      canopies.add(new Canopy(centroid, nextCanopyId++, euclideanDistanceMeasure));
     }
     mapper.config(canopies);
     List<VectorWritable> points = getPointsWritable();
@@ -416,7 +416,7 @@ public class TestCanopyCreation extends 
     ClusteringTestUtils.writePointsToFile(points, getTestTempFilePath("testdata/file1"), fs, config);
     // now run the Canopy Driver in sequential mode
     Path output = getTestTempDirPath("output");
-    CanopyDriver.runJob(getTestTempDirPath("testdata"), output, ManhattanDistanceMeasure.class.getName(), 3.1, 2.1, true, true);
+    CanopyDriver.runJob(getTestTempDirPath("testdata"), output, manhattanDistanceMeasure, 3.1, 2.1, true, true);
 
     // verify output from sequence file
     Path path = new Path(output, "clusters-0/part-r-00000");
@@ -494,7 +494,7 @@ public class TestCanopyCreation extends 
     ClusteringTestUtils.writePointsToFile(points, getTestTempFilePath("testdata/file2"), fs, conf);
     // now run the Job
     Path output = getTestTempDirPath("output");
-    CanopyDriver.runJob(getTestTempDirPath("testdata"), output, ManhattanDistanceMeasure.class.getName(), 3.1, 2.1, true, false);
+    CanopyDriver.runJob(getTestTempDirPath("testdata"), output, manhattanDistanceMeasure, 3.1, 2.1, true, false);
     Path path = new Path(output, "clusteredPoints/part-m-00000");
     SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
     int count = 0;
@@ -547,7 +547,7 @@ public class TestCanopyCreation extends 
     // now run the Canopy Driver. User defined measure happens to be a Manhattan
     // subclass so results are same.
     Path output = getTestTempDirPath("output");
-    CanopyDriver.runJob(getTestTempDirPath("testdata"), output, UserDefinedDistanceMeasure.class.getName(), 3.1, 2.1, false, false);
+    CanopyDriver.runJob(getTestTempDirPath("testdata"), output, new UserDefinedDistanceMeasure(), 3.1, 2.1, false, false);
 
     // verify output from sequence file
     Configuration job = new Configuration();

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestMapReduce.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestMapReduce.java?rev=986960&r1=986959&r2=986960&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestMapReduce.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestMapReduce.java Wed Aug 18 21:47:30 2010
@@ -441,7 +441,6 @@ public class TestMapReduce extends Mahou
     in.reset(out.getData(), out.getLength());
     model2.readFields(in);
     assertEquals("models", model.toString(), model2.toString());
-    assertEquals("ids", 5, model.getId());
   }
 
   public void testSampledNormalModelWritableSerialization() throws Exception {
@@ -454,7 +453,6 @@ public class TestMapReduce extends Mahou
     in.reset(out.getData(), out.getLength());
     model2.readFields(in);
     assertEquals("models", model.toString(), model2.toString());
-    assertEquals("ids", 5, model.getId());
   }
 
   public void testAsymmetricSampledNormalModelWritableSerialization() throws Exception {
@@ -468,7 +466,6 @@ public class TestMapReduce extends Mahou
     in.reset(out.getData(), out.getLength());
     model2.readFields(in);
     assertEquals("models", model.toString(), model2.toString());
-    assertEquals("ids", 5, model.getId());
   }
 
   public void testClusterWritableSerialization() throws Exception {
@@ -483,7 +480,6 @@ public class TestMapReduce extends Mahou
     assertEquals("count", cluster.getTotalCount(), cluster2.getTotalCount());
     assertNotNull("model null", cluster2.getModel());
     assertEquals("model", cluster.getModel().toString(), cluster2.getModel().toString());
-    assertEquals("ids", 5, cluster.getId());
   }
 
 }

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java?rev=986960&r1=986959&r2=986960&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java Wed Aug 18 21:47:30 2010
@@ -52,6 +52,8 @@ import org.apache.mahout.math.VectorWrit
 public class TestFuzzyKmeansClustering extends MahoutTestCase {
 
   private FileSystem fs;
+  
+  private DistanceMeasure measure = new EuclideanDistanceMeasure();
 
   private static void rmr(String path) {
     File f = new File(path);
@@ -151,6 +153,7 @@ public class TestFuzzyKmeansClustering e
 
   public void testReferenceImplementation() throws Exception {
     List<Vector> points = TestKmeansClustering.getPoints(TestKmeansClustering.reference);
+      EuclideanDistanceMeasure measure = new EuclideanDistanceMeasure();
     for (int k = 0; k < points.size(); k++) {
       System.out.println("test k= " + k);
 
@@ -158,7 +161,7 @@ public class TestFuzzyKmeansClustering e
       // pick k initial cluster centers at random
       for (int i = 0; i < k + 1; i++) {
         Vector vec = tweakValue(points.get(i));
-        SoftCluster cluster = new SoftCluster(vec, i);
+        SoftCluster cluster = new SoftCluster(vec, i, measure);
         // add the center so the centroid will be correct upon output
         //cluster.addPoint(cluster.getCenter(), 1);
         clusterList.add(cluster);
@@ -167,13 +170,13 @@ public class TestFuzzyKmeansClustering e
       // run reference FuzzyKmeans algorithm
       List<List<SoftCluster>> clusters = FuzzyKMeansClusterer.clusterPoints(points,
                                                                             clusterList,
-                                                                            new EuclideanDistanceMeasure(),
+                                                                            measure,
                                                                             0.001,
                                                                             2,
                                                                             2);
       computeCluster(points,
                      clusters.get(clusters.size() - 1),
-                     new FuzzyKMeansClusterer(new EuclideanDistanceMeasure(), 0.001, 2),
+                     new FuzzyKMeansClusterer(measure, 0.001, 2),
                      pointClusterInfo);
 
       // iterate for each cluster
@@ -204,7 +207,7 @@ public class TestFuzzyKmeansClustering e
       for (int i = 0; i < k + 1; i++) {
         Vector vec = tweakValue(points.get(i).get());
 
-        SoftCluster cluster = new SoftCluster(vec, i);
+        SoftCluster cluster = new SoftCluster(vec, i, measure);
         // add the center so the centroid will be correct upon output
         cluster.observe(cluster.getCenter(), 1);
         /*
@@ -267,7 +270,7 @@ public class TestFuzzyKmeansClustering e
       for (int i = 0; i < k + 1; i++) {
         Vector vec = tweakValue(points.get(i).get());
 
-        SoftCluster cluster = new SoftCluster(vec, i);
+        SoftCluster cluster = new SoftCluster(vec, i, measure);
         // add the center so the centroid will be correct upon output
         cluster.observe(cluster.getCenter(), 1);
         /*
@@ -321,7 +324,7 @@ public class TestFuzzyKmeansClustering e
       for (int i = 0; i < k + 1; i++) {
         Vector vec = tweakValue(points.get(i).get());
 
-        SoftCluster cluster = new SoftCluster(vec, i);
+        SoftCluster cluster = new SoftCluster(vec, i, measure);
         cluster.observe(cluster.getCenter(), 1);
         clusterList.add(cluster);
       }
@@ -383,7 +386,7 @@ public class TestFuzzyKmeansClustering e
       for (int i = 0; i < k + 1; i++) {
         Vector vec = tweakValue(points.get(i).get());
 
-        SoftCluster cluster = new SoftCluster(vec, i);
+        SoftCluster cluster = new SoftCluster(vec, i, measure);
         cluster.observe(cluster.getCenter(), 1);
         clusterList.add(cluster);
       }
@@ -439,7 +442,7 @@ public class TestFuzzyKmeansClustering e
       for (int i = 0; i < k + 1; i++) {
         Vector vec = tweakValue(points.get(i).get());
 
-        SoftCluster cluster = new SoftCluster(vec, i);
+        SoftCluster cluster = new SoftCluster(vec, i, measure);
         // cluster.addPoint(cluster.getCenter(), 1);
         clusterList.add(cluster);
       }
@@ -493,7 +496,7 @@ public class TestFuzzyKmeansClustering e
       List<SoftCluster> reference = new ArrayList<SoftCluster>();
       for (int i = 0; i < k + 1; i++) {
         Vector vec = tweakValue(points.get(i).get());
-        reference.add(new SoftCluster(vec, i));
+        reference.add(new SoftCluster(vec, i, measure));
       }
       List<Vector> pointsVectors = new ArrayList<Vector>();
       for (VectorWritable point : points) {
@@ -526,7 +529,7 @@ public class TestFuzzyKmeansClustering e
       for (int i = 0; i < k + 1; i++) {
         Vector vec = tweakValue(points.get(i).get());
 
-        SoftCluster cluster = new SoftCluster(vec, i);
+        SoftCluster cluster = new SoftCluster(vec, i, measure);
         cluster.observe(cluster.getCenter(), 1);
         clusterList.add(cluster);
       }
@@ -601,7 +604,7 @@ public class TestFuzzyKmeansClustering e
       List<SoftCluster> reference = new ArrayList<SoftCluster>();
       for (int i = 0; i < k + 1; i++) {
         Vector vec = tweakValue(points.get(i).get());
-        reference.add(new SoftCluster(vec, i));
+        reference.add(new SoftCluster(vec, i, measure));
       }
       Map<Integer, List<WeightedVectorWritable>> refClusters = new HashMap<Integer, List<WeightedVectorWritable>>();
       List<Vector> pointsVectors = new ArrayList<Vector>();

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java?rev=986960&r1=986959&r2=986960&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java Wed Aug 18 21:47:30 2010
@@ -113,7 +113,7 @@ public class TestKmeansClustering extend
       List<Cluster> clusters = new ArrayList<Cluster>();
       for (int i = 0; i < k + 1; i++) {
         Vector vec = points.get(i);
-        clusters.add(new Cluster(vec, i));
+        clusters.add(new Cluster(vec, i, measure));
       }
       // iterate clusters until they converge
       int maxIter = 10;
@@ -153,7 +153,7 @@ public class TestKmeansClustering extend
       List<Cluster> clusters = new ArrayList<Cluster>();
 
       for (int i = 0; i < k + 1; i++) {
-        Cluster cluster = new Cluster(points.get(i).get(), i);
+        Cluster cluster = new Cluster(points.get(i).get(), i, measure);
         // add the center so the centroid will be correct upon output
         cluster.observe(cluster.getCenter(), 1);
         clusters.add(cluster);
@@ -200,7 +200,7 @@ public class TestKmeansClustering extend
       for (int i = 0; i < k + 1; i++) {
         Vector vec = points.get(i).get();
 
-        Cluster cluster = new Cluster(vec, i);
+        Cluster cluster = new Cluster(vec, i, measure);
         // add the center so the centroid will be correct upon output
         cluster.observe(cluster.getCenter(), 1);
         clusters.add(cluster);
@@ -258,7 +258,7 @@ public class TestKmeansClustering extend
       List<Cluster> clusters = new ArrayList<Cluster>();
       for (int i = 0; i < k + 1; i++) {
         Vector vec = points.get(i).get();
-        Cluster cluster = new Cluster(vec, i);
+        Cluster cluster = new Cluster(vec, i, measure);
         // add the center so the centroid will be correct upon output
         // cluster.addPoint(cluster.getCenter());
         clusters.add(cluster);
@@ -296,7 +296,7 @@ public class TestKmeansClustering extend
       List<Cluster> reference = new ArrayList<Cluster>();
       for (int i = 0; i < k + 1; i++) {
         Vector vec = points.get(i).get();
-        reference.add(new Cluster(vec, i));
+        reference.add(new Cluster(vec, i, measure));
       }
       List<Vector> pointsVectors = new ArrayList<Vector>();
       for (VectorWritable point : points) {
@@ -331,6 +331,7 @@ public class TestKmeansClustering extend
 
   /** Story: User wishes to run kmeans job on reference data */
   public void testKMeansSeqJob() throws Exception {
+    DistanceMeasure measure = new EuclideanDistanceMeasure();
     List<VectorWritable> points = getPointsWritable(reference);
 
     Path pointsPath = getTestTempDirPath("points");
@@ -348,7 +349,7 @@ public class TestKmeansClustering extend
       for (int i = 0; i < k + 1; i++) {
         Vector vec = points.get(i).get();
 
-        Cluster cluster = new Cluster(vec, i);
+        Cluster cluster = new Cluster(vec, i, measure);
         // add the center so the centroid will be correct upon output
         cluster.observe(cluster.getCenter(), 1);
         writer.append(new Text(cluster.getIdentifier()), cluster);
@@ -386,6 +387,7 @@ public class TestKmeansClustering extend
 
   /** Story: User wishes to run kmeans job on reference data */
   public void testKMeansMRJob() throws Exception {
+    DistanceMeasure measure = new EuclideanDistanceMeasure();
     List<VectorWritable> points = getPointsWritable(reference);
 
     Path pointsPath = getTestTempDirPath("points");
@@ -403,7 +405,7 @@ public class TestKmeansClustering extend
       for (int i = 0; i < k + 1; i++) {
         Vector vec = points.get(i).get();
 
-        Cluster cluster = new Cluster(vec, i);
+        Cluster cluster = new Cluster(vec, i, measure);
         // add the center so the centroid will be correct upon output
         cluster.observe(cluster.getCenter(), 1);
         writer.append(new Text(cluster.getIdentifier()), cluster);
@@ -455,13 +457,13 @@ public class TestKmeansClustering extend
 
     Path outputPath = getTestTempDirPath("output");
     // now run the Canopy job
-    CanopyDriver.runJob(pointsPath, outputPath, ManhattanDistanceMeasure.class.getName(), 3.1, 2.1, false, false);
+    CanopyDriver.runJob(pointsPath, outputPath, new ManhattanDistanceMeasure(), 3.1, 2.1, false, false);
 
     // now run the KMeans job
     KMeansDriver.runJob(pointsPath,
                         new Path(outputPath, "clusters-0"),
                         outputPath,
-                        EuclideanDistanceMeasure.class.getName(),
+                        new EuclideanDistanceMeasure(),
                         0.001,
                         10,
                         1,

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestRandomSeedGenerator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestRandomSeedGenerator.java?rev=986960&r1=986959&r2=986960&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestRandomSeedGenerator.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestRandomSeedGenerator.java Wed Aug 18 21:47:30 2010
@@ -32,6 +32,7 @@ import org.apache.hadoop.mapreduce.Job;
 import org.apache.mahout.clustering.AbstractCluster;
 import org.apache.mahout.clustering.ClusteringTestUtils;
 import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
 import org.apache.mahout.math.RandomAccessSparseVector;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
@@ -84,7 +85,7 @@ public class TestRandomSeedGenerator ext
     Path output = getTestTempDirPath("random-output");
     ClusteringTestUtils.writePointsToFile(points, input, fs, conf);
     
-    RandomSeedGenerator.buildRandom(input, output, 4);
+    RandomSeedGenerator.buildRandom(input, output, 4, new ManhattanDistanceMeasure());
     
     SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(output, "part-randomSeed"), conf);
     Writable key = (Writable) reader.getKeyClass().newInstance();

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java?rev=986960&r1=986959&r2=986960&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java Wed Aug 18 21:47:30 2010
@@ -84,7 +84,7 @@ public class TestMeanShift extends Mahou
     int nextCanopyId = 0;
     List<MeanShiftCanopy> canopies = new ArrayList<MeanShiftCanopy>();
     for (Vector point : raw) {
-      canopies.add(new MeanShiftCanopy(point, nextCanopyId++));
+      canopies.add(new MeanShiftCanopy(point, nextCanopyId++, euclideanDistanceMeasure));
     }
     return canopies;
   }
@@ -119,7 +119,7 @@ public class TestMeanShift extends Mahou
     // add all points to the canopies
     int nextCanopyId = 0;
     for (Vector aRaw : raw) {
-      clusterer.mergeCanopy(new MeanShiftCanopy(aRaw, nextCanopyId++), canopies);
+      clusterer.mergeCanopy(new MeanShiftCanopy(aRaw, nextCanopyId++, euclideanDistanceMeasure), canopies);
     }
     boolean done = false;
     int iter = 1;
@@ -145,7 +145,7 @@ public class TestMeanShift extends Mahou
     List<Vector> points = new ArrayList<Vector>();
     for (Vector v : raw)
       points.add(v);
-    List<MeanShiftCanopy> canopies = MeanShiftCanopyClusterer.clusterPoints(points, new EuclideanDistanceMeasure(), 0.5, 4, 1, 10);
+    List<MeanShiftCanopy> canopies = MeanShiftCanopyClusterer.clusterPoints(points, euclideanDistanceMeasure, 0.5, 4, 1, 10);
     printCanopies(canopies);
     printImage(canopies);
   }
@@ -162,7 +162,7 @@ public class TestMeanShift extends Mahou
     List<MeanShiftCanopy> refCanopies = new ArrayList<MeanShiftCanopy>();
     int nextCanopyId = 0;
     for (Vector aRaw : raw) {
-      clusterer.mergeCanopy(new MeanShiftCanopy(aRaw, nextCanopyId++), refCanopies);
+      clusterer.mergeCanopy(new MeanShiftCanopy(aRaw, nextCanopyId++, euclideanDistanceMeasure), refCanopies);
     }
 
     Configuration conf = new Configuration();
@@ -222,7 +222,7 @@ public class TestMeanShift extends Mahou
     List<MeanShiftCanopy> mapperReference = new ArrayList<MeanShiftCanopy>();
     int nextCanopyId = 0;
     for (Vector aRaw : raw) {
-      clusterer.mergeCanopy(new MeanShiftCanopy(aRaw, nextCanopyId++), mapperReference);
+      clusterer.mergeCanopy(new MeanShiftCanopy(aRaw, nextCanopyId++, euclideanDistanceMeasure), mapperReference);
     }
     for (MeanShiftCanopy canopy : mapperReference) {
       clusterer.shiftToMean(canopy);

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayCanopy.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayCanopy.java?rev=986960&r1=986959&r2=986960&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayCanopy.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayCanopy.java Wed Aug 18 21:47:30 2010
@@ -50,7 +50,7 @@ class DisplayCanopy extends DisplayClust
     writeSampleData(samples);
     //boolean b = true;
     //if (b) {
-      new CanopyDriver().buildClusters(samples, output, ManhattanDistanceMeasure.class.getName(), T1, T2, true);
+      new CanopyDriver().buildClusters(samples, output, new ManhattanDistanceMeasure(), T1, T2, true);
       loadClusters(output);
     //} else {
     //  List<Vector> points = new ArrayList<Vector>();

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java?rev=986960&r1=986959&r2=986960&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java Wed Aug 18 21:47:30 2010
@@ -84,8 +84,8 @@ public class DisplayDirichlet extends Di
     for (Model<VectorWritable>[] models : result) {
       List<Cluster> clusters = new ArrayList<Cluster>();
       for (Model<VectorWritable> cluster : models) {
-        if (isSignificant(cluster)) {
-          clusters.add(cluster);
+        if (isSignificant((Cluster)cluster)) {
+          clusters.add((Cluster)cluster);
         }
       }
       CLUSTERS.add(clusters);

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java?rev=986960&r1=986959&r2=986960&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java Wed Aug 18 21:47:30 2010
@@ -57,11 +57,11 @@ class DisplayFuzzyKMeans extends Display
     int m = 3;
     //if (b) {
       writeSampleData(samples);
-      Path clusters = RandomSeedGenerator.buildRandom(samples, new Path(output, "clusters-0"), 3);
+      Path clusters = RandomSeedGenerator.buildRandom(samples, new Path(output, "clusters-0"), 3, measure);
       FuzzyKMeansDriver.runJob(samples,
                                clusters,
                                output,
-                               measure.getClass().getName(),
+                               measure,
                                threshold,
                                numIterations,
                                1,

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java?rev=986960&r1=986959&r2=986960&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java Wed Aug 18 21:47:30 2010
@@ -51,9 +51,9 @@ class DisplayKMeans extends DisplayClust
     int maxIter = 10;
     double distanceThreshold = 0.001;
     //if (b) {
-      Path clusters = RandomSeedGenerator.buildRandom(samples, new Path(output, "clusters-0"), 3);
-      KMeansDriver.runJob(samples, clusters, output, measure.getClass().getName(), distanceThreshold, maxIter, 1, true, true);
-      loadClusters(output);
+    Path clusters = RandomSeedGenerator.buildRandom(samples, new Path(output, "clusters-0"), 3, measure);
+    KMeansDriver.runJob(samples, clusters, output, measure, distanceThreshold, maxIter, 1, true, true);
+    loadClusters(output);
     //} else {
     //  List<Vector> points = new ArrayList<Vector>();
     //  for (VectorWritable sample : SAMPLE_DATA) {

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMeanShift.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMeanShift.java?rev=986960&r1=986959&r2=986960&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMeanShift.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMeanShift.java Wed Aug 18 21:47:30 2010
@@ -112,7 +112,7 @@ final class DisplayMeanShift extends Dis
     writeSampleData(samples);
     boolean b = true;
     if (b) {
-      MeanShiftCanopyDriver.runJob(samples, output, measure.getClass().getName(), t1, t2, 0.005, 20, false, true, true);
+      MeanShiftCanopyDriver.runJob(samples, output, measure, t1, t2, 0.005, 20, false, true, true);
       loadClusters(output);
     } else {
       List<Vector> points = new ArrayList<Vector>();

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java?rev=986960&r1=986959&r2=986960&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java Wed Aug 18 21:47:30 2010
@@ -25,6 +25,8 @@ import org.apache.mahout.clustering.cano
 import org.apache.mahout.clustering.syntheticcontrol.Constants;
 import org.apache.mahout.common.HadoopUtil;
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
+import org.apache.mahout.common.distance.DistanceMeasure;
+import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
 import org.apache.mahout.utils.clustering.ClusterDumper;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -44,7 +46,7 @@ public final class Job extends CanopyDri
       log.info("Running with default arguments");
       Path output = new Path("output");
       HadoopUtil.overwriteOutput(output);
-      job(new Path("testdata"), output, "org.apache.mahout.common.distance.EuclideanDistanceMeasure", 80, 55);
+      job(new Path("testdata"), output, new EuclideanDistanceMeasure(), 80, 55);
     }
   }
 
@@ -60,22 +62,20 @@ public final class Job extends CanopyDri
    *          the String denoting the input directory path
    * @param output
    *          the String denoting the output directory path
-   * @param measureClassName
-   *          the String class name of the DistanceMeasure to use
+   * @param measure
+   *          the DistanceMeasure to use
    * @param t1
    *          the canopy T1 threshold
    * @param t2
    *          the canopy T2 threshold
    */
-  private static void job(Path input, Path output, String measureClassName, double t1, double t2)
-      throws IOException,
+  private static void job(Path input, Path output, DistanceMeasure measure, double t1, double t2) throws IOException,
       InstantiationException, IllegalAccessException, InterruptedException, ClassNotFoundException {
     Path directoryContainingConvertedInput = new Path(output, Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT);
     InputDriver.runJob(input, directoryContainingConvertedInput, "org.apache.mahout.math.RandomAccessSparseVector");
-    CanopyDriver.runJob(directoryContainingConvertedInput, output, measureClassName, t1, t2, true, false);
+    CanopyDriver.runJob(directoryContainingConvertedInput, output, measure, t1, t2, true, false);
     // run ClusterDumper
-    ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-0"),
-                                                    new Path(output, "clusteredPoints"));
+    ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-0"), new Path(output, "clusteredPoints"));
     clusterDumper.printClusters(null);
   }
 
@@ -102,8 +102,10 @@ public final class Job extends CanopyDri
     String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
     double t1 = Double.parseDouble(getOption(DefaultOptionCreator.T1_OPTION));
     double t2 = Double.parseDouble(getOption(DefaultOptionCreator.T2_OPTION));
+    ClassLoader ccl = Thread.currentThread().getContextClassLoader();
+    DistanceMeasure measure = (DistanceMeasure) ((Class<?>) ccl.loadClass(measureClass)).newInstance();
 
-    job(input, output, measureClass, t1, t2);
+    job(input, output, measure, t1, t2);
     return 0;
   }
 

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java?rev=986960&r1=986959&r2=986960&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java Wed Aug 18 21:47:30 2010
@@ -31,6 +31,8 @@ import org.apache.mahout.clustering.synt
 import org.apache.mahout.clustering.syntheticcontrol.canopy.InputDriver;
 import org.apache.mahout.common.HadoopUtil;
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
+import org.apache.mahout.common.distance.DistanceMeasure;
+import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
 import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
 import org.apache.mahout.utils.clustering.ClusterDumper;
 import org.slf4j.Logger;
@@ -51,16 +53,7 @@ public final class Job extends FuzzyKMea
       log.info("Running with default arguments");
       Path output = new Path("output");
       HadoopUtil.overwriteOutput(output);
-      new Job().job(new Path("testdata"),
-                    output,
-                    "org.apache.mahout.common.distance.EuclideanDistanceMeasure",
-                    80,
-                    55,
-                    10,
-                    1,
-                    (float) 2,
-                    0.5,
-                    true);
+      new Job().job(new Path("testdata"), output, new EuclideanDistanceMeasure(), 80, 55, 10, 1, (float) 2, 0.5, true);
 
     }
   }
@@ -109,14 +102,17 @@ public final class Job extends FuzzyKMea
     if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
       HadoopUtil.overwriteOutput(output);
     }
+    ClassLoader ccl = Thread.currentThread().getContextClassLoader();
+    DistanceMeasure measure = (DistanceMeasure) ((Class<?>) ccl.loadClass(measureClass)).newInstance();
+
     if (hasOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)) {
       clusters = RandomSeedGenerator.buildRandom(input, clusters, Integer.parseInt(argMap
-          .get(DefaultOptionCreator.NUM_CLUSTERS_OPTION)));
+          .get(DefaultOptionCreator.NUM_CLUSTERS_OPTION)), measure);
     }
     boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
     double t1 = Double.parseDouble(getOption(DefaultOptionCreator.T1_OPTION));
     double t2 = Double.parseDouble(getOption(DefaultOptionCreator.T2_OPTION));
-    job(input, output, measureClass, t1, t2, maxIterations, numReduceTasks, fuzziness, convergenceDelta, runClustering);
+    job(input, output, measure, t1, t2, maxIterations, numReduceTasks, fuzziness, convergenceDelta, runClustering);
     return 0;
   }
 
@@ -149,26 +145,26 @@ public final class Job extends FuzzyKMea
    */
   private void job(Path input,
                    Path output,
-                   String measureClass,
+                   DistanceMeasure measure,
                    double t1,
                    double t2,
                    int maxIterations,
                    int numReducerTasks,
                    float fuzziness,
                    double convergenceDelta,
-                   boolean runClustering)
-      throws IOException, InstantiationException, IllegalAccessException, InterruptedException, ClassNotFoundException {
+                   boolean runClustering) throws IOException, InstantiationException, IllegalAccessException, InterruptedException,
+      ClassNotFoundException {
 
     Path directoryContainingConvertedInput = new Path(output, Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT);
     log.info("Preparing Input");
     InputDriver.runJob(input, directoryContainingConvertedInput, "org.apache.mahout.math.RandomAccessSparseVector");
     log.info("Running Canopy to get initial clusters");
-    CanopyDriver.runJob(directoryContainingConvertedInput, output, measureClass, t1, t2, false, false);
+    CanopyDriver.runJob(directoryContainingConvertedInput, output, measure, t1, t2, false, false);
     log.info("Running FuzzyKMeans");
     FuzzyKMeansDriver.runJob(directoryContainingConvertedInput,
                              new Path(output, Cluster.INITIAL_CLUSTERS_DIR),
                              output,
-                             measureClass,
+                             measure,
                              convergenceDelta,
                              maxIterations,
                              numReducerTasks,
@@ -178,8 +174,7 @@ public final class Job extends FuzzyKMea
                              0.0,
                              false);
     // run ClusterDumper
-    ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-3"),
-                                                    new Path(output, "clusteredPoints"));
+    ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-3"), new Path(output, "clusteredPoints"));
     clusterDumper.printClusters(null);
   }
 }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java?rev=986960&r1=986959&r2=986960&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java Wed Aug 18 21:47:30 2010
@@ -29,6 +29,8 @@ import org.apache.mahout.clustering.synt
 import org.apache.mahout.clustering.syntheticcontrol.canopy.InputDriver;
 import org.apache.mahout.common.HadoopUtil;
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
+import org.apache.mahout.common.distance.DistanceMeasure;
+import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
 import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
 import org.apache.mahout.utils.clustering.ClusterDumper;
 import org.slf4j.Logger;
@@ -49,7 +51,7 @@ public final class Job extends KMeansDri
       log.info("Running with default arguments");
       Path output = new Path("output");
       HadoopUtil.overwriteOutput(output);
-      new Job().job(new Path("testdata"), output, "org.apache.mahout.common.distance.EuclideanDistanceMeasure", 80, 55, 0.5, 10);
+      new Job().job(new Path("testdata"), output, new EuclideanDistanceMeasure(), 80, 55, 0.5, 10);
     }
   }
 
@@ -90,12 +92,15 @@ public final class Job extends KMeansDri
     if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
       HadoopUtil.overwriteOutput(output);
     }
+    ClassLoader ccl = Thread.currentThread().getContextClassLoader();
+    Class<?> cl = ccl.loadClass(measureClass);
+    DistanceMeasure measure = (DistanceMeasure) cl.newInstance();
     if (hasOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)) {
       clusters = RandomSeedGenerator.buildRandom(input, clusters, Integer.parseInt(argMap
-          .get(DefaultOptionCreator.NUM_CLUSTERS_OPTION)));
+          .get(DefaultOptionCreator.NUM_CLUSTERS_OPTION)), measure);
     }
     boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
-    runJob(input, clusters, output, measureClass, convergenceDelta, maxIterations, numReduceTasks, runClustering, false);
+    runJob(input, clusters, output, measure, convergenceDelta, maxIterations, numReduceTasks, runClustering, false);
     return 0;
   }
 
@@ -111,8 +116,8 @@ public final class Job extends KMeansDri
    *          the String denoting the input directory path
    * @param output
    *          the String denoting the output directory path
-   * @param measureClass
-   *          the String class name of the DistanceMeasure to use
+   * @param measure
+   *          the DistanceMeasure to use
    * @param t1
    *          the canopy T1 threshold
    * @param t2
@@ -126,24 +131,31 @@ public final class Job extends KMeansDri
    * @throws ClassNotFoundException 
    * @throws InterruptedException 
    */
-  private void job(Path input, Path output, String measureClass, double t1, double t2, double convergenceDelta, int maxIterations)
-      throws IOException, InstantiationException, IllegalAccessException, InterruptedException, ClassNotFoundException {
+  private void job(Path input,
+                   Path output,
+                   DistanceMeasure measure,
+                   double t1,
+                   double t2,
+                   double convergenceDelta,
+                   int maxIterations) throws IOException, InstantiationException, IllegalAccessException, InterruptedException,
+      ClassNotFoundException {
     HadoopUtil.overwriteOutput(output);
 
     Path directoryContainingConvertedInput = new Path(output, Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT);
     log.info("Preparing Input");
     InputDriver.runJob(input, directoryContainingConvertedInput, "org.apache.mahout.math.RandomAccessSparseVector");
     log.info("Running Canopy to get initial clusters");
-    CanopyDriver.runJob(directoryContainingConvertedInput, output, measureClass, t1, t2, false, false);
+    CanopyDriver.runJob(directoryContainingConvertedInput, output, measure, t1, t2, false, false);
     log.info("Running KMeans");
     KMeansDriver.runJob(directoryContainingConvertedInput,
                         new Path(output, Cluster.INITIAL_CLUSTERS_DIR),
                         output,
-                        measureClass,
+                        measure,
                         convergenceDelta,
                         maxIterations,
                         1,
-                        true, false);
+                        true,
+                        false);
     // run ClusterDumper
     ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-" + maxIterations), new Path(output,
                                                                                                             "clusteredPoints"));

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputMapper.java?rev=986960&r1=986959&r2=986960&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputMapper.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/InputMapper.java Wed Aug 18 21:47:30 2010
@@ -26,12 +26,14 @@ import org.apache.hadoop.io.LongWritable
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.mahout.clustering.meanshift.MeanShiftCanopy;
+import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
 import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.Vector;
 
-public class InputMapper extends Mapper<LongWritable,Text,Text,MeanShiftCanopy> {
-  
+public class InputMapper extends Mapper<LongWritable, Text, Text, MeanShiftCanopy> {
+
   private static final Pattern SPACE = Pattern.compile(" ");
+
   private int nextCanopyId;
 
   @Override
@@ -49,7 +51,7 @@ public class InputMapper extends Mapper<
     for (Double d : doubles) {
       point.set(index++, d);
     }
-    MeanShiftCanopy canopy = new MeanShiftCanopy(point, nextCanopyId++);
+    MeanShiftCanopy canopy = new MeanShiftCanopy(point, nextCanopyId++, new EuclideanDistanceMeasure());
     context.write(new Text(), canopy);
   }
 }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java?rev=986960&r1=986959&r2=986960&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java Wed Aug 18 21:47:30 2010
@@ -27,6 +27,8 @@ import org.apache.mahout.clustering.mean
 import org.apache.mahout.clustering.syntheticcontrol.Constants;
 import org.apache.mahout.common.HadoopUtil;
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
+import org.apache.mahout.common.distance.DistanceMeasure;
+import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
 import org.apache.mahout.utils.clustering.ClusterDumper;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -46,7 +48,7 @@ public final class Job extends MeanShift
       log.info("Running with default arguments");
       Path output = new Path("output");
       HadoopUtil.overwriteOutput(output);
-      new Job().job(new Path("testdata"), output, "org.apache.mahout.common.distance.EuclideanDistanceMeasure", 47.6, 1, 0.5, 10);
+      new Job().job(new Path("testdata"), output, new EuclideanDistanceMeasure(), 47.6, 1, 0.5, 10);
     }
   }
 
@@ -82,8 +84,10 @@ public final class Job extends MeanShift
     double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
     int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
     boolean inputIsCanopies = hasOption(INPUT_IS_CANOPIES_OPTION);
+    ClassLoader ccl = Thread.currentThread().getContextClassLoader();
+    DistanceMeasure measure = (DistanceMeasure) ((Class<?>) ccl.loadClass(measureClass)).newInstance();
 
-    runJob(input, output, measureClass, t1, t2, convergenceDelta, maxIterations, inputIsCanopies, runClustering, false);
+    runJob(input, output, measure, t1, t2, convergenceDelta, maxIterations, inputIsCanopies, runClustering, false);
     return 0;
   }
 
@@ -99,8 +103,8 @@ public final class Job extends MeanShift
    *          the String denoting the input directory path
    * @param output
    *          the String denoting the output directory path
-   * @param measureClassName
-   *          the String class name of the DistanceMeasure to use
+   * @param measure
+   *          the DistanceMeasure to use
    * @param t1
    *          the meanshift canopy T1 threshold
    * @param t2
@@ -112,26 +116,27 @@ public final class Job extends MeanShift
    */
   private void job(Path input,
                    Path output,
-                   String measureClassName,
+                   DistanceMeasure measure,
                    double t1,
                    double t2,
                    double convergenceDelta,
-                   int maxIterations)
-      throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
+                   int maxIterations) throws IOException, InterruptedException, ClassNotFoundException, InstantiationException,
+      IllegalAccessException {
     Path directoryContainingConvertedInput = new Path(output, Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT);
     InputDriver.runJob(input, directoryContainingConvertedInput);
     MeanShiftCanopyDriver.runJob(directoryContainingConvertedInput,
                                  output,
-                                 measureClassName,
+                                 measure,
                                  t1,
                                  t2,
                                  convergenceDelta,
                                  maxIterations,
                                  true,
-                                 true, false);
+                                 true,
+                                 false);
     // run ClusterDumper
-    ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-" + maxIterations),
-                                                    new Path(output, "clusteredPoints"));
+    ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-" + maxIterations), new Path(output,
+                                                                                                            "clusteredPoints"));
     clusterDumper.printClusters(null);
   }
 

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwDriver.java?rev=986960&r1=986959&r2=986960&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwDriver.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwDriver.java Wed Aug 18 21:47:30 2010
@@ -36,6 +36,7 @@ import org.apache.mahout.clustering.Weig
 import org.apache.mahout.clustering.dirichlet.DirichletCluster;
 import org.apache.mahout.common.AbstractJob;
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
+import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.math.VectorWritable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -58,8 +59,8 @@ public final class CDbwDriver extends Ab
   }
 
   @Override
-  public int run(String[] args)
-      throws ClassNotFoundException, InstantiationException, IllegalAccessException, IOException, InterruptedException {
+  public int run(String[] args) throws ClassNotFoundException, InstantiationException, IllegalAccessException, IOException,
+      InterruptedException {
     addInputOption();
     addOutputOption();
     addOption(DefaultOptionCreator.distanceMeasureOption().create());
@@ -74,7 +75,10 @@ public final class CDbwDriver extends Ab
     String distanceMeasureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
     int numReducers = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
     int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
-    job(input, null, output, distanceMeasureClass, maxIterations, numReducers);
+    ClassLoader ccl = Thread.currentThread().getContextClassLoader();
+    DistanceMeasure measure = (DistanceMeasure) ((Class<?>) ccl.loadClass(distanceMeasureClass)).newInstance();
+
+    job(input, null, output, measure, maxIterations, numReducers);
     return 0;
   }
 
@@ -87,8 +91,8 @@ public final class CDbwDriver extends Ab
               the directory pathname for input clustered points [clusterId :: VectorWritable]
    * @param output
    *          the directory pathname for output reference points [clusterId :: VectorWritable]
-   * @param distanceMeasureClass
-   *          the String ModelDistribution class name to use
+   * @param measure
+   *          the DistanceMeasure to use
    * @param numIterations
    *          the number of iterations
    * @param numReducers
@@ -98,21 +102,20 @@ public final class CDbwDriver extends Ab
   public static void runJob(Path clustersIn,
                             Path clusteredPointsIn,
                             Path output,
-                            String distanceMeasureClass,
+                            DistanceMeasure measure,
                             int numIterations,
-                            int numReducers)
-      throws ClassNotFoundException, InstantiationException, IllegalAccessException,
-        IOException, InterruptedException {
-    job(clustersIn, clusteredPointsIn, output, distanceMeasureClass, numIterations, numReducers);
+                            int numReducers) throws ClassNotFoundException, InstantiationException, IllegalAccessException,
+      IOException, InterruptedException {
+    job(clustersIn, clusteredPointsIn, output, measure, numIterations, numReducers);
   }
 
   private static void job(Path clustersIn,
                           Path clusteredPointsIn,
                           Path output,
-                          String distanceMeasureClass,
+                          DistanceMeasure measure,
                           int numIterations,
-                          int numReducers)
-      throws InstantiationException, IllegalAccessException, IOException, InterruptedException, ClassNotFoundException {
+                          int numReducers) throws InstantiationException, IllegalAccessException, IOException,
+      InterruptedException, ClassNotFoundException {
     Path stateIn = new Path(output, "representativePoints-0");
     writeInitialState(stateIn, clustersIn);
 
@@ -120,14 +123,14 @@ public final class CDbwDriver extends Ab
       log.info("Iteration {}", iteration);
       // point the output to a new directory per iteration
       Path stateOut = new Path(output, "representativePoints-" + (iteration + 1));
-      runIteration(clusteredPointsIn, stateIn, stateOut, distanceMeasureClass, numReducers);
+      runIteration(clusteredPointsIn, stateIn, stateOut, measure, numReducers);
       // now point the input to the old output directory
       stateIn = stateOut;
     }
 
     Configuration conf = new Configuration();
     conf.set(STATE_IN_KEY, stateIn.toString());
-    conf.set(DISTANCE_MEASURE_KEY, distanceMeasureClass);
+    conf.set(DISTANCE_MEASURE_KEY, measure.getClass().getName());
     CDbwEvaluator evaluator = new CDbwEvaluator(conf, clustersIn);
     // now print out the Results
     System.out.println("CDbw = " + evaluator.CDbw());
@@ -169,20 +172,16 @@ public final class CDbwDriver extends Ab
    *          the directory pathname for input state
    * @param stateOut
    *          the directory pathname for output state
-   * @param distanceMeasureClass
-   *          the class name of the DistanceMeasure class
+   * @param measure
+   *          the DistanceMeasure
    * @param numReducers
    *          the number of Reducers desired
    */
-  private static void runIteration(Path input,
-                                   Path stateIn,
-                                   Path stateOut,
-                                   String distanceMeasureClass,
-                                   int numReducers)
+  private static void runIteration(Path input, Path stateIn, Path stateOut, DistanceMeasure measure, int numReducers)
       throws IOException, InterruptedException, ClassNotFoundException {
     Configuration conf = new Configuration();
     conf.set(STATE_IN_KEY, stateIn.toString());
-    conf.set(DISTANCE_MEASURE_KEY, distanceMeasureClass);
+    conf.set(DISTANCE_MEASURE_KEY, measure.getClass().getName());
     Job job = new Job(conf);
     job.setJarByClass(CDbwDriver.class);
     job.setOutputKeyClass(IntWritable.class);

Modified: mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java?rev=986960&r1=986959&r2=986960&view=diff
==============================================================================
--- mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java (original)
+++ mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java Wed Aug 18 21:47:30 2010
@@ -42,6 +42,7 @@ import org.apache.mahout.clustering.kmea
 import org.apache.mahout.clustering.meanshift.MeanShiftCanopyDriver;
 import org.apache.mahout.common.MahoutTestCase;
 import org.apache.mahout.common.distance.CosineDistanceMeasure;
+import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
 import org.apache.mahout.math.NamedVector;
 import org.apache.mahout.math.Vector;
@@ -136,48 +137,54 @@ public class TestClusterDumper extends M
   }
 
   public void testCanopy() throws Exception { // now run the Job
+    DistanceMeasure measure = new EuclideanDistanceMeasure();
+
     Path output = getTestTempDirPath("output");
-    CanopyDriver.runJob(getTestTempDirPath("testdata"), output, EuclideanDistanceMeasure.class.getName(), 8, 4, true, false);
+    CanopyDriver.runJob(getTestTempDirPath("testdata"), output, measure, 8, 4, true, false);
     // run ClusterDumper
     ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-0"), new Path(output, "clusteredPoints"));
     clusterDumper.printClusters(termDictionary);
   }
 
   public void testKmeans() throws Exception {
+    DistanceMeasure measure = new EuclideanDistanceMeasure();
     // now run the Canopy job to prime kMeans canopies
     Path output = getTestTempDirPath("output");
-    CanopyDriver.runJob(getTestTempDirPath("testdata"), output, EuclideanDistanceMeasure.class.getName(), 8, 4, false, false);
+    CanopyDriver.runJob(getTestTempDirPath("testdata"), output, measure, 8, 4, false, false);
     // now run the KMeans job
-    KMeansDriver.runJob(getTestTempDirPath("testdata"), new Path(output, "clusters-0"), output, EuclideanDistanceMeasure.class
-        .getName(), 0.001, 10, 1, true, false);
+    KMeansDriver.runJob(getTestTempDirPath("testdata"), new Path(output, "clusters-0"), output, measure, 0.001, 10, 1, true, false);
     // run ClusterDumper
     ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-2"), new Path(output, "clusteredPoints"));
     clusterDumper.printClusters(termDictionary);
   }
 
   public void testFuzzyKmeans() throws Exception {
+    DistanceMeasure measure = new EuclideanDistanceMeasure();
     // now run the Canopy job to prime kMeans canopies
     Path output = getTestTempDirPath("output");
-    CanopyDriver.runJob(getTestTempDirPath("testdata"), output, EuclideanDistanceMeasure.class.getName(), 8, 4, false, false);
+    CanopyDriver.runJob(getTestTempDirPath("testdata"), output, measure, 8, 4, false, false);
     // now run the Fuzzy KMeans job
-    FuzzyKMeansDriver.runJob(getTestTempDirPath("testdata"), new Path(output, "clusters-0"), output, EuclideanDistanceMeasure.class
-        .getName(), 0.001, 10, 1, (float) 1.1, true, true, 0, false);
+    FuzzyKMeansDriver.runJob(getTestTempDirPath("testdata"),
+                             new Path(output, "clusters-0"),
+                             output,
+                             measure,
+                             0.001,
+                             10,
+                             1,
+                             (float) 1.1,
+                             true,
+                             true,
+                             0,
+                             false);
     // run ClusterDumper
     ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-3"), new Path(output, "clusteredPoints"));
     clusterDumper.printClusters(termDictionary);
   }
 
   public void testMeanShift() throws Exception {
+    DistanceMeasure measure = new CosineDistanceMeasure();
     Path output = getTestTempDirPath("output");
-    MeanShiftCanopyDriver.runJob(getTestTempDirPath("testdata"),
-                                 output,
-                                 CosineDistanceMeasure.class.getName(),
-                                 0.5,
-                                 0.01,
-                                 0.05,
-                                 10,
-                                 false,
-                                 true, false);
+    MeanShiftCanopyDriver.runJob(getTestTempDirPath("testdata"), output, measure, 0.5, 0.01, 0.05, 10, false, true, false);
     // run ClusterDumper
     ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-1"), new Path(output, "clusteredPoints"));
     clusterDumper.printClusters(termDictionary);

Modified: mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java?rev=986960&r1=986959&r2=986960&view=diff
==============================================================================
--- mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java (original)
+++ mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java Wed Aug 18 21:47:30 2010
@@ -41,6 +41,7 @@ import org.apache.mahout.clustering.kmea
 import org.apache.mahout.clustering.kmeans.TestKmeansClustering;
 import org.apache.mahout.clustering.meanshift.MeanShiftCanopyDriver;
 import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
 import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.Vector;
@@ -48,8 +49,8 @@ import org.apache.mahout.math.VectorWrit
 
 public class TestCDbwEvaluator extends MahoutTestCase {
 
-  private static final double[][] reference = { { 1, 1 }, { 2, 1 }, { 1, 2 }, { 2, 2 }, { 3, 3 },
-      { 4, 4 }, { 5, 4 }, { 4, 5 }, { 5, 5 } };
+  private static final double[][] reference = { { 1, 1 }, { 2, 1 }, { 1, 2 }, { 2, 2 }, { 3, 3 }, { 4, 4 }, { 5, 4 }, { 4, 5 },
+      { 5, 5 } };
 
   private Map<Integer, List<VectorWritable>> representativePoints;
 
@@ -91,13 +92,14 @@ public class TestCDbwEvaluator extends M
    * Initialize synthetic data using 4 clusters dC units from origin having 4 representative points dP from each center
    * @param dC a double cluster center offset
    * @param dP a double representative point offset
+   * @param measure a DistanceMeasure passed to each synthetic Canopy cluster
    */
-  private void initData(double dC, double dP) {
+  private void initData(double dC, double dP, DistanceMeasure measure) {
     clusters = new HashMap<Integer, Cluster>();
-    clusters.put(1, new Canopy(new DenseVector(new double[] { -dC, -dC }), 1));
-    clusters.put(3, new Canopy(new DenseVector(new double[] { -dC, dC }), 3));
-    clusters.put(5, new Canopy(new DenseVector(new double[] { dC, dC }), 5));
-    clusters.put(7, new Canopy(new DenseVector(new double[] { dC, -dC }), 7));
+    clusters.put(1, new Canopy(new DenseVector(new double[] { -dC, -dC }), 1, measure));
+    clusters.put(3, new Canopy(new DenseVector(new double[] { -dC, dC }), 3, measure));
+    clusters.put(5, new Canopy(new DenseVector(new double[] { dC, dC }), 5, measure));
+    clusters.put(7, new Canopy(new DenseVector(new double[] { dC, -dC }), 7, measure));
     representativePoints = new HashMap<Integer, List<VectorWritable>>();
     for (Cluster cluster : clusters.values()) {
       List<VectorWritable> points = new ArrayList<VectorWritable>();
@@ -111,8 +113,9 @@ public class TestCDbwEvaluator extends M
   }
 
   public void testCDbw0() {
-    initData(1, 0.25);
-    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters, new EuclideanDistanceMeasure());
+    DistanceMeasure measure = new EuclideanDistanceMeasure();
+    initData(1, 0.25, measure);
+    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters, measure);
     assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity());
     assertEquals("separation", 1.5, evaluator.separation());
     assertEquals("intra cluster density", 0.8944271909999157, evaluator.intraClusterDensity());
@@ -120,8 +123,9 @@ public class TestCDbwEvaluator extends M
   }
 
   public void testCDbw1() {
-    initData(1, 0.5);
-    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters, new EuclideanDistanceMeasure());
+    DistanceMeasure measure = new EuclideanDistanceMeasure();
+    initData(1, 0.5, measure);
+    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters, measure);
     assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity());
     assertEquals("separation", 1.0, evaluator.separation());
     assertEquals("intra cluster density", 0.44721359549995787, evaluator.intraClusterDensity());
@@ -129,8 +133,9 @@ public class TestCDbwEvaluator extends M
   }
 
   public void testCDbw2() {
-    initData(1, 0.75);
-    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters, new EuclideanDistanceMeasure());
+    DistanceMeasure measure = new EuclideanDistanceMeasure();
+    initData(1, 0.75, measure);
+    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters, measure);
     assertEquals("inter cluster density", 1.017921815355728, evaluator.interClusterDensity());
     assertEquals("separation", 0.24777966925931558, evaluator.separation());
     assertEquals("intra cluster density", 0.29814239699997197, evaluator.intraClusterDensity());
@@ -138,62 +143,89 @@ public class TestCDbwEvaluator extends M
   }
 
   public void testCanopy() throws Exception { // now run the Job
-    CanopyDriver.runJob(getTestTempDirPath("testdata"), getTestTempDirPath("output"),
-                        EuclideanDistanceMeasure.class.getName(), 3.1, 2.1, true, false);
+    DistanceMeasure measure = new EuclideanDistanceMeasure();
+    CanopyDriver.runJob(getTestTempDirPath("testdata"), getTestTempDirPath("output"), measure, 3.1, 2.1, true, false);
     int numIterations = 2;
     Path output = getTestTempDirPath("output");
-    CDbwDriver.runJob(new Path(output, "clusters-0"), new Path(output, "clusteredPoints"), output,
-                      EuclideanDistanceMeasure.class.getName(), numIterations, 1);
+    CDbwDriver.runJob(new Path(output, "clusters-0"), new Path(output, "clusteredPoints"), output, measure, numIterations, 1);
     checkRefPoints(numIterations);
   }
 
   public void testKmeans() throws Exception {
+    DistanceMeasure measure = new EuclideanDistanceMeasure();
     // now run the Canopy job to prime kMeans canopies
-    CanopyDriver.runJob(getTestTempDirPath("testdata"), getTestTempDirPath("output"),
-                        EuclideanDistanceMeasure.class.getName(), 3.1, 2.1, false, false);
+    CanopyDriver.runJob(getTestTempDirPath("testdata"), getTestTempDirPath("output"), measure, 3.1, 2.1, false, false);
     // now run the KMeans job
     Path output = getTestTempDirPath("output");
-    KMeansDriver.runJob(getTestTempDirPath("testdata"), new Path(output, "clusters-0"), output,
-                        EuclideanDistanceMeasure.class.getName(), 0.001, 10, 1, true, false);
+    KMeansDriver.runJob(getTestTempDirPath("testdata"), new Path(output, "clusters-0"), output, measure, 0.001, 10, 1, true, false);
     int numIterations = 2;
-    CDbwDriver.runJob(new Path(output, "clusters-2"), new Path(output, "clusteredPoints"), output,
-                      EuclideanDistanceMeasure.class.getName(), numIterations, 1);
+    CDbwDriver.runJob(new Path(output, "clusters-2"), new Path(output, "clusteredPoints"), output, measure, numIterations, 1);
     checkRefPoints(numIterations);
   }
 
   public void testFuzzyKmeans() throws Exception {
+    DistanceMeasure measure = new EuclideanDistanceMeasure();
     // now run the Canopy job to prime kMeans canopies
-    CanopyDriver.runJob(getTestTempDirPath("testdata"), getTestTempDirPath("output"),
-                        EuclideanDistanceMeasure.class.getName(), 3.1, 2.1, false, false);
+    CanopyDriver.runJob(getTestTempDirPath("testdata"), getTestTempDirPath("output"), measure, 3.1, 2.1, false, false);
     // now run the KMeans job
     Path output = getTestTempDirPath("output");
-    FuzzyKMeansDriver.runJob(getTestTempDirPath("testdata"), new Path(output, "clusters-0"), output,
-                             EuclideanDistanceMeasure.class.getName(), 0.001, 10, 1, 2, true, true, 0, false);
+    FuzzyKMeansDriver.runJob(getTestTempDirPath("testdata"),
+                             new Path(output, "clusters-0"),
+                             output,
+                             measure,
+                             0.001,
+                             10,
+                             1,
+                             2,
+                             true,
+                             true,
+                             0,
+                             false);
     int numIterations = 2;
-    CDbwDriver.runJob(new Path(output, "clusters-4"), new Path(output, "clusteredPoints"), output,
-                      EuclideanDistanceMeasure.class.getName(), numIterations, 1);
+    CDbwDriver.runJob(new Path(output, "clusters-4"), new Path(output, "clusteredPoints"), output, measure, numIterations, 1);
     checkRefPoints(numIterations);
   }
 
   public void testMeanShift() throws Exception {
-    MeanShiftCanopyDriver.runJob(getTestTempDirPath("testdata"), getTestTempDirPath("output"),
-                                 EuclideanDistanceMeasure.class.getName(), 2.1, 1.0, 0.001, 10, false, true, false);
+    DistanceMeasure measure = new EuclideanDistanceMeasure();
+    MeanShiftCanopyDriver.runJob(getTestTempDirPath("testdata"),
+                                 getTestTempDirPath("output"),
+                                 measure,
+                                 2.1,
+                                 1.0,
+                                 0.001,
+                                 10,
+                                 false,
+                                 true,
+                                 false);
     int numIterations = 2;
     Path output = getTestTempDirPath("output");
-    CDbwDriver.runJob(new Path(output, "clusters-2"), new Path(output, "clusteredPoints"), output,
-                      EuclideanDistanceMeasure.class.getName(), numIterations, 1);
+    CDbwDriver.runJob(new Path(output, "clusters-2"), new Path(output, "clusteredPoints"), output, measure, numIterations, 1);
     checkRefPoints(numIterations);
   }
 
   public void testDirichlet() throws Exception {
     Vector prototype = new DenseVector(2);
-    DirichletDriver.runJob(getTestTempDirPath("testdata"), getTestTempDirPath("output"),
-                           L1ModelDistribution.class.getName(), prototype.getClass().getName(),
-                           15, 5, 1.0, 1, true, true, 0, false);
+    DirichletDriver.runJob(getTestTempDirPath("testdata"),
+                           getTestTempDirPath("output"),
+                           L1ModelDistribution.class.getName(),
+                           prototype.getClass().getName(),
+                           15,
+                           5,
+                           1.0,
+                           1,
+                           true,
+                           true,
+                           0,
+                           false);
     int numIterations = 2;
     Path output = getTestTempDirPath("output");
-    CDbwDriver.runJob(new Path(output, "clusters-5"), new Path(output, "clusteredPoints"), output,
-                      EuclideanDistanceMeasure.class.getName(), numIterations, 1);
+    CDbwDriver.runJob(new Path(output, "clusters-5"),
+                      new Path(output, "clusteredPoints"),
+                      output,
+                      new EuclideanDistanceMeasure(),
+                      numIterations,
+                      1);
     checkRefPoints(numIterations);
   }