You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by je...@apache.org on 2010/04/22 22:47:40 UTC

svn commit: r937051 - in /lucene/mahout/trunk: core/src/main/java/org/apache/mahout/clustering/ core/src/main/java/org/apache/mahout/clustering/canopy/ core/src/main/java/org/apache/mahout/clustering/dirichlet/ core/src/main/java/org/apache/mahout/clus...

Author: jeastman
Date: Thu Apr 22 20:47:39 2010
New Revision: 937051

URL: http://svn.apache.org/viewvc?rev=937051&view=rev
Log:
MAHOUT-236: Improvements to consistency of all clustering algorithms:
- Cleaned up cluster asFormatString(x) so all clusters have similar naming
- Added ClusterDumper tests for fuzzyK and MeanShift
- Cleaned up cluster job intermediate file nomenclature:
  - All initial clusters go into clusters-0 directory
  - All cluster outputs go into clusters-i directory, where i is iteration number (1..n)
  - All clustered points go into clusteredPoints directory
- Moved intermediate file nomenclature to standard constants in ClusterBase

All tests run, but I think FuzzyKMeans is broken (or this is pilot error on my part), since it produces anomalous outputs with the ClusterDumper and CDbw tests. I will debug that next.

Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterBase.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansConfigKeys.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterInterface.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
    lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
    lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterBase.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterBase.java?rev=937051&r1=937050&r2=937051&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterBase.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterBase.java Thu Apr 22 20:47:39 2010
@@ -40,6 +40,16 @@ import com.google.gson.reflect.TypeToken
  *
  */
 public abstract class ClusterBase implements Writable, Cluster {
+  
+  // default directory for all clustered points
+  public static final String CLUSTERED_POINTS_DIR = "/clusteredPoints";
+
+  // default directory for initial clusters to prime iterative clustering algorithms
+  public static final String INITIAL_CLUSTERS_DIR = "/clusters-0";
+
+  // default directory for output of clusters per iteration
+  public static final String CLUSTERS_DIR = "/clusters-";
+
 
   // this cluster's clusterId
   private int id;

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java?rev=937051&r1=937050&r2=937051&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java Thu Apr 22 20:47:39 2010
@@ -122,12 +122,12 @@ public class Canopy extends ClusterBase 
   
   @Override
   public String toString() {
-    return getIdentifier() + " - " + getCenter().asFormatString();
+    return getIdentifier() + ": " + getCenter().asFormatString();
   }
   
   @Override
   public String getIdentifier() {
-    return "C" + getId();
+    return "C-" + getId();
   }
   
   /**

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java?rev=937051&r1=937050&r2=937051&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java Thu Apr 22 20:47:39 2010
@@ -27,6 +27,7 @@ import org.apache.commons.cli2.builder.A
 import org.apache.commons.cli2.builder.DefaultOptionBuilder;
 import org.apache.commons.cli2.builder.GroupBuilder;
 import org.apache.commons.cli2.commandline.Parser;
+import org.apache.mahout.clustering.ClusterBase;
 import org.apache.mahout.common.CommandLineUtil;
 import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
 import org.slf4j.Logger;
@@ -42,12 +43,7 @@ import org.slf4j.LoggerFactory;
 public final class CanopyClusteringJob {
   
   private static final Logger log = LoggerFactory.getLogger(CanopyClusteringJob.class);
-  
-  /** The default name of the canopies output sub-directory. */
-  public static final String DEFAULT_CANOPIES_OUTPUT_DIRECTORY = "/canopies";
-  /** The default name of the directory used to output clusters. */
-  public static final String DEFAULT_CLUSTER_OUTPUT_DIRECTORY = ClusterDriver.DEFAULT_CLUSTER_OUTPUT_DIRECTORY;
-  
+    
   private CanopyClusteringJob() { }
   
   /**
@@ -135,9 +131,10 @@ public final class CanopyClusteringJob {
    */
   public static void runJob(String input, String output,
                             String measureClassName, double t1, double t2) throws IOException {
-    CanopyDriver.runJob(input, output + DEFAULT_CANOPIES_OUTPUT_DIRECTORY,
+    String canopyOutputDir = output + ClusterBase.CLUSTERS_DIR + "0";
+    CanopyDriver.runJob(input, canopyOutputDir,
       measureClassName, t1, t2);
-    ClusterDriver.runJob(input, output + DEFAULT_CANOPIES_OUTPUT_DIRECTORY, output,
+    ClusterDriver.runJob(input, canopyOutputDir, output,
       measureClassName, t1, t2);
   }
   

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java?rev=937051&r1=937050&r2=937051&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java Thu Apr 22 20:47:39 2010
@@ -46,7 +46,7 @@ import org.slf4j.LoggerFactory;
 
 public final class ClusterDriver {
   
-  public static final String DEFAULT_CLUSTER_OUTPUT_DIRECTORY = "/clusters";
+  public static final String DEFAULT_CLUSTERED_POINTS_DIRECTORY = "/clusteredPoints";
   
   private static final Logger log = LoggerFactory.getLogger(ClusterDriver.class);
   
@@ -159,7 +159,7 @@ public final class ClusterDriver {
     conf.setOutputFormat(SequenceFileOutputFormat.class);
     
     FileInputFormat.setInputPaths(conf, new Path(points));
-    Path outPath = new Path(output + DEFAULT_CLUSTER_OUTPUT_DIRECTORY);
+    Path outPath = new Path(output + DEFAULT_CLUSTERED_POINTS_DIRECTORY);
     FileOutputFormat.setOutputPath(conf, outPath);
     
     conf.setMapperClass(ClusterMapper.class);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java?rev=937051&r1=937050&r2=937051&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java Thu Apr 22 20:47:39 2010
@@ -31,54 +31,55 @@ import com.google.gson.GsonBuilder;
 import com.google.gson.reflect.TypeToken;
 
 public class DirichletCluster<O> implements Writable, Cluster {
-  
+
   @Override
   public void readFields(DataInput in) throws IOException {
     this.totalCount = in.readDouble();
     this.model = readModel(in);
   }
-  
+
   @Override
   public void write(DataOutput out) throws IOException {
     out.writeDouble(totalCount);
     writeModel(out, model);
   }
-  
+
   private Model<O> model; // the model for this iteration
-  
+
   private double totalCount; // total count of observations for the model
-  
+
   public DirichletCluster(Model<O> model, double totalCount) {
     super();
     this.model = model;
     this.totalCount = totalCount;
   }
-  
+
   public DirichletCluster(Model<O> model) {
     super();
     this.model = model;
     this.totalCount = 0.0;
   }
-  
+
   public DirichletCluster() {
     super();
   }
-  
+
   public Model<O> getModel() {
     return model;
   }
-  
+
   public void setModel(Model<O> model) {
     this.model = model;
     this.totalCount += model.count();
   }
-  
+
   public double getTotalCount() {
     return totalCount;
   }
-  
-  private static final Type clusterType = new TypeToken<DirichletCluster<Vector>>() { }.getType();
-  
+
+  private static final Type clusterType = new TypeToken<DirichletCluster<Vector>>() {
+  }.getType();
+
   /** Reads a typed Model instance from the input stream */
   public static <O> Model<O> readModel(DataInput in) throws IOException {
     String modelClassName = in.readUTF();
@@ -95,18 +96,18 @@ public class DirichletCluster<O> impleme
     model.readFields(in);
     return model;
   }
-  
+
   /** Writes a typed Model instance to the output stream */
   public static void writeModel(DataOutput out, Model<?> model) throws IOException {
     out.writeUTF(model.getClass().getName());
     model.write(out);
   }
-  
+
   @Override
   public String asFormatString(String[] bindings) {
-    return model.toString();
+    return "C-" + model.getId() + ": " + model.toString();
   }
-  
+
   @Override
   public String asJsonString() {
     GsonBuilder builder = new GsonBuilder();
@@ -129,5 +130,5 @@ public class DirichletCluster<O> impleme
   public int getNumPoints() {
     return model.getNumPoints();
   }
-  
+
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java?rev=937051&r1=937050&r2=937051&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java Thu Apr 22 20:47:39 2010
@@ -41,10 +41,9 @@ import org.apache.hadoop.mapred.JobClien
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.SequenceFileInputFormat;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.mahout.clustering.ClusterBase;
 import org.apache.mahout.clustering.dirichlet.models.VectorModelDistribution;
-import org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansDriver;
 import org.apache.mahout.clustering.kmeans.KMeansDriver;
-import org.apache.mahout.clustering.meanshift.MeanShiftCanopyClusterMapper;
 import org.apache.mahout.common.CommandLineUtil;
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.apache.mahout.math.Vector;
@@ -54,6 +53,7 @@ import org.slf4j.LoggerFactory;
 
 public class DirichletDriver {
   
+
   public static final String STATE_IN_KEY = "org.apache.mahout.clustering.dirichlet.stateIn";
   
   public static final String MODEL_FACTORY_KEY = "org.apache.mahout.clustering.dirichlet.modelFactory";
@@ -215,20 +215,20 @@ public class DirichletDriver {
                                             NoSuchMethodException,
                                             InvocationTargetException {
     
-    String stateIn = output + "/state-0";
-    writeInitialState(output, stateIn, modelFactory, modelPrototype, prototypeSize, numClusters, alpha_0);
+    String clustersIn = output + ClusterBase.INITIAL_CLUSTERS_DIR;
+    writeInitialState(output, clustersIn, modelFactory, modelPrototype, prototypeSize, numClusters, alpha_0);
     
-    for (int iteration = 0; iteration < maxIterations; iteration++) {
+    for (int iteration = 1; iteration <= maxIterations; iteration++) {
       log.info("Iteration {}", iteration);
       // point the output to a new directory per iteration
-      String stateOut = output + "/state-" + (iteration + 1);
-      runIteration(input, stateIn, stateOut, modelFactory, modelPrototype, prototypeSize, numClusters,
+      String clustersOut = output + ClusterBase.CLUSTERS_DIR + iteration;
+      runIteration(input, clustersIn, clustersOut, modelFactory, modelPrototype, prototypeSize, numClusters,
         alpha_0, numReducers);
       // now point the input to the old output directory
-      stateIn = stateOut;
+      clustersIn = clustersOut;
     }
     // now cluster the most likely points
-    runClustering(input, stateIn, output + "/clusters");
+    runClustering(input, clustersIn, output + ClusterBase.CLUSTERED_POINTS_DIR);
   }
   
   private static void writeInitialState(String output,

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java?rev=937051&r1=937050&r2=937051&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java Thu Apr 22 20:47:39 2010
@@ -42,7 +42,7 @@ public class FuzzyKMeansClusterMapper ex
   @Override
   public void map(WritableComparable<?> key, VectorWritable point, OutputCollector<IntWritable, VectorWritable> output,
       Reporter reporter) throws IOException {
-    clusterer.outputPointWithClusterProbabilities(key.toString(), (NamedVector) point.get(), clusters, output);
+    clusterer.outputPointWithClusterProbabilities(key.toString(), point.get(), clusters, output);
   }
 
   /**

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java?rev=937051&r1=937050&r2=937051&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java Thu Apr 22 20:47:39 2010
@@ -45,6 +45,7 @@ import org.apache.hadoop.mapred.JobClien
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.SequenceFileInputFormat;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.mahout.clustering.ClusterBase;
 import org.apache.mahout.clustering.kmeans.RandomSeedGenerator;
 import org.apache.mahout.common.CommandLineUtil;
 import org.apache.mahout.common.HadoopUtil;
@@ -225,26 +226,26 @@ public final class FuzzyKMeansDriver {
                             float m) {
     
     boolean converged = false;
-    int iteration = 0;
+    int iteration = 1;
     
     // iterate until the clusters converge
-    while (!converged && (iteration < maxIterations)) {
+    while (!converged && (iteration <= maxIterations)) {
       log.info("Iteration {}", iteration);
       
       // point the output to a new directory per iteration
-      String clustersOut = output + File.separator + "clusters-" + iteration;
+      String clustersOut = output + ClusterBase.CLUSTERS_DIR + iteration;
       converged = runIteration(input, clustersIn, clustersOut, measureClass,
         convergenceDelta, numMapTasks, numReduceTasks, iteration, m);
       
       // now point the input to the old output directory
-      clustersIn = output + File.separator + "clusters-" + iteration;
+      clustersIn = clustersOut;
       iteration++;
     }
     
     // now actually cluster the points
     log.info("Clustering ");
     
-    runClustering(input, clustersIn, output + File.separator + "points", measureClass,
+    runClustering(input, clustersIn, output + ClusterBase.CLUSTERED_POINTS_DIR, measureClass,
       convergenceDelta, numMapTasks, m);
   }
   

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java?rev=937051&r1=937050&r2=937051&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java Thu Apr 22 20:47:39 2010
@@ -157,15 +157,15 @@ public class SoftCluster extends Cluster
   
   @Override
   public String toString() {
-    return getIdentifier() + " - " + getCenter().asFormatString();
+    return getIdentifier() + ": " + getCenter().asFormatString();
   }
   
   @Override
   public String getIdentifier() {
     if (converged) {
-      return "V" + this.getId();
+      return "V-" + this.getId();
     } else {
-      return "C" + this.getId();
+      return "C-" + this.getId();
     }
   }
   

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java?rev=937051&r1=937050&r2=937051&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java Thu Apr 22 20:47:39 2010
@@ -168,12 +168,12 @@ public class Cluster extends ClusterBase
   
   @Override
   public String toString() {
-    return getIdentifier() + " - " + getCenter().asFormatString();
+    return getIdentifier() + ": " + getCenter().asFormatString();
   }
   
   @Override
   public String getIdentifier() {
-    return (converged ? "V" : "C") + getId();
+    return (converged ? "V-" : "C-") + getId();
   }
   
   /**

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java?rev=937051&r1=937050&r2=937051&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java Thu Apr 22 20:47:39 2010
@@ -43,7 +43,7 @@ public class KMeansClusterMapper extends
                   VectorWritable point,
                   OutputCollector<IntWritable,VectorWritable> output,
                   Reporter reporter) throws IOException {
-    clusterer.outputPointWithClusterInfo((NamedVector) point.get(), clusters, output);
+    clusterer.outputPointWithClusterInfo(point.get(), clusters, output);
   }
   
   @Override

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java?rev=937051&r1=937050&r2=937051&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java Thu Apr 22 20:47:39 2010
@@ -81,21 +81,21 @@ public class KMeansClusterer {
     output.collect(new Text(nearestCluster.getIdentifier()), new KMeansInfo(1, point));
   }
   
-  public void outputPointWithClusterInfo(NamedVector point,
+  public void outputPointWithClusterInfo(Vector vector,
                                          List<Cluster> clusters,
                                          OutputCollector<IntWritable,VectorWritable> output) throws IOException {
     Cluster nearestCluster = null;
     double nearestDistance = Double.MAX_VALUE;
     for (Cluster cluster : clusters) {
       Vector clusterCenter = cluster.getCenter();
-      double distance = measure.distance(clusterCenter.getLengthSquared(), clusterCenter, point);
+      double distance = measure.distance(clusterCenter.getLengthSquared(), clusterCenter, vector);
       if ((distance < nearestDistance) || (nearestCluster == null)) {
         nearestCluster = cluster;
         nearestDistance = distance;
       }
     }
     
-    output.collect(new IntWritable(nearestCluster.getId()), new VectorWritable(point));
+    output.collect(new IntWritable(nearestCluster.getId()), new VectorWritable(vector));
   }
   
   /**

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansConfigKeys.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansConfigKeys.java?rev=937051&r1=937050&r2=937051&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansConfigKeys.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansConfigKeys.java Thu Apr 22 20:47:39 2010
@@ -25,9 +25,7 @@ public interface KMeansConfigKeys {
   String DISTANCE_MEASURE_KEY = "org.apache.mahout.clustering.kmeans.measure";
   /** Configuration key for convergence threshold. */
   String CLUSTER_CONVERGENCE_KEY = "org.apache.mahout.clustering.kmeans.convergence";
-  /** Configuration key for ?? */
+  /** Configuration key for iteration cluster path */
   String CLUSTER_PATH_KEY = "org.apache.mahout.clustering.kmeans.path";
-  /** The number of iterations that have taken place */
-  String ITERATION_NUMBER = "org.apache.mahout.clustering.kmeans.iteration";
   
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java?rev=937051&r1=937050&r2=937051&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java Thu Apr 22 20:47:39 2010
@@ -39,6 +39,7 @@ import org.apache.hadoop.mapred.JobClien
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.SequenceFileInputFormat;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.mahout.clustering.ClusterBase;
 import org.apache.mahout.common.CommandLineUtil;
 import org.apache.mahout.common.HadoopUtil;
 import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
@@ -47,10 +48,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 public final class KMeansDriver {
-  
-  /** The name of the directory used to output final results. */
-  public static final String DEFAULT_OUTPUT_DIRECTORY = "/points";
-  
+    
   private static final Logger log = LoggerFactory.getLogger(KMeansDriver.class);
   
   private KMeansDriver() {}
@@ -206,19 +204,19 @@ public final class KMeansDriver {
         new Object[] {convergenceDelta, maxIterations, numReduceTasks, VectorWritable.class.getName()});
     }
     boolean converged = false;
-    int iteration = 0;
-    while (!converged && (iteration < maxIterations)) {
+    int iteration = 1;
+    while (!converged && (iteration <= maxIterations)) {
       log.info("Iteration {}", iteration);
       // point the output to a new directory per iteration
-      String clustersOut = output + "/clusters-" + iteration;
+      String clustersOut = output + ClusterBase.CLUSTERS_DIR + iteration;
       converged = runIteration(input, clustersIn, clustersOut, measureClass, delta, numReduceTasks, iteration);
       // now point the input to the old output directory
-      clustersIn = output + "/clusters-" + iteration;
+      clustersIn = clustersOut;
       iteration++;
     }
     // now actually cluster the points
     log.info("Clustering ");
-    runClustering(input, clustersIn, output + DEFAULT_OUTPUT_DIRECTORY, measureClass, delta);
+    runClustering(input, clustersIn, output + ClusterBase.CLUSTERED_POINTS_DIR, measureClass, delta);
   }
   
   /**
@@ -265,7 +263,6 @@ public final class KMeansDriver {
     conf.set(KMeansConfigKeys.CLUSTER_PATH_KEY, clustersIn);
     conf.set(KMeansConfigKeys.DISTANCE_MEASURE_KEY, measureClass);
     conf.set(KMeansConfigKeys.CLUSTER_CONVERGENCE_KEY, convergenceDelta);
-    conf.set(KMeansConfigKeys.ITERATION_NUMBER, String.valueOf(iteration));
     
     try {
       JobClient.runJob(conf);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java?rev=937051&r1=937050&r2=937051&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java Thu Apr 22 20:47:39 2010
@@ -148,7 +148,7 @@ public class MeanShiftCanopy extends Clu
   
   @Override
   public String getIdentifier() {
-    return (converged ? "V" : "C") + getId();
+    return (converged ? "V-" : "C-") + getId();
   }
   
   void init(MeanShiftCanopy canopy) {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java?rev=937051&r1=937050&r2=937051&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java Thu Apr 22 20:47:39 2010
@@ -31,6 +31,7 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.JobConf;
+import org.apache.mahout.clustering.ClusterBase;
 import org.apache.mahout.common.CommandLineUtil;
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.slf4j.Logger;
@@ -148,7 +149,7 @@ public class MeanShiftCanopyJob {
     }
     fs.mkdirs(outPath);
 
-    String clustersIn = output + "/initial-canopies";
+    String clustersIn = output + ClusterBase.INITIAL_CLUSTERS_DIR;
     if (inputIsCanopies) {
       clustersIn = input;
     } else {
@@ -157,21 +158,22 @@ public class MeanShiftCanopyJob {
 
     // iterate until the clusters converge
     boolean converged = false;
-    int iteration = 0;
-    while (!converged && (iteration < maxIterations)) {
+    int iteration = 1;
+    while (!converged && (iteration <= maxIterations)) {
       log.info("Iteration {}", iteration);
       // point the output to a new directory per iteration
-      String clustersOut = output + "/canopies-" + iteration;
+      String clustersOut = output + ClusterBase.CLUSTERS_DIR + iteration;
       String controlOut = output + CONTROL_CONVERGED;
       MeanShiftCanopyDriver.runJob(clustersIn, clustersOut, controlOut, measureClassName, t1, t2, convergenceDelta);
       converged = FileSystem.get(conf).exists(new Path(controlOut));
       // now point the input to the old output directory
-      clustersIn = output + "/canopies-" + iteration;
+      clustersIn = clustersOut;
       iteration++;
     }
 
     // now cluster the points
-    MeanShiftCanopyDriver.runClustering((inputIsCanopies ? input : output + "/initial-canopies"), clustersIn, output + "/clusters");
+    MeanShiftCanopyDriver.runClustering((inputIsCanopies ? input : output + ClusterBase.INITIAL_CLUSTERS_DIR), clustersIn, output
+        + ClusterBase.CLUSTERED_POINTS_DIR);
   }
 
 }

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterInterface.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterInterface.java?rev=937051&r1=937050&r2=937051&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterInterface.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterInterface.java Thu Apr 22 20:47:39 2010
@@ -108,7 +108,7 @@ public class TestClusterInterface extend
     NormalModel model = new NormalModel(5, m, 0.75);
     Cluster cluster = new DirichletCluster<VectorWritable>(model, 35.0);
     String format = cluster.asFormatString(null);
-    assertEquals("format", "nm{n=0 m=[1.100, 2.200, 3.300] sd=0.75}", format);
+    assertEquals("format", "C-5: nm{n=0 m=[1.100, 2.200, 3.300] sd=0.75}", format);
   }
 
   public void testDirichletNormalModelClusterAsJsonString() {
@@ -131,7 +131,7 @@ public class TestClusterInterface extend
     AsymmetricSampledNormalModel model = new AsymmetricSampledNormalModel(5, m, m);
     Cluster cluster = new DirichletCluster<VectorWritable>(model, 35.0);
     String format = cluster.asFormatString(null);
-    assertEquals("format", "asnm{n=0 m=[1.100, 2.200, 3.300] sd=[1.100, 2.200, 3.300]}", format);
+    assertEquals("format", "C-5: asnm{n=0 m=[1.100, 2.200, 3.300] sd=[1.100, 2.200, 3.300]}", format);
   }
 
   public void testDirichletAsymmetricSampledNormalModelClusterAsJsonString() {
@@ -155,7 +155,7 @@ public class TestClusterInterface extend
     L1Model model = new L1Model(5, m);
     Cluster cluster = new DirichletCluster<VectorWritable>(model, 35.0);
     String format = cluster.asFormatString(null);
-    assertEquals("format", "l1m{n=0 c=[1.100, 2.200, 3.300]}", format);
+    assertEquals("format", "C-5: l1m{n=0 c=[1.100, 2.200, 3.300]}", format);
   }
 
   public void testDirichletL1ModelClusterAsJsonString() {
@@ -179,7 +179,7 @@ public class TestClusterInterface extend
     Cluster cluster = new Canopy(m, 123);
     String formatString = cluster.asFormatString(null);
     System.out.println(formatString);
-    assertEquals("format", "C123: [1.100, 2.200, 3.300]", formatString);
+    assertEquals("format", "C-123: [1.100, 2.200, 3.300]", formatString);
   }
 
   public void testCanopyAsFormatStringSparse() {
@@ -189,7 +189,7 @@ public class TestClusterInterface extend
     Cluster cluster = new Canopy(m, 123);
     String formatString = cluster.asFormatString(null);
     System.out.println(formatString);
-    assertEquals("format", "C123: [0:1.100, 2:3.300]", formatString);
+    assertEquals("format", "C-123: [0:1.100, 2:3.300]", formatString);
   }
 
   public void testCanopyAsFormatStringWithBindings() {
@@ -199,7 +199,7 @@ public class TestClusterInterface extend
     String[] bindings = { "fee", null, null };
     String formatString = cluster.asFormatString(bindings);
     System.out.println(formatString);
-    assertEquals("format", "C123: [fee:1.100, 1:2.200, 2:3.300]", formatString);
+    assertEquals("format", "C-123: [fee:1.100, 1:2.200, 2:3.300]", formatString);
   }
 
   public void testCanopyAsFormatStringSparseWithBindings() {
@@ -209,7 +209,7 @@ public class TestClusterInterface extend
     Cluster cluster = new Canopy(m, 123);
     String formatString = cluster.asFormatString(null);
     System.out.println(formatString);
-    assertEquals("format", "C123: [0:1.100, 2:3.300]", formatString);
+    assertEquals("format", "C-123: [0:1.100, 2:3.300]", formatString);
   }
 
   public void testClusterAsFormatString() {
@@ -218,7 +218,7 @@ public class TestClusterInterface extend
     Cluster cluster = new org.apache.mahout.clustering.kmeans.Cluster(m, 123);
     String formatString = cluster.asFormatString(null);
     System.out.println(formatString);
-    assertEquals("format", "C123: [1.100, 2.200, 3.300]", formatString);
+    assertEquals("format", "C-123: [1.100, 2.200, 3.300]", formatString);
   }
 
   public void testClusterAsFormatStringSparse() {
@@ -228,7 +228,7 @@ public class TestClusterInterface extend
     Cluster cluster = new org.apache.mahout.clustering.kmeans.Cluster(m, 123);
     String formatString = cluster.asFormatString(null);
     System.out.println(formatString);
-    assertEquals("format", "C123: [0:1.100, 2:3.300]", formatString);
+    assertEquals("format", "C-123: [0:1.100, 2:3.300]", formatString);
   }
 
   public void testClusterAsFormatStringWithBindings() {
@@ -238,7 +238,7 @@ public class TestClusterInterface extend
     String[] bindings = { "fee", null, "foo" };
     String formatString = cluster.asFormatString(bindings);
     System.out.println(formatString);
-    assertEquals("format", "C123: [fee:1.100, 1:2.200, foo:3.300]", formatString);
+    assertEquals("format", "C-123: [fee:1.100, 1:2.200, foo:3.300]", formatString);
   }
 
   public void testClusterAsFormatStringSparseWithBindings() {
@@ -248,7 +248,7 @@ public class TestClusterInterface extend
     Cluster cluster = new org.apache.mahout.clustering.kmeans.Cluster(m, 123);
     String formatString = cluster.asFormatString(null);
     System.out.println(formatString);
-    assertEquals("format", "C123: [0:1.100, 2:3.300]", formatString);
+    assertEquals("format", "C-123: [0:1.100, 2:3.300]", formatString);
   }
 
   public void testMSCanopyAsFormatString() {
@@ -257,7 +257,7 @@ public class TestClusterInterface extend
     Cluster cluster = new MeanShiftCanopy(m, 123);
     String formatString = cluster.asFormatString(null);
     System.out.println(formatString);
-    assertEquals("format", "C123: [1.100, 2.200, 3.300]", formatString);
+    assertEquals("format", "C-123: [1.100, 2.200, 3.300]", formatString);
   }
 
   public void testMSCanopyAsFormatStringSparse() {
@@ -267,7 +267,7 @@ public class TestClusterInterface extend
     Cluster cluster = new MeanShiftCanopy(m, 123);
     String formatString = cluster.asFormatString(null);
     System.out.println(formatString);
-    assertEquals("format", "C123: [0:1.100, 2:3.300]", formatString);
+    assertEquals("format", "C-123: [0:1.100, 2:3.300]", formatString);
   }
 
   public void testMSCanopyAsFormatStringWithBindings() {
@@ -277,7 +277,7 @@ public class TestClusterInterface extend
     String[] bindings = { "fee", null, "foo" };
     String formatString = cluster.asFormatString(bindings);
     System.out.println(formatString);
-    assertEquals("format", "C123: [fee:1.100, 1:2.200, foo:3.300]", formatString);
+    assertEquals("format", "C-123: [fee:1.100, 1:2.200, foo:3.300]", formatString);
   }
 
   public void testMSCanopyAsFormatStringSparseWithBindings() {
@@ -288,7 +288,7 @@ public class TestClusterInterface extend
     String[] bindings = { "fee", null, "foo" };
     String formatString = cluster.asFormatString(bindings);
     System.out.println(formatString);
-    assertEquals("format", "C123: [fee:1.100, foo:3.300]", formatString);
+    assertEquals("format", "C-123: [fee:1.100, foo:3.300]", formatString);
   }
 
 }

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java?rev=937051&r1=937050&r2=937051&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java Thu Apr 22 20:47:39 2010
@@ -352,12 +352,12 @@ public class TestCanopyCreation extends 
     Text key = new Text();
     Canopy canopy = new Canopy();
     assertTrue("more to come", reader.next(key, canopy));
-    assertEquals("1st key", "C0", key.toString());
+    assertEquals("1st key", "C-0", key.toString());
     // Canopy canopy = new Canopy(value); //Canopy.decodeCanopy(value.toString());
     assertEquals("1st x value", 1.5, canopy.getCenter().get(0));
     assertEquals("1st y value", 1.5, canopy.getCenter().get(1));
     assertTrue("more to come", reader.next(key, canopy));
-    assertEquals("2nd key", "C1", key.toString());
+    assertEquals("2nd key", "C-1", key.toString());
     // canopy = Canopy.decodeCanopy(canopy.toString());
     assertEquals("1st x value", 4.333333333333334, canopy.getCenter().get(0));
     assertEquals("1st y value", 4.333333333333334, canopy.getCenter().get(1));
@@ -388,11 +388,11 @@ public class TestCanopyCreation extends 
     Text key = new Text();
     Canopy value = new Canopy();
     assertTrue("more to come", reader.next(key, value));
-    assertEquals("1st key", "C0", key.toString());
+    assertEquals("1st key", "C-0", key.toString());
     assertEquals("1st x value", 1.8, value.getCenter().get(0));
     assertEquals("1st y value", 1.8, value.getCenter().get(1));
     assertTrue("more to come", reader.next(key, value));
-    assertEquals("2nd key", "C1", key.toString());
+    assertEquals("2nd key", "C-1", key.toString());
     assertEquals("1st x value", 4.433333333333334, value.getCenter().get(0));
     assertEquals("1st y value", 4.433333333333334, value.getCenter().get(1));
     assertFalse("more to come", reader.next(key, value));
@@ -493,7 +493,7 @@ public class TestCanopyCreation extends 
     // now run the Job
     CanopyClusteringJob.runJob("testdata", "output", ManhattanDistanceMeasure.class.getName(), 3.1, 2.1);
     // TODO: change
-    Path path = new Path("output/clusters/part-00000");
+    Path path = new Path("output/clusteredPoints/part-00000");
     SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
     int count = 0;
     /*
@@ -525,7 +525,7 @@ public class TestCanopyCreation extends 
     ClusteringTestUtils.writePointsToFile(points, "testdata/file2", fs, conf);
     // now run the Job
     CanopyClusteringJob.runJob("testdata", "output", EuclideanDistanceMeasure.class.getName(), 3.1, 2.1);
-    Path path = new Path("output/clusters/part-00000");
+    Path path = new Path("output/clusteredPoints/part-00000");
     SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
     int count = 0;
     /*
@@ -566,12 +566,12 @@ public class TestCanopyCreation extends 
     Text key = new Text();
     Canopy value = new Canopy();
     assertTrue("more to come", reader.next(key, value));
-    assertEquals("1st key", "C0", key.toString());
+    assertEquals("1st key", "C-0", key.toString());
     
     assertEquals("1st x value", 1.5, value.getCenter().get(0));
     assertEquals("1st y value", 1.5, value.getCenter().get(1));
     assertTrue("more to come", reader.next(key, value));
-    assertEquals("2nd key", "C1", key.toString());
+    assertEquals("2nd key", "C-1", key.toString());
     
     assertEquals("1st x value", 4.333333333333334, value.getCenter().get(0));
     assertEquals("1st y value", 4.333333333333334, value.getCenter().get(1));

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java?rev=937051&r1=937050&r2=937051&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java Thu Apr 22 20:47:39 2010
@@ -205,11 +205,11 @@ public class TestFuzzyKmeansClustering e
         EuclideanDistanceMeasure.class.getName(), 0.001, 2, 1, k + 1, 2);
       
       // now compare the expected clusters with actual
-      File outDir = new File("output/points");
+      File outDir = new File("output/clusteredPoints");
       assertTrue("output dir exists?", outDir.exists());
       outDir.list();
       // assertEquals("output dir files?", 4, outFiles.length);
-      SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path("output/points/part-00000"), conf);
+      SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path("output/clusteredPoints/part-00000"), conf);
       IntWritable key = new IntWritable();
       VectorWritable out = new VectorWritable();
       while (reader.next(key, out)) {

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java?rev=937051&r1=937050&r2=937051&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java Thu Apr 22 20:47:39 2010
@@ -373,10 +373,10 @@ public class TestKmeansClustering extend
       KMeansDriver.runJob("testdata/points", "testdata/clusters", "output", EuclideanDistanceMeasure.class
           .getName(), 0.001, 10, k + 1);
       // now compare the expected clusters with actual
-      File outDir = new File("output/points");
+      File outDir = new File("output/clusteredPoints");
       assertTrue("output dir exists?", outDir.exists());
       // assertEquals("output dir files?", 4, outFiles.length);
-      SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path("output/points/part-00000"), conf);
+      SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path("output/clusteredPoints/part-00000"), conf);
       int[] expect = expectedNumPoints[k];
       DummyOutputCollector<IntWritable,VectorWritable> collector = new DummyOutputCollector<IntWritable,VectorWritable>();
       // The key is the clusterId
@@ -424,12 +424,12 @@ public class TestKmeansClustering extend
         .getName(), 0.001, 10, 1);
     
     // now compare the expected clusters with actual
-    File outDir = new File("output/points");
+    File outDir = new File("output/clusteredPoints");
     assertTrue("output dir exists?", outDir.exists());
     String[] outFiles = outDir.list();
     assertEquals("output dir files?", 4, outFiles.length);
     DummyOutputCollector<IntWritable,VectorWritable> collector = new DummyOutputCollector<IntWritable,VectorWritable>();
-    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path("output/points/part-00000"), conf);
+    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path("output/clusteredPoints/part-00000"), conf);
     
     // The key is the clusterId
     IntWritable clusterId = new IntWritable(0);

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java?rev=937051&r1=937050&r2=937051&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java Thu Apr 22 20:47:39 2010
@@ -204,7 +204,7 @@ public class TestMeanShift extends Mahou
     for (Map.Entry<String, MeanShiftCanopy> stringMeanShiftCanopyEntry : refCanopyMap.entrySet()) {
       MeanShiftCanopy ref = stringMeanShiftCanopyEntry.getValue();
 
-      MeanShiftCanopy canopy = canopyMap.get((ref.isConverged() ? "V" : "C") + ref.getCanopyId());
+      MeanShiftCanopy canopy = canopyMap.get((ref.isConverged() ? "V-" : "C-") + ref.getCanopyId());
       assertEquals("ids", ref.getCanopyId(), canopy.getCanopyId());
       assertEquals("centers(" + ref.getIdentifier() + ')', ref.getCenter().asFormatString(), canopy.getCenter().asFormatString());
       assertEquals("bound points", ref.getBoundPoints().size(), canopy.getBoundPoints().size());
@@ -272,7 +272,7 @@ public class TestMeanShift extends Mahou
     for (Map.Entry<String, MeanShiftCanopy> mapEntry : reducerReferenceMap.entrySet()) {
       MeanShiftCanopy refCanopy = mapEntry.getValue();
 
-      List<MeanShiftCanopy> values = reduceCollector.getValue(new Text((refCanopy.isConverged() ? "V" : "C")
+      List<MeanShiftCanopy> values = reduceCollector.getValue(new Text((refCanopy.isConverged() ? "V-" : "C-")
           + refCanopy.getCanopyId()));
       assertEquals("values", 1, values.size());
       MeanShiftCanopy reducerCanopy = values.get(0);
@@ -306,7 +306,7 @@ public class TestMeanShift extends Mahou
     // now run the Job
     MeanShiftCanopyJob.runJob("testdata", "output", EuclideanDistanceMeasure.class.getName(), 4, 1, 0.5, 10);
     JobConf conf = new JobConf(MeanShiftCanopyDriver.class);
-    Path outPart = new Path("output/canopies-2/part-00000");
+    Path outPart = new Path("output/clusters-3/part-00000");
     SequenceFile.Reader reader = new SequenceFile.Reader(fs, outPart, conf);
     Text key = new Text();
     MeanShiftCanopy value = new MeanShiftCanopy();

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java?rev=937051&r1=937050&r2=937051&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java Thu Apr 22 20:47:39 2010
@@ -31,6 +31,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.JobClient;
 import org.apache.hadoop.mapred.JobConf;
+import org.apache.mahout.clustering.ClusterBase;
 import org.apache.mahout.clustering.canopy.CanopyClusteringJob;
 import org.apache.mahout.clustering.canopy.CanopyDriver;
 import org.apache.mahout.clustering.kmeans.KMeansDriver;
@@ -150,10 +151,10 @@ public final class Job {
       "org.apache.mahout.math.RandomAccessSparseVector");
     log.info("Running Canopy to get initial clusters");
     CanopyDriver.runJob(directoryContainingConvertedInput,
-      output + CanopyClusteringJob.DEFAULT_CANOPIES_OUTPUT_DIRECTORY, measureClass, t1, t2);
+      output + ClusterBase.INITIAL_CLUSTERS_DIR, measureClass, t1, t2);
     log.info("Running KMeans");
     KMeansDriver.runJob(directoryContainingConvertedInput,
-      output + CanopyClusteringJob.DEFAULT_CANOPIES_OUTPUT_DIRECTORY, output, measureClass, convergenceDelta,
+      output + ClusterBase.INITIAL_CLUSTERS_DIR, output, measureClass, convergenceDelta,
       maxIterations, 1);
   }
 }

Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java?rev=937051&r1=937050&r2=937051&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java (original)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java Thu Apr 22 20:47:39 2010
@@ -37,7 +37,9 @@ import org.apache.mahout.clustering.cano
 import org.apache.mahout.clustering.canopy.CanopyDriver;
 import org.apache.mahout.clustering.dirichlet.DirichletDriver;
 import org.apache.mahout.clustering.dirichlet.models.L1ModelDistribution;
+import org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansDriver;
 import org.apache.mahout.clustering.kmeans.KMeansDriver;
+import org.apache.mahout.clustering.meanshift.MeanShiftCanopyJob;
 import org.apache.mahout.common.MahoutTestCase;
 import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
@@ -135,17 +137,34 @@ public class TestClusterDumper extends M
   public void testCanopy() throws Exception { // now run the Job
     CanopyClusteringJob.runJob("testdata/points", "output", EuclideanDistanceMeasure.class.getName(), 8, 4);
     // run ClusterDumper
-    ClusterDumper clusterDumper = new ClusterDumper("output/canopies", null);
+    ClusterDumper clusterDumper = new ClusterDumper("output/clusters-0", null);
     clusterDumper.printClusters();
   }
 
   public void testKmeans() throws Exception {
     // now run the Canopy job to prime kMeans canopies
-    CanopyDriver.runJob("testdata/points", "testdata/canopies", EuclideanDistanceMeasure.class.getName(), 8, 4);
+    CanopyDriver.runJob("testdata/points", "output/clusters-0", EuclideanDistanceMeasure.class.getName(), 8, 4);
     // now run the KMeans job
-    KMeansDriver.runJob("testdata/points", "testdata/canopies", "output", EuclideanDistanceMeasure.class.getName(),
+    KMeansDriver.runJob("testdata/points", "output/clusters-0", "output", EuclideanDistanceMeasure.class.getName(),
         0.001, 10, 1);
     // run ClusterDumper
+    ClusterDumper clusterDumper = new ClusterDumper("output/clusters-2", null);
+    clusterDumper.printClusters();
+  }
+
+  public void testFuzzyKmeans() throws Exception {
+    // now run the Canopy job to prime kMeans canopies
+    CanopyDriver.runJob("testdata/points", "output/clusters-0", EuclideanDistanceMeasure.class.getName(), 8, 4);
+    // now run the FuzzyKMeans job
+    FuzzyKMeansDriver.runJob("testdata/points", "output/clusters-0", "output", EuclideanDistanceMeasure.class.getName(), 0.001, 10, 1, 1, 2);
+    // run ClusterDumper
+    ClusterDumper clusterDumper = new ClusterDumper("output/clusters-3", null);
+    clusterDumper.printClusters();
+  }
+
+  public void testMeanShift() throws Exception {
+    MeanShiftCanopyJob.runJob("testdata/points", "output", EuclideanDistanceMeasure.class.getName(), 9, 1.0, 0.001, 10);
+    // run ClusterDumper
     ClusterDumper clusterDumper = new ClusterDumper("output/clusters-1", null);
     clusterDumper.printClusters();
   }
@@ -156,7 +175,7 @@ public class TestClusterDumper extends M
         L1ModelDistribution.class.getName(), prototype.getClass().getName(), prototype
             .size(), 15, 10, 1.0, 1);
     // run ClusterDumper
-    ClusterDumper clusterDumper = new ClusterDumper("output/state-10", null);
+    ClusterDumper clusterDumper = new ClusterDumper("output/clusters-10", null);
     clusterDumper.printClusters();
   }
 }

Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java?rev=937051&r1=937050&r2=937051&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java (original)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java Thu Apr 22 20:47:39 2010
@@ -79,34 +79,34 @@ public class TestCDbwEvaluator extends M
   public void testCanopy() throws Exception { // now run the Job
     CanopyClusteringJob.runJob("testdata", "output", EuclideanDistanceMeasure.class.getName(), 3.1, 2.1);
     int numIterations = 2;
-    CDbwDriver.runJob("output/canopies", "output/clusters", "output", EuclideanDistanceMeasure.class.getName(), numIterations, 1);
+    CDbwDriver.runJob("output/clusters-0", "output/clusteredPoints", "output", EuclideanDistanceMeasure.class.getName(), numIterations, 1);
     checkRefPoints(numIterations);
   }
 
   public void testKmeans() throws Exception {
     // now run the Canopy job to prime kMeans canopies
-    CanopyDriver.runJob("testdata", "output/canopies", EuclideanDistanceMeasure.class.getName(), 3.1, 2.1);
+    CanopyDriver.runJob("testdata", "output/clusters-0", EuclideanDistanceMeasure.class.getName(), 3.1, 2.1);
     // now run the KMeans job
-    KMeansDriver.runJob("testdata", "output/canopies", "output", EuclideanDistanceMeasure.class.getName(), 0.001, 10, 1);
+    KMeansDriver.runJob("testdata", "output/clusters-0", "output", EuclideanDistanceMeasure.class.getName(), 0.001, 10, 1);
     int numIterations = 2;
-    CDbwDriver.runJob("output/clusters-1", "output/points", "output", EuclideanDistanceMeasure.class.getName(), numIterations, 1);
+    CDbwDriver.runJob("output/clusters-2", "output/clusteredPoints", "output", EuclideanDistanceMeasure.class.getName(), numIterations, 1);
     checkRefPoints(numIterations);
   }
 
   public void testFuzzyKmeans() throws Exception {
     // now run the Canopy job to prime kMeans canopies
-    CanopyDriver.runJob("testdata", "output/canopies", EuclideanDistanceMeasure.class.getName(), 3.1, 2.1);
+    CanopyDriver.runJob("testdata", "output/clusters-0", EuclideanDistanceMeasure.class.getName(), 3.1, 2.1);
     // now run the KMeans job
-    FuzzyKMeansDriver.runJob("testdata", "output/canopies", "output", EuclideanDistanceMeasure.class.getName(), 0.001, 10, 1, 1, 2);
+    FuzzyKMeansDriver.runJob("testdata", "output/clusters-0", "output", EuclideanDistanceMeasure.class.getName(), 0.001, 10, 1, 1, 2);
     int numIterations = 2;
-    CDbwDriver.runJob("output/clusters-3", "output/points", "output", EuclideanDistanceMeasure.class.getName(), numIterations, 1);
+    CDbwDriver.runJob("output/clusters-4", "output/clusteredPoints", "output", EuclideanDistanceMeasure.class.getName(), numIterations, 1);
     checkRefPoints(numIterations);
   }
 
   public void testMeanShift() throws Exception {
     MeanShiftCanopyJob.runJob("testdata", "output", EuclideanDistanceMeasure.class.getName(), 2.1, 1.0, 0.001, 10);
     int numIterations = 2;
-    CDbwDriver.runJob("output/canopies-1", "output/clusters", "output", EuclideanDistanceMeasure.class.getName(), numIterations, 1);
+    CDbwDriver.runJob("output/clusters-2", "output/clusteredPoints", "output", EuclideanDistanceMeasure.class.getName(), numIterations, 1);
     checkRefPoints(numIterations);
   }
 
@@ -115,7 +115,7 @@ public class TestCDbwEvaluator extends M
     DirichletDriver.runJob("testdata", "output", L1ModelDistribution.class.getName(), prototype.getClass().getName(), prototype
         .size(), 15, 5, 1.0, 1);
     int numIterations = 2;
-    CDbwDriver.runJob("output/state-5", "output/clusters", "output", EuclideanDistanceMeasure.class.getName(), numIterations, 1);
+    CDbwDriver.runJob("output/clusters-5", "output/clusteredPoints", "output", EuclideanDistanceMeasure.class.getName(), numIterations, 1);
     checkRefPoints(numIterations);
   }