You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ro...@apache.org on 2010/02/13 22:08:12 UTC

svn commit: r909914 [5/5] - in /lucene/mahout/trunk/core/src: main/java/org/apache/mahout/clustering/ main/java/org/apache/mahout/clustering/canopy/ main/java/org/apache/mahout/clustering/dirichlet/ main/java/org/apache/mahout/clustering/dirichlet/mode...

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java Sat Feb 13 21:07:53 2010
@@ -12,7 +12,7 @@
 import org.apache.mahout.math.Vector;
 
 public class MeanShiftCanopyClusterer {
-
+  
   private double convergenceDelta = 0;
   // the next canopyId to be allocated
   private int nextCanopyId = 0;
@@ -26,6 +26,7 @@
   public double getT1() {
     return t1;
   }
+  
   public double getT2() {
     return t2;
   }
@@ -37,15 +38,17 @@
   public MeanShiftCanopyClusterer(JobConf job) {
     configure(job);
   }
+  
   /**
    * Configure the Canopy and its distance measure
-   *
-   * @param job the JobConf for this job
+   * 
+   * @param job
+   *          the JobConf for this job
    */
   public void configure(JobConf job) {
     try {
       measure = Class.forName(job.get(MeanShiftCanopyConfigKeys.DISTANCE_MEASURE_KEY)).asSubclass(
-          DistanceMeasure.class).newInstance();
+        DistanceMeasure.class).newInstance();
       measure.configure(job);
     } catch (ClassNotFoundException e) {
       throw new IllegalStateException(e);
@@ -59,13 +62,14 @@
     t2 = Double.parseDouble(job.get(MeanShiftCanopyConfigKeys.T2_KEY));
     convergenceDelta = Double.parseDouble(job.get(MeanShiftCanopyConfigKeys.CLUSTER_CONVERGENCE_KEY));
   }
+  
   /**
    * Configure the Canopy for unit tests
-   *
-   * @param aDelta the convergence criteria
+   * 
+   * @param aDelta
+   *          the convergence criteria
    */
-  public void config(DistanceMeasure aMeasure, double aT1, double aT2,
-                            double aDelta) {
+  public void config(DistanceMeasure aMeasure, double aT1, double aT2, double aDelta) {
     nextCanopyId = 100; // so canopyIds will sort properly
     measure = aMeasure;
     t1 = aT1;
@@ -74,15 +78,17 @@
   }
   
   /**
-   * Merge the given canopy into the canopies list. If it touches any existing canopy (norm<T1) then add the center of
-   * each to the other. If it covers any other canopies (norm<T2), then merge the given canopy with the closest covering
-   * canopy. If the given canopy does not cover any other canopies, add it to the canopies list.
-   *
-   * @param aCanopy  a MeanShiftCanopy to be merged
-   * @param canopies the List<Canopy> to be appended
+   * Merge the given canopy into the canopies list. If it touches any existing canopy (norm<T1) then add the
+   * center of each to the other. If it covers any other canopies (norm<T2), then merge the given canopy with
+   * the closest covering canopy. If the given canopy does not cover any other canopies, add it to the
+   * canopies list.
+   * 
+   * @param aCanopy
+   *          a MeanShiftCanopy to be merged
+   * @param canopies
+   *          the List<MeanShiftCanopy> to be appended
    */
-  public void mergeCanopy(MeanShiftCanopy aCanopy,
-                                 List<MeanShiftCanopy> canopies) {
+  public void mergeCanopy(MeanShiftCanopy aCanopy, List<MeanShiftCanopy> canopies) {
     MeanShiftCanopy closestCoveringCanopy = null;
     double closestNorm = Double.MAX_VALUE;
     for (MeanShiftCanopy canopy : canopies) {
@@ -91,7 +97,7 @@
         aCanopy.touch(canopy);
       }
       if (norm < t2) {
-        if (closestCoveringCanopy == null || norm < closestNorm) {
+        if ((closestCoveringCanopy == null) || (norm < closestNorm)) {
           closestNorm = norm;
           closestCoveringCanopy = canopy;
         }
@@ -103,24 +109,24 @@
       closestCoveringCanopy.merge(aCanopy);
     }
   }
-
+  
   /** Emit the new canopy to the collector, keyed by the canopy's Id */
-  static void emitCanopy(MeanShiftCanopy canopy,
-                         OutputCollector<Text, WritableComparable<?>> collector)
-      throws IOException {
+  static void emitCanopy(MeanShiftCanopy canopy, OutputCollector<Text,WritableComparable<?>> collector) throws IOException {
     String identifier = canopy.getIdentifier();
     collector.collect(new Text(identifier), new Text("new " + canopy.toString()));
   }
   
   /**
    * Shift the center to the new centroid of the cluster
-   *
-   * @param canopy the canopy to shift.
+   * 
+   * @param canopy
+   *          the canopy to shift.
    * @return if the cluster is converged
    */
   public boolean shiftToMean(MeanShiftCanopy canopy) {
     Vector centroid = canopy.computeCentroid();
-    canopy.setConverged(new EuclideanDistanceMeasure().distance(centroid, canopy.getCenter()) < convergenceDelta);
+    canopy
+        .setConverged(new EuclideanDistanceMeasure().distance(centroid, canopy.getCenter()) < convergenceDelta);
     canopy.setCenter(centroid);
     canopy.setNumPoints(1);
     canopy.setPointTotal(centroid.clone());
@@ -129,9 +135,11 @@
   
   /**
    * Return if the point is covered by this canopy
-   *
-   * @param canopy a canopy.
-   * @param point a Vector point
+   * 
+   * @param canopy
+   *          a canopy.
+   * @param point
+   *          a Vector point
    * @return if the point is covered
    */
   boolean covers(MeanShiftCanopy canopy, Vector point) {
@@ -141,8 +149,10 @@
   /**
    * Return if the point is closely covered by the canopy
    * 
-   * @param canopy a canopy.
-   * @param point a Vector point
+   * @param canopy
+   *          a canopy.
+   * @param point
+   *          a Vector point
    * @return if the point is covered
    */
   public boolean closelyBound(MeanShiftCanopy canopy, Vector point) {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyConfigKeys.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyConfigKeys.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyConfigKeys.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyConfigKeys.java Sat Feb 13 21:07:53 2010
@@ -1,12 +1,12 @@
 package org.apache.mahout.clustering.meanshift;
 
 public interface MeanShiftCanopyConfigKeys {
-
+  
   // keys used by Driver, Mapper, Combiner & Reducer
   String DISTANCE_MEASURE_KEY = "org.apache.mahout.clustering.canopy.measure";
   String T1_KEY = "org.apache.mahout.clustering.canopy.t1";
   String T2_KEY = "org.apache.mahout.clustering.canopy.t2";
   String CONTROL_PATH_KEY = "org.apache.mahout.clustering.control.path";
   String CLUSTER_CONVERGENCE_KEY = "org.apache.mahout.clustering.canopy.convergence";
-
+  
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java Sat Feb 13 21:07:53 2010
@@ -17,6 +17,8 @@
 
 package org.apache.mahout.clustering.meanshift;
 
+import java.io.IOException;
+
 import org.apache.commons.cli2.CommandLine;
 import org.apache.commons.cli2.Group;
 import org.apache.commons.cli2.Option;
@@ -39,43 +41,38 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.IOException;
-
-public class MeanShiftCanopyDriver {
-
-  private static final Logger log = LoggerFactory
-      .getLogger(MeanShiftCanopyDriver.class);
-
-  private MeanShiftCanopyDriver() {
-  }
-
+public final class MeanShiftCanopyDriver {
+  
+  private static final Logger log = LoggerFactory.getLogger(MeanShiftCanopyDriver.class);
+  
+  private MeanShiftCanopyDriver() { }
+  
   public static void main(String[] args) {
     DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
     ArgumentBuilder abuilder = new ArgumentBuilder();
     GroupBuilder gbuilder = new GroupBuilder();
-
+    
     Option inputOpt = DefaultOptionCreator.inputOption().create();
     Option outputOpt = DefaultOptionCreator.outputOption().create();
     Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().create();
     Option helpOpt = DefaultOptionCreator.helpOption();
-
-    Option modelOpt = obuilder.withLongName("distanceClass").withRequired(true).withShortName("d").
-        withArgument(abuilder.withName("distanceClass").withMinimum(1).withMaximum(1).create()).
-        withDescription("The distance measure class name.").create();
-
-
-    Option threshold1Opt = obuilder.withLongName("threshold_1").withRequired(true).withShortName("t1").
-        withArgument(abuilder.withName("threshold_1").withMinimum(1).withMaximum(1).create()).
-        withDescription("The T1 distance threshold.").create();
-
-    Option threshold2Opt = obuilder.withLongName("threshold_2").withRequired(true).withShortName("t2").
-        withArgument(abuilder.withName("threshold_2").withMinimum(1).withMaximum(1).create()).
-        withDescription("The T1 distance threshold.").create();
-
-    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(modelOpt).
-        withOption(helpOpt).withOption(convergenceDeltaOpt).withOption(threshold1Opt).
-        withOption(threshold2Opt).create();
-
+    
+    Option modelOpt = obuilder.withLongName("distanceClass").withRequired(true).withShortName("d")
+        .withArgument(abuilder.withName("distanceClass").withMinimum(1).withMaximum(1).create())
+        .withDescription("The distance measure class name.").create();
+    
+    Option threshold1Opt = obuilder.withLongName("threshold_1").withRequired(true).withShortName("t1")
+        .withArgument(abuilder.withName("threshold_1").withMinimum(1).withMaximum(1).create())
+        .withDescription("The T1 distance threshold.").create();
+    
+    Option threshold2Opt = obuilder.withLongName("threshold_2").withRequired(true).withShortName("t2")
+        .withArgument(abuilder.withName("threshold_2").withMinimum(1).withMaximum(1).create())
+        .withDescription("The T2 distance threshold.").create();
+    
+    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)
+        .withOption(modelOpt).withOption(helpOpt).withOption(convergenceDeltaOpt).withOption(threshold1Opt)
+        .withOption(threshold2Opt).create();
+    
     try {
       Parser parser = new Parser();
       parser.setGroup(group);
@@ -84,45 +81,57 @@
         CommandLineUtil.printHelp(group);
         return;
       }
-
+      
       String input = cmdLine.getValue(inputOpt).toString();
       String output = cmdLine.getValue(outputOpt).toString();
       String measureClassName = cmdLine.getValue(modelOpt).toString();
       double t1 = Double.parseDouble(cmdLine.getValue(threshold1Opt).toString());
       double t2 = Double.parseDouble(cmdLine.getValue(threshold2Opt).toString());
       double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt).toString());
-      runJob(input, output, output + MeanShiftCanopyConfigKeys.CONTROL_PATH_KEY,
+      MeanShiftCanopyDriver.runJob(input, output, output + MeanShiftCanopyConfigKeys.CONTROL_PATH_KEY,
         measureClassName, t1, t2, convergenceDelta);
     } catch (OptionException e) {
-      log.error("Exception parsing command line: ", e);
+      MeanShiftCanopyDriver.log.error("Exception parsing command line: ", e);
       CommandLineUtil.printHelp(group);
     }
   }
-
+  
   /**
    * Run the job
-   *
-   * @param input            the input pathname String
-   * @param output           the output pathname String
-   * @param control          the control path
-   * @param measureClassName the DistanceMeasure class name
-   * @param t1               the T1 distance threshold
-   * @param t2               the T2 distance threshold
-   * @param convergenceDelta the double convergence criteria
+   * 
+   * @param input
+   *          the input pathname String
+   * @param output
+   *          the output pathname String
+   * @param control
+   *          the control path
+   * @param measureClassName
+   *          the DistanceMeasure class name
+   * @param t1
+   *          the T1 distance threshold
+   * @param t2
+   *          the T2 distance threshold
+   * @param convergenceDelta
+   *          the double convergence criteria
    */
-  public static void runJob(String input, String output, String control,
-                            String measureClassName, double t1, double t2, double convergenceDelta) {
-
+  public static void runJob(String input,
+                            String output,
+                            String control,
+                            String measureClassName,
+                            double t1,
+                            double t2,
+                            double convergenceDelta) {
+    
     Configurable client = new JobClient();
     JobConf conf = new JobConf(MeanShiftCanopyDriver.class);
-
+    
     conf.setOutputKeyClass(Text.class);
     conf.setOutputValueClass(MeanShiftCanopy.class);
-
+    
     FileInputFormat.setInputPaths(conf, new Path(input));
     Path outPath = new Path(output);
     FileOutputFormat.setOutputPath(conf, outPath);
-
+    
     conf.setMapperClass(MeanShiftCanopyMapper.class);
     conf.setReducerClass(MeanShiftCanopyReducer.class);
     conf.setNumReduceTasks(1);
@@ -133,12 +142,12 @@
     conf.set(MeanShiftCanopyConfigKeys.T1_KEY, String.valueOf(t1));
     conf.set(MeanShiftCanopyConfigKeys.T2_KEY, String.valueOf(t2));
     conf.set(MeanShiftCanopyConfigKeys.CONTROL_PATH_KEY, control);
-
+    
     client.setConf(conf);
     try {
       JobClient.runJob(conf);
     } catch (IOException e) {
-      log.warn(e.toString(), e);
+      MeanShiftCanopyDriver.log.warn(e.toString(), e);
     }
   }
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java Sat Feb 13 21:07:53 2010
@@ -17,6 +17,8 @@
 
 package org.apache.mahout.clustering.meanshift;
 
+import java.io.IOException;
+
 import org.apache.commons.cli2.CommandLine;
 import org.apache.commons.cli2.Group;
 import org.apache.commons.cli2.Option;
@@ -34,46 +36,41 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.IOException;
-
 public class MeanShiftCanopyJob {
-
+  
   protected static final String CONTROL_CONVERGED = "/control/converged";
-
-  private static final Logger log = LoggerFactory
-      .getLogger(MeanShiftCanopyJob.class);
-
-  private MeanShiftCanopyJob() {
-  }
-
+  
+  private static final Logger log = LoggerFactory.getLogger(MeanShiftCanopyJob.class);
+  
+  private MeanShiftCanopyJob() { }
+  
   public static void main(String[] args) throws IOException {
     DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
     ArgumentBuilder abuilder = new ArgumentBuilder();
     GroupBuilder gbuilder = new GroupBuilder();
-
+    
     Option inputOpt = DefaultOptionCreator.inputOption().create();
     Option outputOpt = DefaultOptionCreator.outputOption().create();
     Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().create();
     Option maxIterOpt = DefaultOptionCreator.maxIterOption().create();
     Option helpOpt = DefaultOptionCreator.helpOption();
-
-    Option modelOpt = obuilder.withLongName("distanceClass").withRequired(true).withShortName("d").
-        withArgument(abuilder.withName("distanceClass").withMinimum(1).withMaximum(1).create()).
-        withDescription("The distance measure class name.").create();
-
-
-    Option threshold1Opt = obuilder.withLongName("threshold_1").withRequired(true).withShortName("t1").
-        withArgument(abuilder.withName("threshold_1").withMinimum(1).withMaximum(1).create()).
-        withDescription("The T1 distance threshold.").create();
-
-    Option threshold2Opt = obuilder.withLongName("threshold_2").withRequired(true).withShortName("t2").
-        withArgument(abuilder.withName("threshold_2").withMinimum(1).withMaximum(1).create()).
-        withDescription("The T1 distance threshold.").create();
-
-    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(modelOpt).
-        withOption(helpOpt).withOption(convergenceDeltaOpt).withOption(threshold1Opt).withOption(maxIterOpt).
-        withOption(threshold2Opt).create();
-
+    
+    Option modelOpt = obuilder.withLongName("distanceClass").withRequired(true).withShortName("d")
+        .withArgument(abuilder.withName("distanceClass").withMinimum(1).withMaximum(1).create())
+        .withDescription("The distance measure class name.").create();
+    
+    Option threshold1Opt = obuilder.withLongName("threshold_1").withRequired(true).withShortName("t1")
+        .withArgument(abuilder.withName("threshold_1").withMinimum(1).withMaximum(1).create())
+        .withDescription("The T1 distance threshold.").create();
+    
+    Option threshold2Opt = obuilder.withLongName("threshold_2").withRequired(true).withShortName("t2")
+        .withArgument(abuilder.withName("threshold_2").withMinimum(1).withMaximum(1).create())
+        .withDescription("The T2 distance threshold.").create();
+    
+    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)
+        .withOption(modelOpt).withOption(helpOpt).withOption(convergenceDeltaOpt).withOption(threshold1Opt)
+        .withOption(maxIterOpt).withOption(threshold2Opt).create();
+    
     try {
       Parser parser = new Parser();
       parser.setGroup(group);
@@ -82,7 +79,7 @@
         CommandLineUtil.printHelp(group);
         return;
       }
-
+      
       String input = cmdLine.getValue(inputOpt).toString();
       String output = cmdLine.getValue(outputOpt).toString();
       String measureClassName = cmdLine.getValue(modelOpt).toString();
@@ -90,27 +87,37 @@
       double t2 = Double.parseDouble(cmdLine.getValue(threshold2Opt).toString());
       double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt).toString());
       int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt).toString());
-      runJob(input, output, measureClassName, t1, t2, convergenceDelta,
-          maxIterations);
+      MeanShiftCanopyJob.runJob(input, output, measureClassName, t1, t2, convergenceDelta, maxIterations);
     } catch (OptionException e) {
-      log.error("Exception parsing command line: ", e);
+      MeanShiftCanopyJob.log.error("Exception parsing command line: ", e);
       CommandLineUtil.printHelp(group);
     }
   }
-
+  
   /**
    * Run the job
-   *
-   * @param input            the input pathname String
-   * @param output           the output pathname String
-   * @param measureClassName the DistanceMeasure class name
-   * @param t1               the T1 distance threshold
-   * @param t2               the T2 distance threshold
-   * @param convergenceDelta the double convergence criteria
-   * @param maxIterations    an int number of iterations
+   * 
+   * @param input
+   *          the input pathname String
+   * @param output
+   *          the output pathname String
+   * @param measureClassName
+   *          the DistanceMeasure class name
+   * @param t1
+   *          the T1 distance threshold
+   * @param t2
+   *          the T2 distance threshold
+   * @param convergenceDelta
+   *          the double convergence criteria
+   * @param maxIterations
+   *          an int number of iterations
    */
-  public static void runJob(String input, String output,
-                            String measureClassName, double t1, double t2, double convergenceDelta,
+  public static void runJob(String input,
+                            String output,
+                            String measureClassName,
+                            double t1,
+                            double t2,
+                            double convergenceDelta,
                             int maxIterations) throws IOException {
     // delete the output directory
     Configuration conf = new JobConf(MeanShiftCanopyDriver.class);
@@ -124,18 +131,18 @@
     boolean converged = false;
     int iteration = 0;
     String clustersIn = input;
-    while (!converged && iteration < maxIterations) {
-      log.info("Iteration {}", iteration);
+    while (!converged && (iteration < maxIterations)) {
+      MeanShiftCanopyJob.log.info("Iteration {}", iteration);
       // point the output to a new directory per iteration
       String clustersOut = output + "/canopies-" + iteration;
-      String controlOut = output + CONTROL_CONVERGED;
-      MeanShiftCanopyDriver.runJob(clustersIn, clustersOut, controlOut,
-          measureClassName, t1, t2, convergenceDelta);
+      String controlOut = output + MeanShiftCanopyJob.CONTROL_CONVERGED;
+      MeanShiftCanopyDriver.runJob(clustersIn, clustersOut, controlOut, measureClassName, t1, t2,
+        convergenceDelta);
       converged = FileSystem.get(conf).exists(new Path(controlOut));
       // now point the input to the old output directory
       clustersIn = output + "/canopies-" + iteration;
       iteration++;
     }
   }
-
+  
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java Sat Feb 13 21:07:53 2010
@@ -17,6 +17,10 @@
 
 package org.apache.mahout.clustering.meanshift;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.JobConf;
@@ -25,26 +29,23 @@
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reporter;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
 public class MeanShiftCanopyMapper extends MapReduceBase implements
-    Mapper<WritableComparable<?>, MeanShiftCanopy, Text, MeanShiftCanopy> {
-
+    Mapper<WritableComparable<?>,MeanShiftCanopy,Text,MeanShiftCanopy> {
+  
   private final List<MeanShiftCanopy> canopies = new ArrayList<MeanShiftCanopy>();
   
   private MeanShiftCanopyClusterer clusterer;
-  private OutputCollector<Text, MeanShiftCanopy> output;
-
+  private OutputCollector<Text,MeanShiftCanopy> output;
+  
   @Override
-  public void map(WritableComparable<?> key, MeanShiftCanopy canopy,
-                  OutputCollector<Text, MeanShiftCanopy> output, Reporter reporter)
-      throws IOException {
+  public void map(WritableComparable<?> key,
+                  MeanShiftCanopy canopy,
+                  OutputCollector<Text,MeanShiftCanopy> output,
+                  Reporter reporter) throws IOException {
     this.output = output;
     clusterer.mergeCanopy(canopy.shallowCopy(), canopies);
   }
-
+  
   @Override
   public void close() throws IOException {
     for (MeanShiftCanopy canopy : canopies) {
@@ -53,11 +54,11 @@
     }
     super.close();
   }
-
+  
   @Override
   public void configure(JobConf job) {
     super.configure(job);
     clusterer = new MeanShiftCanopyClusterer(job);
   }
-
+  
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java Sat Feb 13 21:07:53 2010
@@ -17,6 +17,11 @@
 
 package org.apache.mahout.clustering.meanshift;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Text;
@@ -26,30 +31,26 @@
 import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.Reporter;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
 public class MeanShiftCanopyReducer extends MapReduceBase implements
-    Reducer<Text, MeanShiftCanopy, Text, MeanShiftCanopy> {
-
+    Reducer<Text,MeanShiftCanopy,Text,MeanShiftCanopy> {
+  
   private final List<MeanShiftCanopy> canopies = new ArrayList<MeanShiftCanopy>();
   private MeanShiftCanopyClusterer clusterer;
   private boolean allConverged = true;
-
+  
   private JobConf conf;
-
+  
   @Override
-  public void reduce(Text key, Iterator<MeanShiftCanopy> values,
-                     OutputCollector<Text, MeanShiftCanopy> output, Reporter reporter)
-      throws IOException {
-
+  public void reduce(Text key,
+                     Iterator<MeanShiftCanopy> values,
+                     OutputCollector<Text,MeanShiftCanopy> output,
+                     Reporter reporter) throws IOException {
+    
     while (values.hasNext()) {
       MeanShiftCanopy canopy = values.next();
       clusterer.mergeCanopy(canopy.shallowCopy(), canopies);
     }
-
+    
     for (MeanShiftCanopy canopy : canopies) {
       boolean converged = clusterer.shiftToMean(canopy);
       if (converged) {
@@ -58,16 +59,16 @@
       allConverged = converged && allConverged;
       output.collect(new Text(canopy.getIdentifier()), canopy);
     }
-
+    
   }
-
+  
   @Override
   public void configure(JobConf job) {
     super.configure(job);
     this.conf = job;
     clusterer = new MeanShiftCanopyClusterer(job);
   }
-
+  
   @Override
   public void close() throws IOException {
     if (allConverged) {
@@ -76,5 +77,5 @@
     }
     super.close();
   }
-
+  
 }

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java Sat Feb 13 21:07:53 2010
@@ -26,6 +26,7 @@
 import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.lib.IdentityReducer;
 import org.apache.mahout.clustering.ClusteringTestUtils;
+import org.apache.mahout.common.DummyReporter;
 import org.apache.mahout.common.MahoutTestCase;
 import org.apache.mahout.math.RandomAccessSparseVector;
 import org.apache.mahout.math.Vector;
@@ -254,7 +255,7 @@
 
     List<Canopy> canopies = new ArrayList<Canopy>();
     for (VectorWritable point : points) {
-      clusterer.addPointToCanopies(point.get(), canopies);
+      clusterer.addPointToCanopies(point.get(), canopies, new DummyReporter());
     }
 
     System.out.println("testIterativeManhattan");
@@ -269,7 +270,7 @@
 
     List<Canopy> canopies = new ArrayList<Canopy>();
     for (VectorWritable point : points) {
-      clusterer.addPointToCanopies(point.get(), canopies);
+      clusterer.addPointToCanopies(point.get(), canopies, new DummyReporter());
     }
 
     System.out.println("testIterativeEuclidean");