You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ro...@apache.org on 2010/02/13 22:08:12 UTC
svn commit: r909914 [5/5] - in /lucene/mahout/trunk/core/src:
main/java/org/apache/mahout/clustering/
main/java/org/apache/mahout/clustering/canopy/
main/java/org/apache/mahout/clustering/dirichlet/
main/java/org/apache/mahout/clustering/dirichlet/mode...
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java Sat Feb 13 21:07:53 2010
@@ -12,7 +12,7 @@
import org.apache.mahout.math.Vector;
public class MeanShiftCanopyClusterer {
-
+
private double convergenceDelta = 0;
// the next canopyId to be allocated
private int nextCanopyId = 0;
@@ -26,6 +26,7 @@
public double getT1() {
return t1;
}
+
public double getT2() {
return t2;
}
@@ -37,15 +38,17 @@
public MeanShiftCanopyClusterer(JobConf job) {
configure(job);
}
+
/**
* Configure the Canopy and its distance measure
- *
- * @param job the JobConf for this job
+ *
+ * @param job
+ * the JobConf for this job
*/
public void configure(JobConf job) {
try {
measure = Class.forName(job.get(MeanShiftCanopyConfigKeys.DISTANCE_MEASURE_KEY)).asSubclass(
- DistanceMeasure.class).newInstance();
+ DistanceMeasure.class).newInstance();
measure.configure(job);
} catch (ClassNotFoundException e) {
throw new IllegalStateException(e);
@@ -59,13 +62,14 @@
t2 = Double.parseDouble(job.get(MeanShiftCanopyConfigKeys.T2_KEY));
convergenceDelta = Double.parseDouble(job.get(MeanShiftCanopyConfigKeys.CLUSTER_CONVERGENCE_KEY));
}
+
/**
* Configure the Canopy for unit tests
- *
- * @param aDelta the convergence criteria
+ *
+ * @param aDelta
+ * the convergence criteria
*/
- public void config(DistanceMeasure aMeasure, double aT1, double aT2,
- double aDelta) {
+ public void config(DistanceMeasure aMeasure, double aT1, double aT2, double aDelta) {
nextCanopyId = 100; // so canopyIds will sort properly
measure = aMeasure;
t1 = aT1;
@@ -74,15 +78,17 @@
}
/**
- * Merge the given canopy into the canopies list. If it touches any existing canopy (norm<T1) then add the center of
- * each to the other. If it covers any other canopies (norm<T2), then merge the given canopy with the closest covering
- * canopy. If the given canopy does not cover any other canopies, add it to the canopies list.
- *
- * @param aCanopy a MeanShiftCanopy to be merged
- * @param canopies the List<Canopy> to be appended
+ * Merge the given canopy into the canopies list. If it touches any existing canopy (norm<T1) then add the
+ * center of each to the other. If it covers any other canopies (norm<T2), then merge the given canopy with
+ * the closest covering canopy. If the given canopy does not cover any other canopies, add it to the
+ * canopies list.
+ *
+ * @param aCanopy
+ * a MeanShiftCanopy to be merged
+ * @param canopies
+ * the List<Canopy> to be appended
*/
- public void mergeCanopy(MeanShiftCanopy aCanopy,
- List<MeanShiftCanopy> canopies) {
+ public void mergeCanopy(MeanShiftCanopy aCanopy, List<MeanShiftCanopy> canopies) {
MeanShiftCanopy closestCoveringCanopy = null;
double closestNorm = Double.MAX_VALUE;
for (MeanShiftCanopy canopy : canopies) {
@@ -91,7 +97,7 @@
aCanopy.touch(canopy);
}
if (norm < t2) {
- if (closestCoveringCanopy == null || norm < closestNorm) {
+ if ((closestCoveringCanopy == null) || (norm < closestNorm)) {
closestNorm = norm;
closestCoveringCanopy = canopy;
}
@@ -103,24 +109,24 @@
closestCoveringCanopy.merge(aCanopy);
}
}
-
+
/** Emit the new canopy to the collector, keyed by the canopy's Id */
- static void emitCanopy(MeanShiftCanopy canopy,
- OutputCollector<Text, WritableComparable<?>> collector)
- throws IOException {
+ static void emitCanopy(MeanShiftCanopy canopy, OutputCollector<Text,WritableComparable<?>> collector) throws IOException {
String identifier = canopy.getIdentifier();
collector.collect(new Text(identifier), new Text("new " + canopy.toString()));
}
/**
* Shift the center to the new centroid of the cluster
- *
- * @param canopy the canopy to shift.
+ *
+ * @param canopy
+ * the canopy to shift.
* @return if the cluster is converged
*/
public boolean shiftToMean(MeanShiftCanopy canopy) {
Vector centroid = canopy.computeCentroid();
- canopy.setConverged(new EuclideanDistanceMeasure().distance(centroid, canopy.getCenter()) < convergenceDelta);
+ canopy
+ .setConverged(new EuclideanDistanceMeasure().distance(centroid, canopy.getCenter()) < convergenceDelta);
canopy.setCenter(centroid);
canopy.setNumPoints(1);
canopy.setPointTotal(centroid.clone());
@@ -129,9 +135,11 @@
/**
* Return if the point is covered by this canopy
- *
- * @param canopy a canopy.
- * @param point a Vector point
+ *
+ * @param canopy
+ * a canopy.
+ * @param point
+ * a Vector point
* @return if the point is covered
*/
boolean covers(MeanShiftCanopy canopy, Vector point) {
@@ -141,8 +149,10 @@
/**
* Return if the point is closely covered by the canopy
*
- * @param canopy a canopy.
- * @param point a Vector point
+ * @param canopy
+ * a canopy.
+ * @param point
+ * a Vector point
* @return if the point is covered
*/
public boolean closelyBound(MeanShiftCanopy canopy, Vector point) {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyConfigKeys.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyConfigKeys.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyConfigKeys.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyConfigKeys.java Sat Feb 13 21:07:53 2010
@@ -1,12 +1,12 @@
package org.apache.mahout.clustering.meanshift;
public interface MeanShiftCanopyConfigKeys {
-
+
// keys used by Driver, Mapper, Combiner & Reducer
String DISTANCE_MEASURE_KEY = "org.apache.mahout.clustering.canopy.measure";
String T1_KEY = "org.apache.mahout.clustering.canopy.t1";
String T2_KEY = "org.apache.mahout.clustering.canopy.t2";
String CONTROL_PATH_KEY = "org.apache.mahout.clustering.control.path";
String CLUSTER_CONVERGENCE_KEY = "org.apache.mahout.clustering.canopy.convergence";
-
+
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java Sat Feb 13 21:07:53 2010
@@ -17,6 +17,8 @@
package org.apache.mahout.clustering.meanshift;
+import java.io.IOException;
+
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -39,43 +41,38 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.IOException;
-
-public class MeanShiftCanopyDriver {
-
- private static final Logger log = LoggerFactory
- .getLogger(MeanShiftCanopyDriver.class);
-
- private MeanShiftCanopyDriver() {
- }
-
+public final class MeanShiftCanopyDriver {
+
+ private static final Logger log = LoggerFactory.getLogger(MeanShiftCanopyDriver.class);
+
+ private MeanShiftCanopyDriver() { }
+
public static void main(String[] args) {
DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
ArgumentBuilder abuilder = new ArgumentBuilder();
GroupBuilder gbuilder = new GroupBuilder();
-
+
Option inputOpt = DefaultOptionCreator.inputOption().create();
Option outputOpt = DefaultOptionCreator.outputOption().create();
Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().create();
Option helpOpt = DefaultOptionCreator.helpOption();
-
- Option modelOpt = obuilder.withLongName("distanceClass").withRequired(true).withShortName("d").
- withArgument(abuilder.withName("distanceClass").withMinimum(1).withMaximum(1).create()).
- withDescription("The distance measure class name.").create();
-
-
- Option threshold1Opt = obuilder.withLongName("threshold_1").withRequired(true).withShortName("t1").
- withArgument(abuilder.withName("threshold_1").withMinimum(1).withMaximum(1).create()).
- withDescription("The T1 distance threshold.").create();
-
- Option threshold2Opt = obuilder.withLongName("threshold_2").withRequired(true).withShortName("t2").
- withArgument(abuilder.withName("threshold_2").withMinimum(1).withMaximum(1).create()).
- withDescription("The T1 distance threshold.").create();
-
- Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(modelOpt).
- withOption(helpOpt).withOption(convergenceDeltaOpt).withOption(threshold1Opt).
- withOption(threshold2Opt).create();
-
+
+ Option modelOpt = obuilder.withLongName("distanceClass").withRequired(true).withShortName("d")
+ .withArgument(abuilder.withName("distanceClass").withMinimum(1).withMaximum(1).create())
+ .withDescription("The distance measure class name.").create();
+
+ Option threshold1Opt = obuilder.withLongName("threshold_1").withRequired(true).withShortName("t1")
+ .withArgument(abuilder.withName("threshold_1").withMinimum(1).withMaximum(1).create())
+ .withDescription("The T1 distance threshold.").create();
+
+ Option threshold2Opt = obuilder.withLongName("threshold_2").withRequired(true).withShortName("t2")
+ .withArgument(abuilder.withName("threshold_2").withMinimum(1).withMaximum(1).create())
+ .withDescription("The T1 distance threshold.").create();
+
+ Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)
+ .withOption(modelOpt).withOption(helpOpt).withOption(convergenceDeltaOpt).withOption(threshold1Opt)
+ .withOption(threshold2Opt).create();
+
try {
Parser parser = new Parser();
parser.setGroup(group);
@@ -84,45 +81,57 @@
CommandLineUtil.printHelp(group);
return;
}
-
+
String input = cmdLine.getValue(inputOpt).toString();
String output = cmdLine.getValue(outputOpt).toString();
String measureClassName = cmdLine.getValue(modelOpt).toString();
double t1 = Double.parseDouble(cmdLine.getValue(threshold1Opt).toString());
double t2 = Double.parseDouble(cmdLine.getValue(threshold2Opt).toString());
double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt).toString());
- runJob(input, output, output + MeanShiftCanopyConfigKeys.CONTROL_PATH_KEY,
+ MeanShiftCanopyDriver.runJob(input, output, output + MeanShiftCanopyConfigKeys.CONTROL_PATH_KEY,
measureClassName, t1, t2, convergenceDelta);
} catch (OptionException e) {
- log.error("Exception parsing command line: ", e);
+ MeanShiftCanopyDriver.log.error("Exception parsing command line: ", e);
CommandLineUtil.printHelp(group);
}
}
-
+
/**
* Run the job
- *
- * @param input the input pathname String
- * @param output the output pathname String
- * @param control the control path
- * @param measureClassName the DistanceMeasure class name
- * @param t1 the T1 distance threshold
- * @param t2 the T2 distance threshold
- * @param convergenceDelta the double convergence criteria
+ *
+ * @param input
+ * the input pathname String
+ * @param output
+ * the output pathname String
+ * @param control
+ * the control path
+ * @param measureClassName
+ * the DistanceMeasure class name
+ * @param t1
+ * the T1 distance threshold
+ * @param t2
+ * the T2 distance threshold
+ * @param convergenceDelta
+ * the double convergence criteria
*/
- public static void runJob(String input, String output, String control,
- String measureClassName, double t1, double t2, double convergenceDelta) {
-
+ public static void runJob(String input,
+ String output,
+ String control,
+ String measureClassName,
+ double t1,
+ double t2,
+ double convergenceDelta) {
+
Configurable client = new JobClient();
JobConf conf = new JobConf(MeanShiftCanopyDriver.class);
-
+
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(MeanShiftCanopy.class);
-
+
FileInputFormat.setInputPaths(conf, new Path(input));
Path outPath = new Path(output);
FileOutputFormat.setOutputPath(conf, outPath);
-
+
conf.setMapperClass(MeanShiftCanopyMapper.class);
conf.setReducerClass(MeanShiftCanopyReducer.class);
conf.setNumReduceTasks(1);
@@ -133,12 +142,12 @@
conf.set(MeanShiftCanopyConfigKeys.T1_KEY, String.valueOf(t1));
conf.set(MeanShiftCanopyConfigKeys.T2_KEY, String.valueOf(t2));
conf.set(MeanShiftCanopyConfigKeys.CONTROL_PATH_KEY, control);
-
+
client.setConf(conf);
try {
JobClient.runJob(conf);
} catch (IOException e) {
- log.warn(e.toString(), e);
+ MeanShiftCanopyDriver.log.warn(e.toString(), e);
}
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java Sat Feb 13 21:07:53 2010
@@ -17,6 +17,8 @@
package org.apache.mahout.clustering.meanshift;
+import java.io.IOException;
+
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -34,46 +36,41 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.IOException;
-
public class MeanShiftCanopyJob {
-
+
protected static final String CONTROL_CONVERGED = "/control/converged";
-
- private static final Logger log = LoggerFactory
- .getLogger(MeanShiftCanopyJob.class);
-
- private MeanShiftCanopyJob() {
- }
-
+
+ private static final Logger log = LoggerFactory.getLogger(MeanShiftCanopyJob.class);
+
+ private MeanShiftCanopyJob() { }
+
public static void main(String[] args) throws IOException {
DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
ArgumentBuilder abuilder = new ArgumentBuilder();
GroupBuilder gbuilder = new GroupBuilder();
-
+
Option inputOpt = DefaultOptionCreator.inputOption().create();
Option outputOpt = DefaultOptionCreator.outputOption().create();
Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().create();
Option maxIterOpt = DefaultOptionCreator.maxIterOption().create();
Option helpOpt = DefaultOptionCreator.helpOption();
-
- Option modelOpt = obuilder.withLongName("distanceClass").withRequired(true).withShortName("d").
- withArgument(abuilder.withName("distanceClass").withMinimum(1).withMaximum(1).create()).
- withDescription("The distance measure class name.").create();
-
-
- Option threshold1Opt = obuilder.withLongName("threshold_1").withRequired(true).withShortName("t1").
- withArgument(abuilder.withName("threshold_1").withMinimum(1).withMaximum(1).create()).
- withDescription("The T1 distance threshold.").create();
-
- Option threshold2Opt = obuilder.withLongName("threshold_2").withRequired(true).withShortName("t2").
- withArgument(abuilder.withName("threshold_2").withMinimum(1).withMaximum(1).create()).
- withDescription("The T1 distance threshold.").create();
-
- Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(modelOpt).
- withOption(helpOpt).withOption(convergenceDeltaOpt).withOption(threshold1Opt).withOption(maxIterOpt).
- withOption(threshold2Opt).create();
-
+
+ Option modelOpt = obuilder.withLongName("distanceClass").withRequired(true).withShortName("d")
+ .withArgument(abuilder.withName("distanceClass").withMinimum(1).withMaximum(1).create())
+ .withDescription("The distance measure class name.").create();
+
+ Option threshold1Opt = obuilder.withLongName("threshold_1").withRequired(true).withShortName("t1")
+ .withArgument(abuilder.withName("threshold_1").withMinimum(1).withMaximum(1).create())
+ .withDescription("The T1 distance threshold.").create();
+
+ Option threshold2Opt = obuilder.withLongName("threshold_2").withRequired(true).withShortName("t2")
+ .withArgument(abuilder.withName("threshold_2").withMinimum(1).withMaximum(1).create())
+ .withDescription("The T1 distance threshold.").create();
+
+ Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)
+ .withOption(modelOpt).withOption(helpOpt).withOption(convergenceDeltaOpt).withOption(threshold1Opt)
+ .withOption(maxIterOpt).withOption(threshold2Opt).create();
+
try {
Parser parser = new Parser();
parser.setGroup(group);
@@ -82,7 +79,7 @@
CommandLineUtil.printHelp(group);
return;
}
-
+
String input = cmdLine.getValue(inputOpt).toString();
String output = cmdLine.getValue(outputOpt).toString();
String measureClassName = cmdLine.getValue(modelOpt).toString();
@@ -90,27 +87,37 @@
double t2 = Double.parseDouble(cmdLine.getValue(threshold2Opt).toString());
double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt).toString());
int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt).toString());
- runJob(input, output, measureClassName, t1, t2, convergenceDelta,
- maxIterations);
+ MeanShiftCanopyJob.runJob(input, output, measureClassName, t1, t2, convergenceDelta, maxIterations);
} catch (OptionException e) {
- log.error("Exception parsing command line: ", e);
+ MeanShiftCanopyJob.log.error("Exception parsing command line: ", e);
CommandLineUtil.printHelp(group);
}
}
-
+
/**
* Run the job
- *
- * @param input the input pathname String
- * @param output the output pathname String
- * @param measureClassName the DistanceMeasure class name
- * @param t1 the T1 distance threshold
- * @param t2 the T2 distance threshold
- * @param convergenceDelta the double convergence criteria
- * @param maxIterations an int number of iterations
+ *
+ * @param input
+ * the input pathname String
+ * @param output
+ * the output pathname String
+ * @param measureClassName
+ * the DistanceMeasure class name
+ * @param t1
+ * the T1 distance threshold
+ * @param t2
+ * the T2 distance threshold
+ * @param convergenceDelta
+ * the double convergence criteria
+ * @param maxIterations
+ * an int number of iterations
*/
- public static void runJob(String input, String output,
- String measureClassName, double t1, double t2, double convergenceDelta,
+ public static void runJob(String input,
+ String output,
+ String measureClassName,
+ double t1,
+ double t2,
+ double convergenceDelta,
int maxIterations) throws IOException {
// delete the output directory
Configuration conf = new JobConf(MeanShiftCanopyDriver.class);
@@ -124,18 +131,18 @@
boolean converged = false;
int iteration = 0;
String clustersIn = input;
- while (!converged && iteration < maxIterations) {
- log.info("Iteration {}", iteration);
+ while (!converged && (iteration < maxIterations)) {
+ MeanShiftCanopyJob.log.info("Iteration {}", iteration);
// point the output to a new directory per iteration
String clustersOut = output + "/canopies-" + iteration;
- String controlOut = output + CONTROL_CONVERGED;
- MeanShiftCanopyDriver.runJob(clustersIn, clustersOut, controlOut,
- measureClassName, t1, t2, convergenceDelta);
+ String controlOut = output + MeanShiftCanopyJob.CONTROL_CONVERGED;
+ MeanShiftCanopyDriver.runJob(clustersIn, clustersOut, controlOut, measureClassName, t1, t2,
+ convergenceDelta);
converged = FileSystem.get(conf).exists(new Path(controlOut));
// now point the input to the old output directory
clustersIn = output + "/canopies-" + iteration;
iteration++;
}
}
-
+
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java Sat Feb 13 21:07:53 2010
@@ -17,6 +17,10 @@
package org.apache.mahout.clustering.meanshift;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.JobConf;
@@ -25,26 +29,23 @@
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
public class MeanShiftCanopyMapper extends MapReduceBase implements
- Mapper<WritableComparable<?>, MeanShiftCanopy, Text, MeanShiftCanopy> {
-
+ Mapper<WritableComparable<?>,MeanShiftCanopy,Text,MeanShiftCanopy> {
+
private final List<MeanShiftCanopy> canopies = new ArrayList<MeanShiftCanopy>();
private MeanShiftCanopyClusterer clusterer;
- private OutputCollector<Text, MeanShiftCanopy> output;
-
+ private OutputCollector<Text,MeanShiftCanopy> output;
+
@Override
- public void map(WritableComparable<?> key, MeanShiftCanopy canopy,
- OutputCollector<Text, MeanShiftCanopy> output, Reporter reporter)
- throws IOException {
+ public void map(WritableComparable<?> key,
+ MeanShiftCanopy canopy,
+ OutputCollector<Text,MeanShiftCanopy> output,
+ Reporter reporter) throws IOException {
this.output = output;
clusterer.mergeCanopy(canopy.shallowCopy(), canopies);
}
-
+
@Override
public void close() throws IOException {
for (MeanShiftCanopy canopy : canopies) {
@@ -53,11 +54,11 @@
}
super.close();
}
-
+
@Override
public void configure(JobConf job) {
super.configure(job);
clusterer = new MeanShiftCanopyClusterer(job);
}
-
+
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java Sat Feb 13 21:07:53 2010
@@ -17,6 +17,11 @@
package org.apache.mahout.clustering.meanshift;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
@@ -26,30 +31,26 @@
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
public class MeanShiftCanopyReducer extends MapReduceBase implements
- Reducer<Text, MeanShiftCanopy, Text, MeanShiftCanopy> {
-
+ Reducer<Text,MeanShiftCanopy,Text,MeanShiftCanopy> {
+
private final List<MeanShiftCanopy> canopies = new ArrayList<MeanShiftCanopy>();
private MeanShiftCanopyClusterer clusterer;
private boolean allConverged = true;
-
+
private JobConf conf;
-
+
@Override
- public void reduce(Text key, Iterator<MeanShiftCanopy> values,
- OutputCollector<Text, MeanShiftCanopy> output, Reporter reporter)
- throws IOException {
-
+ public void reduce(Text key,
+ Iterator<MeanShiftCanopy> values,
+ OutputCollector<Text,MeanShiftCanopy> output,
+ Reporter reporter) throws IOException {
+
while (values.hasNext()) {
MeanShiftCanopy canopy = values.next();
clusterer.mergeCanopy(canopy.shallowCopy(), canopies);
}
-
+
for (MeanShiftCanopy canopy : canopies) {
boolean converged = clusterer.shiftToMean(canopy);
if (converged) {
@@ -58,16 +59,16 @@
allConverged = converged && allConverged;
output.collect(new Text(canopy.getIdentifier()), canopy);
}
-
+
}
-
+
@Override
public void configure(JobConf job) {
super.configure(job);
this.conf = job;
clusterer = new MeanShiftCanopyClusterer(job);
}
-
+
@Override
public void close() throws IOException {
if (allConverged) {
@@ -76,5 +77,5 @@
}
super.close();
}
-
+
}
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java Sat Feb 13 21:07:53 2010
@@ -26,6 +26,7 @@
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.mahout.clustering.ClusteringTestUtils;
+import org.apache.mahout.common.DummyReporter;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;
@@ -254,7 +255,7 @@
List<Canopy> canopies = new ArrayList<Canopy>();
for (VectorWritable point : points) {
- clusterer.addPointToCanopies(point.get(), canopies);
+ clusterer.addPointToCanopies(point.get(), canopies, new DummyReporter());
}
System.out.println("testIterativeManhattan");
@@ -269,7 +270,7 @@
List<Canopy> canopies = new ArrayList<Canopy>();
for (VectorWritable point : points) {
- clusterer.addPointToCanopies(point.get(), canopies);
+ clusterer.addPointToCanopies(point.get(), canopies, new DummyReporter());
}
System.out.println("testIterativeEuclidean");