You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by je...@apache.org on 2009/06/11 05:45:27 UTC
svn commit: r783617 - in /lucene/mahout/trunk/core/src: main/java/org/apache/mahout/clustering/canopy/ test/java/org/apache/mahout/clustering/canopy/
Author: jeastman
Date: Thu Jun 11 03:45:26 2009
New Revision: 783617
URL: http://svn.apache.org/viewvc?rev=783617&view=rev
Log:
- removed CanopyCombiner
- moved combiner semantics into CanopyMapper and output canopies at close()
- dropped combiner from CanopyDriver
- updated unit tests to account for no combiner
- unit tests and synthetic control both run
Removed:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyCombiner.java
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyMapper.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java?rev=783617&r1=783616&r2=783617&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java Thu Jun 11 03:45:26 2009
@@ -68,7 +68,6 @@
FileOutputFormat.setOutputPath(conf, outPath);
conf.setMapperClass(CanopyMapper.class);
- conf.setCombinerClass(CanopyCombiner.class);
conf.setReducerClass(CanopyReducer.class);
conf.setNumReduceTasks(1);
conf.setOutputFormat(SequenceFileOutputFormat.class);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyMapper.java?rev=783617&r1=783616&r2=783617&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyMapper.java Thu Jun 11 03:45:26 2009
@@ -32,15 +32,18 @@
import java.util.List;
public class CanopyMapper extends MapReduceBase implements
- Mapper<WritableComparable<?>, Text, Text, Text> {
+ Mapper<WritableComparable<?>, Text, Text, Text> {
private final List<Canopy> canopies = new ArrayList<Canopy>();
+ private OutputCollector<Text, Text> outputCollector;
+
@Override
public void map(WritableComparable<?> key, Text values,
- OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
+ OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
+ outputCollector = output;
Vector point = AbstractVector.decodeVector(values.toString());
- Canopy.emitPointToNewCanopies(point, canopies, output);
+ Canopy.addPointToCanopies(point, canopies);
}
@Override
@@ -49,4 +52,15 @@
Canopy.configure(job);
}
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.mapred.MapReduceBase#close()
+ */
+ @Override
+ public void close() throws IOException {
+ for (Canopy canopy : canopies)
+ outputCollector.collect(new Text("centroid"), new Text(canopy
+ .computeCentroid().asFormatString()));
+ super.close();
+ }
+
}
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java?rev=783617&r1=783616&r2=783617&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java Thu Jun 11 03:45:26 2009
@@ -347,21 +347,14 @@
*/
public void testCanopyMapperManhattan() throws Exception {
CanopyMapper mapper = new CanopyMapper();
- CanopyCombiner combiner = new CanopyCombiner();
DummyOutputCollector<Text,Text> collector = new DummyOutputCollector<Text,Text>();
Canopy.config(manhattanDistanceMeasure, (3.1), (2.1));
List<Vector> points = getPoints(raw);
// map the data
for (Vector point : points)
mapper.map(new Text(), new Text(point.asFormatString()), collector, null);
- assertEquals("Number of map results", 3, collector.getData().size());
- // now combine the mapper output
- Canopy.config(manhattanDistanceMeasure, (3.1), (2.1));
- Map<String, List<Text>> mapData = collector.getData();
- collector = new DummyOutputCollector<Text,Text>();
- for (Map.Entry<String, List<Text>> stringListEntry : mapData.entrySet())
- combiner.reduce(new Text(stringListEntry.getKey()), stringListEntry.getValue().iterator(), collector,
- null);
+ mapper.close();
+ assertEquals("Number of map results", 1, collector.getData().size());
// now verify the output
List<Text> data = collector.getValue("centroid");
assertEquals("Number of centroids", 3, data.size());
@@ -380,21 +373,14 @@
*/
public void testCanopyMapperEuclidean() throws Exception {
CanopyMapper mapper = new CanopyMapper();
- CanopyCombiner combiner = new CanopyCombiner();
DummyOutputCollector<Text,Text> collector = new DummyOutputCollector<Text,Text>();
Canopy.config(euclideanDistanceMeasure, (3.1), (2.1));
List<Vector> points = getPoints(raw);
// map the data
for (Vector point : points)
mapper.map(new Text(), new Text(point.asFormatString()), collector, null);
- assertEquals("Number of map results", 3, collector.getData().size());
- // now combine the mapper output
- Canopy.config(euclideanDistanceMeasure, (3.1), (2.1));
- Map<String, List<Text>> mapData = collector.getData();
- collector = new DummyOutputCollector<Text,Text>();
- for (Map.Entry<String, List<Text>> stringListEntry : mapData.entrySet())
- combiner.reduce(new Text(stringListEntry.getKey()), stringListEntry.getValue().iterator(), collector,
- null);
+ mapper.close();
+ assertEquals("Number of map results", 1, collector.getData().size());
// now verify the output
List<Text> data = collector.getValue("centroid");
assertEquals("Number of centroids", 3, data.size());