You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by je...@apache.org on 2009/06/11 05:45:27 UTC

svn commit: r783617 - in /lucene/mahout/trunk/core/src: main/java/org/apache/mahout/clustering/canopy/ test/java/org/apache/mahout/clustering/canopy/

Author: jeastman
Date: Thu Jun 11 03:45:26 2009
New Revision: 783617

URL: http://svn.apache.org/viewvc?rev=783617&view=rev
Log:
- removed CanopyCombiner
- moved combiner semantics into CanopyMapper, which now outputs the canopy centroids at close()
- dropped combiner from CanopyDriver
- updated unit tests to account for no combiner
- unit tests and the synthetic control example both run

Removed:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyCombiner.java
Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyMapper.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java?rev=783617&r1=783616&r2=783617&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java Thu Jun 11 03:45:26 2009
@@ -68,7 +68,6 @@
     FileOutputFormat.setOutputPath(conf, outPath);
 
     conf.setMapperClass(CanopyMapper.class);
-    conf.setCombinerClass(CanopyCombiner.class);
     conf.setReducerClass(CanopyReducer.class);
     conf.setNumReduceTasks(1);
     conf.setOutputFormat(SequenceFileOutputFormat.class);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyMapper.java?rev=783617&r1=783616&r2=783617&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyMapper.java Thu Jun 11 03:45:26 2009
@@ -32,15 +32,18 @@
 import java.util.List;
 
 public class CanopyMapper extends MapReduceBase implements
-        Mapper<WritableComparable<?>, Text, Text, Text> {
+    Mapper<WritableComparable<?>, Text, Text, Text> {
 
   private final List<Canopy> canopies = new ArrayList<Canopy>();
 
+  private OutputCollector<Text, Text> outputCollector;
+
   @Override
   public void map(WritableComparable<?> key, Text values,
-                  OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
+      OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
+    outputCollector = output;
     Vector point = AbstractVector.decodeVector(values.toString());
-    Canopy.emitPointToNewCanopies(point, canopies, output);
+    Canopy.addPointToCanopies(point, canopies);
   }
 
   @Override
@@ -49,4 +52,15 @@
     Canopy.configure(job);
   }
 
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.mapred.MapReduceBase#close()
+   */
+  @Override
+  public void close() throws IOException {
+    for (Canopy canopy : canopies)
+      outputCollector.collect(new Text("centroid"), new Text(canopy
+          .computeCentroid().asFormatString()));
+    super.close();
+  }
+
 }

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java?rev=783617&r1=783616&r2=783617&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java Thu Jun 11 03:45:26 2009
@@ -347,21 +347,14 @@
    */
   public void testCanopyMapperManhattan() throws Exception {
     CanopyMapper mapper = new CanopyMapper();
-    CanopyCombiner combiner = new CanopyCombiner();
     DummyOutputCollector<Text,Text> collector = new DummyOutputCollector<Text,Text>();
     Canopy.config(manhattanDistanceMeasure, (3.1), (2.1));
     List<Vector> points = getPoints(raw);
     // map the data
     for (Vector point : points)
       mapper.map(new Text(), new Text(point.asFormatString()), collector, null);
-    assertEquals("Number of map results", 3, collector.getData().size());
-    // now combine the mapper output
-    Canopy.config(manhattanDistanceMeasure, (3.1), (2.1));
-    Map<String, List<Text>> mapData = collector.getData();
-    collector = new DummyOutputCollector<Text,Text>();
-    for (Map.Entry<String, List<Text>> stringListEntry : mapData.entrySet())
-      combiner.reduce(new Text(stringListEntry.getKey()), stringListEntry.getValue().iterator(), collector,
-          null);
+    mapper.close();
+    assertEquals("Number of map results", 1, collector.getData().size());
     // now verify the output
     List<Text> data = collector.getValue("centroid");
     assertEquals("Number of centroids", 3, data.size());
@@ -380,21 +373,14 @@
    */
   public void testCanopyMapperEuclidean() throws Exception {
     CanopyMapper mapper = new CanopyMapper();
-    CanopyCombiner combiner = new CanopyCombiner();
     DummyOutputCollector<Text,Text> collector = new DummyOutputCollector<Text,Text>();
     Canopy.config(euclideanDistanceMeasure, (3.1), (2.1));
     List<Vector> points = getPoints(raw);
     // map the data
     for (Vector point : points)
       mapper.map(new Text(), new Text(point.asFormatString()), collector, null);
-    assertEquals("Number of map results", 3, collector.getData().size());
-    // now combine the mapper output
-    Canopy.config(euclideanDistanceMeasure, (3.1), (2.1));
-    Map<String, List<Text>> mapData = collector.getData();
-    collector = new DummyOutputCollector<Text,Text>();
-    for (Map.Entry<String, List<Text>> stringListEntry : mapData.entrySet())
-      combiner.reduce(new Text(stringListEntry.getKey()), stringListEntry.getValue().iterator(), collector,
-          null);
+    mapper.close();
+    assertEquals("Number of map results", 1, collector.getData().size());
     // now verify the output
     List<Text> data = collector.getValue("centroid");
     assertEquals("Number of centroids", 3, data.size());