You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by je...@apache.org on 2010/07/10 03:25:32 UTC

svn commit: r962739 - in /mahout/trunk/examples/src/main/java/org/apache/mahout/clustering: canopy/ dirichlet/ display/ fuzzykmeans/ kmeans/ meanshift/

Author: jeastman
Date: Sat Jul 10 01:25:32 2010
New Revision: 962739

URL: http://svn.apache.org/viewvc?rev=962739&view=rev
Log:
More refactoring to clean up the clustering display examples

Added:
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayCanopy.java
      - copied, changed from r962612, mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/DisplayCanopy.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java
      - copied, changed from r962612, mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayClustering.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
      - copied, changed from r962612, mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/DisplayFuzzyKMeans.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
      - copied, changed from r962612, mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMeanShift.java
      - copied, changed from r962612, mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/README.txt
      - copied, changed from r962612, mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/README.txt
Removed:
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/

Copied: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayCanopy.java (from r962612, mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/DisplayCanopy.java)
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayCanopy.java?p2=mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayCanopy.java&p1=mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/DisplayCanopy.java&r1=962612&r2=962739&rev=962739&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/DisplayCanopy.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayCanopy.java Sat Jul 10 01:25:32 2010
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.mahout.clustering.canopy;
+package org.apache.mahout.clustering.display;
 
 import java.awt.BasicStroke;
 import java.awt.Graphics;
@@ -23,8 +23,8 @@ import java.awt.Graphics2D;
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.mahout.clustering.dirichlet.DisplayClustering;
-import org.apache.mahout.clustering.dirichlet.models.NormalModelDistribution;
+import org.apache.mahout.clustering.canopy.Canopy;
+import org.apache.mahout.clustering.canopy.CanopyClusterer;
 import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
 import org.apache.mahout.math.DenseVector;
@@ -48,8 +48,8 @@ class DisplayCanopy extends DisplayClust
 
   @Override
   public void paint(Graphics g) {
-    super.plotSampleData(g);
     Graphics2D g2 = (Graphics2D) g;
+    plotSampleData(g2);
     Vector dv = new DenseVector(2);
     for (Canopy canopy : canopies) {
       if (canopy.getNumPoints() > DisplayClustering.SAMPLE_DATA.size() * SIGNIFICANCE) {
@@ -68,7 +68,6 @@ class DisplayCanopy extends DisplayClust
 
   public static void main(String[] args) {
     RandomUtils.useTestSeed();
-    SIGNIFICANCE = 0.1;
     DisplayClustering.generateSamples();
     List<Vector> points = new ArrayList<Vector>();
     for (VectorWritable sample : SAMPLE_DATA) {
@@ -78,8 +77,4 @@ class DisplayCanopy extends DisplayClust
     CanopyClusterer.updateCentroids(canopies);
     new DisplayCanopy();
   }
-
-  static void generateResults() {
-    DisplayClustering.generateResults(new NormalModelDistribution());
-  }
 }

Copied: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java (from r962612, mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayClustering.java)
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java?p2=mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java&p1=mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayClustering.java&r1=962612&r2=962739&rev=962739&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayClustering.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java Sat Jul 10 01:25:32 2010
@@ -15,8 +15,9 @@
  * limitations under the License.
  */
 
-package org.apache.mahout.clustering.dirichlet;
+package org.apache.mahout.clustering.display;
 
+import java.awt.BasicStroke;
 import java.awt.Color;
 import java.awt.Frame;
 import java.awt.Graphics;
@@ -30,8 +31,8 @@ import java.awt.geom.Rectangle2D;
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.mahout.clustering.dirichlet.models.Model;
-import org.apache.mahout.clustering.dirichlet.models.ModelDistribution;
+import org.apache.mahout.clustering.Cluster;
+import org.apache.mahout.clustering.dirichlet.UncommonDistributions;
 import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.Vector;
@@ -43,31 +44,26 @@ public class DisplayClustering extends F
 
   private static final Logger log = LoggerFactory.getLogger(DisplayClustering.class);
 
-  private static final List<Vector> SAMPLE_PARAMS = new ArrayList<Vector>();
-
   protected static final int DS = 72; // default scale = 72 pixels per inch
 
   protected static final int SIZE = 8; // screen size in inches
 
+  private static final List<Vector> SAMPLE_PARAMS = new ArrayList<Vector>();
+
   protected static final List<VectorWritable> SAMPLE_DATA = new ArrayList<VectorWritable>();
 
+  protected static final List<List<Cluster>> CLUSTERS = new ArrayList<List<Cluster>>();
+
   protected static double SIGNIFICANCE = 0.05;
 
   protected static final Color[] COLORS = { Color.red, Color.orange, Color.yellow, Color.green, Color.blue, Color.magenta,
       Color.lightGray };
 
-  protected static List<Model<VectorWritable>[]> result;
-
-  protected int res; // screen resolution
-
-  protected static double ALPHA_0 = 1.0;
-
-  protected static int k = 12;
-
-  protected static int numIterations = 20;
+  protected static int res; // screen resolution
 
   public DisplayClustering() {
     initialize();
+    this.setTitle("Sample Data");
   }
 
   public void initialize() {
@@ -90,7 +86,7 @@ public class DisplayClustering extends F
 
   public static void main(String[] args) throws Exception {
     RandomUtils.useTestSeed();
-    generate2dSamples();
+    generateSamples();
     new DisplayClustering();
   }
 
@@ -98,13 +94,27 @@ public class DisplayClustering extends F
   @Override
   public void paint(Graphics g) {
     Graphics2D g2 = (Graphics2D) g;
-    plotSampleData(g);
+    plotSampleData(g2);
+    plotSampleParameters(g2);
+    plotClusters(g2);
+  }
+
+  protected void plotClusters(Graphics2D g2) {
+    int cx = CLUSTERS.size() - 1;
+    for (List<Cluster> clusters : CLUSTERS) {
+      g2.setStroke(new BasicStroke(cx == 0 ? 3 : 1));
+      g2.setColor(COLORS[Math.min(DisplayClustering.COLORS.length - 1, cx--)]);
+      for (Cluster cluster : clusters) {
+        plotEllipse(g2, cluster.getCenter(), cluster.getRadius().times(3));
+      }
+    }
+  }
+
+  protected void plotSampleParameters(Graphics2D g2) {
     Vector v = new DenseVector(2);
     Vector dv = new DenseVector(2);
     g2.setColor(Color.RED);
-    int i = 0;
     for (Vector param : SAMPLE_PARAMS) {
-      i++;
       v.set(0, param.get(0));
       v.set(1, param.get(1));
       dv.set(0, param.get(2) * 3);
@@ -113,8 +123,7 @@ public class DisplayClustering extends F
     }
   }
 
-  public void plotSampleData(Graphics g) {
-    Graphics2D g2 = (Graphics2D) g;
+  protected void plotSampleData(Graphics2D g2) {
     double sx = (double) res / DS;
     g2.setTransform(AffineTransform.getScaleInstance(sx, sx));
 
@@ -142,7 +151,7 @@ public class DisplayClustering extends F
    * @param dv
    *          a Vector of rectangle dimensions
    */
-  public static void plotRectangle(Graphics2D g2, Vector v, Vector dv) {
+  protected static void plotRectangle(Graphics2D g2, Vector v, Vector dv) {
     double[] flip = { 1, -1 };
     Vector v2 = v.times(new DenseVector(flip));
     v2 = v2.minus(dv.divide(2));
@@ -162,7 +171,7 @@ public class DisplayClustering extends F
    * @param dv
    *          a Vector of ellipse dimensions
    */
-  public static void plotEllipse(Graphics2D g2, Vector v, Vector dv) {
+  protected static void plotEllipse(Graphics2D g2, Vector v, Vector dv) {
     double[] flip = { 1, -1 };
     Vector v2 = v.times(new DenseVector(flip));
     v2 = v2.minus(dv.divide(2));
@@ -172,30 +181,13 @@ public class DisplayClustering extends F
     g2.draw(new Ellipse2D.Double(x * DS, y * DS, dv.get(0) * DS, dv.get(1) * DS));
   }
 
-  private static void printModels(List<Model<VectorWritable>[]> results, int significant) {
-    int row = 0;
-    StringBuilder models = new StringBuilder();
-    for (Model<VectorWritable>[] r : results) {
-      models.append("sample[").append(row++).append("]= ");
-      for (int k = 0; k < r.length; k++) {
-        Model<VectorWritable> model = r[k];
-        if (model.count() > significant) {
-          models.append('m').append(k).append(model).append(", ");
-        }
-      }
-      models.append('\n');
-    }
-    models.append('\n');
-    log.info(models.toString());
-  }
-
-  public static void generateSamples() {
+  protected static void generateSamples() {
     generateSamples(500, 1, 1, 3);
     generateSamples(300, 1, 0, 0.5);
     generateSamples(300, 0, 2, 0.1);
   }
 
-  public static void generate2dSamples() {
+  protected static void generate2dSamples() {
     generate2dSamples(500, 1, 1, 3, 1);
     generate2dSamples(300, 1, 0, 0.5, 1);
     generate2dSamples(300, 0, 2, 0.1, 0.5);
@@ -213,7 +205,7 @@ public class DisplayClustering extends F
    * @param sd
    *          double standard deviation of the samples
    */
-  private static void generateSamples(int num, double mx, double my, double sd) {
+  protected static void generateSamples(int num, double mx, double my, double sd) {
     double[] params = { mx, my, sd, sd };
     SAMPLE_PARAMS.add(new DenseVector(params));
     log.info("Generating {} samples m=[{}, {}] sd={}", new Object[] { num, mx, my, sd });
@@ -237,7 +229,7 @@ public class DisplayClustering extends F
    * @param sdy
    *          double y-value standard deviation of the samples
    */
-  private static void generate2dSamples(int num, double mx, double my, double sdx, double sdy) {
+  protected static void generate2dSamples(int num, double mx, double my, double sdx, double sdy) {
     double[] params = { mx, my, sdx, sdy };
     SAMPLE_PARAMS.add(new DenseVector(params));
     log.info("Generating {} samples m=[{}, {}] sd=[{}, {}]", new Object[] { num, mx, my, sdx, sdy });
@@ -247,14 +239,8 @@ public class DisplayClustering extends F
     }
   }
 
-  public static void generateResults(ModelDistribution<VectorWritable> modelDist) {
-    DirichletClusterer<VectorWritable> dc = new DirichletClusterer<VectorWritable>(SAMPLE_DATA, modelDist, ALPHA_0, k, 2, 2);
-    result = dc.cluster(numIterations);
-    printModels(result, 5);
-  }
-
-  public static boolean isSignificant(Model<VectorWritable> model) {
-    return (double) model.count() / SAMPLE_DATA.size() > SIGNIFICANCE;
+  protected static boolean isSignificant(Cluster cluster) {
+    return (double) cluster.getNumPoints() / SAMPLE_DATA.size() > SIGNIFICANCE;
   }
 
 }

Added: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java?rev=962739&view=auto
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java (added)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java Sat Jul 10 01:25:32 2010
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.clustering.display;
+
+import java.awt.Graphics;
+import java.awt.Graphics2D;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.mahout.clustering.Cluster;
+import org.apache.mahout.clustering.dirichlet.DirichletClusterer;
+import org.apache.mahout.clustering.dirichlet.models.AsymmetricSampledNormalDistribution;
+import org.apache.mahout.clustering.dirichlet.models.Model;
+import org.apache.mahout.clustering.dirichlet.models.ModelDistribution;
+import org.apache.mahout.clustering.dirichlet.models.NormalModelDistribution;
+import org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution;
+import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.VectorWritable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class DisplayDirichlet extends DisplayClustering {
+
+  private static final Logger log = LoggerFactory.getLogger(DisplayDirichlet.class);
+
+  public DisplayDirichlet() {
+    initialize();
+    this.setTitle("Dirichlet Process Clusters - Normal Distribution (>" + (int) (SIGNIFICANCE * 100) + "% of population)");
+  }
+
+  protected static void printModels(List<Model<VectorWritable>[]> results, int significant) {
+    int row = 0;
+    StringBuilder models = new StringBuilder();
+    for (Model<VectorWritable>[] r : results) {
+      models.append("sample[").append(row++).append("]= ");
+      for (int k = 0; k < r.length; k++) {
+        Model<VectorWritable> model = r[k];
+        if (model.count() > significant) {
+          models.append('m').append(k).append(model).append(", ");
+        }
+      }
+      models.append('\n');
+    }
+    models.append('\n');
+    log.info(models.toString());
+  }
+
+  protected static void generateResults(ModelDistribution<VectorWritable> modelDist,
+                                        int numClusters,
+                                        int numIterations,
+                                        double alpha_0, int thin, int burnin) {
+    DirichletClusterer<VectorWritable> dc = new DirichletClusterer<VectorWritable>(SAMPLE_DATA,
+                                                                                   modelDist,
+                                                                                   alpha_0,
+                                                                                   numClusters,
+                                                                                   thin,
+                                                                                   burnin);
+    List<Model<VectorWritable>[]> result = dc.cluster(numIterations);
+    printModels(result, burnin);
+    for (Model<VectorWritable>[] models : result) {
+      List<Cluster> clusters = new ArrayList<Cluster>();
+      for (Model<VectorWritable> model : models) {
+        Cluster cluster = (Cluster) model;
+        if (isSignificant(cluster)) {
+          clusters.add(cluster);
+        }
+      }
+      CLUSTERS.add(clusters);
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    VectorWritable modelPrototype = new VectorWritable(new DenseVector(2));
+    ModelDistribution<VectorWritable> modelDist;
+    modelDist = new NormalModelDistribution(modelPrototype);
+    modelDist = new SampledNormalDistribution(modelPrototype);
+    modelDist = new AsymmetricSampledNormalDistribution(modelPrototype);
+    int numIterations = 40;
+    int numClusters = 40;
+    int alpha_0 = 1;
+    int thin = 3;
+    int burnin = 5;
+    
+    RandomUtils.useTestSeed();
+    generate2dSamples();
+    generateResults(modelDist, numClusters, numIterations, alpha_0, thin, burnin);
+    new DisplayDirichlet();
+  }
+
+  // Override the paint() method
+  @Override
+  public void paint(Graphics g) {
+    plotSampleData((Graphics2D) g);
+    plotClusters((Graphics2D) g);
+  }
+
+}

Copied: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java (from r962612, mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/DisplayFuzzyKMeans.java)
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java?p2=mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java&p1=mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/DisplayFuzzyKMeans.java&r1=962612&r2=962739&rev=962739&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/DisplayFuzzyKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java Sat Jul 10 01:25:32 2010
@@ -15,69 +15,73 @@
  * limitations under the License.
  */
 
-package org.apache.mahout.clustering.fuzzykmeans;
+package org.apache.mahout.clustering.display;
 
-import java.awt.BasicStroke;
 import java.awt.Graphics;
 import java.awt.Graphics2D;
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.mahout.clustering.dirichlet.DisplayClustering;
+import org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansClusterer;
+import org.apache.mahout.clustering.fuzzykmeans.SoftCluster;
 import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
-import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 
 class DisplayFuzzyKMeans extends DisplayClustering {
 
-  private static List<List<SoftCluster>> clusters;
-  
   DisplayFuzzyKMeans() {
     initialize();
-    this.setTitle("Fuzzy K-Means Clusters (> 5% of population)");
+    this.setTitle("Fuzzy k-Means Clusters (>" + (int) (SIGNIFICANCE * 100) + "% of population)");
   }
-  
+
+  // Override the paint() method
   @Override
   public void paint(Graphics g) {
-    plotSampleData(g);
-    Graphics2D g2 = (Graphics2D) g;
-    Vector dv = new DenseVector(2);
-    int i = DisplayFuzzyKMeans.clusters.size() - 1;
-    for (List<SoftCluster> cls : clusters) {
-      g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
-      g2.setColor(COLORS[Math.min(DisplayClustering.COLORS.length - 1, i--)]);
-      for (SoftCluster cluster : cls) {
-        // if (true || cluster.getWeightedPointTotal().zSum() > sampleData.size() * 0.05) {
-        dv.assign(Math.max(cluster.std(), 0.3) * 3);
-        DisplayClustering.plotEllipse(g2, cluster.getCenter(), dv);
-        // }
-      }
-    }
+    plotSampleData((Graphics2D) g);
+    plotClusters((Graphics2D) g);
   }
-  
+
   public static void main(String[] args) {
+    DistanceMeasure measure = new ManhattanDistanceMeasure();
+    double threshold = 0.001;
+    int numClusters = 3;
+    int numIterations = 10;
+    int m = 3;
+    
     RandomUtils.useTestSeed();
     DisplayClustering.generateSamples();
     List<Vector> points = new ArrayList<Vector>();
     for (VectorWritable sample : SAMPLE_DATA) {
       points.add((Vector) sample.get());
     }
-    DistanceMeasure measure = new ManhattanDistanceMeasure();
+    int id = 0;
     List<SoftCluster> initialClusters = new ArrayList<SoftCluster>();
-    
-    k = 3;
-    int i = 0;
     for (Vector point : points) {
-      if (initialClusters.size() < Math.min(k, points.size())) {
-        initialClusters.add(new SoftCluster(point, i++));
+      if (initialClusters.size() < Math.min(numClusters, points.size())) {
+        initialClusters.add(new SoftCluster(point, id++));
       } else {
         break;
       }
     }
-    clusters = FuzzyKMeansClusterer.clusterPoints(points, initialClusters, measure, 0.001, 3, 10);
+    List<List<SoftCluster>> results = FuzzyKMeansClusterer.clusterPoints(points,
+                                                                         initialClusters,
+                                                                         measure,
+                                                                         threshold,
+                                                                         m,
+                                                                         numIterations);
+    for (List<SoftCluster> models : results) {
+      List<org.apache.mahout.clustering.Cluster> clusters = new ArrayList<org.apache.mahout.clustering.Cluster>();
+      for (SoftCluster cluster : models) {
+        org.apache.mahout.clustering.Cluster cluster2 = (org.apache.mahout.clustering.Cluster) cluster;
+        if (isSignificant(cluster2)) {
+          clusters.add(cluster2);
+        }
+      }
+      CLUSTERS.add(clusters);
+    }
     new DisplayFuzzyKMeans();
   }
 }

Copied: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java (from r962612, mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java)
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java?p2=mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java&p1=mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java&r1=962612&r2=962739&rev=962739&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java Sat Jul 10 01:25:32 2010
@@ -15,70 +15,71 @@
  * limitations under the License.
  */
 
-package org.apache.mahout.clustering.kmeans;
+package org.apache.mahout.clustering.display;
 
-import java.awt.BasicStroke;
 import java.awt.Graphics;
 import java.awt.Graphics2D;
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.mahout.clustering.dirichlet.DisplayClustering;
+import org.apache.mahout.clustering.kmeans.Cluster;
+import org.apache.mahout.clustering.kmeans.KMeansClusterer;
 import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
-import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 
 class DisplayKMeans extends DisplayClustering {
 
-  private static List<List<Cluster>> clusters;
+  static List<List<Cluster>> result;
 
   DisplayKMeans() {
     initialize();
-    this.setTitle("K-Means Clusters (> 5% of population)");
-  }
-
-  @Override
-  public void paint(Graphics g) {
-    super.plotSampleData(g);
-    Graphics2D g2 = (Graphics2D) g;
-    Vector dv = new DenseVector(2);
-    int i = DisplayKMeans.clusters.size() - 1;
-    for (List<Cluster> cls : clusters) {
-      g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
-      g2.setColor(COLORS[Math.min(DisplayClustering.COLORS.length - 1, i--)]);
-      for (Cluster cluster : cls) {
-        // if (true || cluster.getNumPoints() > sampleData.size() * 0.05) {
-        dv.assign(cluster.getStd() * 3);
-        System.out.println(cluster.getCenter().asFormatString() + ' ' + dv.asFormatString());
-        DisplayClustering.plotEllipse(g2, cluster.getCenter(), dv);
-        // }
-      }
-    }
+    this.setTitle("k-Means Clusters (>" + (int) (SIGNIFICANCE * 100) + "% of population)");
   }
 
   public static void main(String[] args) {
+    DistanceMeasure measure = new ManhattanDistanceMeasure();
+    int numClusters = 3;
+    int maxIter = 10;
+    double distanceThreshold = 0.001;
+    
     RandomUtils.useTestSeed();
     DisplayClustering.generateSamples();
     List<Vector> points = new ArrayList<Vector>();
     for (VectorWritable sample : SAMPLE_DATA) {
       points.add(sample.get());
     }
-    DistanceMeasure measure = new ManhattanDistanceMeasure();
     List<Cluster> initialClusters = new ArrayList<Cluster>();
-    k = 3;
-    int i = 0;
+    int id = 0;
     for (Vector point : points) {
-      if (initialClusters.size() < Math.min(k, points.size())) {
-        initialClusters.add(new Cluster(point, i++));
+      if (initialClusters.size() < Math.min(numClusters, points.size())) {
+        initialClusters.add(new Cluster(point, id++));
       } else {
         break;
       }
     }
-    clusters = KMeansClusterer.clusterPoints(points, initialClusters, measure, 10, 0.001);
-    System.out.println(clusters.size());
+    result = KMeansClusterer.clusterPoints(points, initialClusters, measure, maxIter, distanceThreshold);
+    for (List<Cluster> models : result) {
+      List<org.apache.mahout.clustering.Cluster> clusters = new ArrayList<org.apache.mahout.clustering.Cluster>();
+      for (Cluster cluster : models) {
+        org.apache.mahout.clustering.Cluster cluster2 = (org.apache.mahout.clustering.Cluster) cluster;
+        if (isSignificant(cluster2)) {
+          clusters.add(cluster2);
+        }
+      }
+      CLUSTERS.add(clusters);
+    }
+
+    System.out.println(result.size());
     new DisplayKMeans();
   }
+
+  // Override the paint() method
+  @Override
+  public void paint(Graphics g) {
+    plotSampleData((Graphics2D) g);
+    plotClusters((Graphics2D) g);
+  }
 }

Copied: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMeanShift.java (from r962612, mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java)
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMeanShift.java?p2=mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMeanShift.java&p1=mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java&r1=962612&r2=962739&rev=962739&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMeanShift.java Sat Jul 10 01:25:32 2010
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.mahout.clustering.meanshift;
+package org.apache.mahout.clustering.display;
 
 import java.awt.Color;
 import java.awt.Graphics;
@@ -24,8 +24,8 @@ import java.awt.geom.AffineTransform;
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.mahout.clustering.dirichlet.DisplayClustering;
-import org.apache.mahout.clustering.dirichlet.models.NormalModelDistribution;
+import org.apache.mahout.clustering.meanshift.MeanShiftCanopy;
+import org.apache.mahout.clustering.meanshift.MeanShiftCanopyClusterer;
 import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
 import org.apache.mahout.math.DenseVector;
@@ -46,7 +46,7 @@ final class DisplayMeanShift extends Dis
 
   private DisplayMeanShift() {
     initialize();
-    this.setTitle("MeanShiftCanopy Clusters (> 1.5% of population)");
+    this.setTitle("k-Means Clusters (>" + (int) (SIGNIFICANCE * 100) + "% of population)");
   }
 
   @Override
@@ -71,7 +71,7 @@ final class DisplayMeanShift extends Dis
     }
     int i = 0;
     for (MeanShiftCanopy canopy : canopies) {
-      if (canopy.getBoundPoints().toList().size() > 0.015 * DisplayClustering.SAMPLE_DATA.size()) {
+      if (canopy.getBoundPoints().toList().size() >= SIGNIFICANCE * DisplayClustering.SAMPLE_DATA.size()) {
         g2.setColor(COLORS[Math.min(i++, DisplayClustering.COLORS.length - 1)]);
         int count = 0;
         Vector center = new DenseVector(2);
@@ -89,22 +89,20 @@ final class DisplayMeanShift extends Dis
   }
 
   public static void main(String[] args) {
+    t1 = 1.5;
+    t2 = 0.1;
+    SIGNIFICANCE = 0.02;
+
     RandomUtils.useTestSeed();
     DisplayClustering.generateSamples();
     List<Vector> points = new ArrayList<Vector>();
     for (VectorWritable sample : SAMPLE_DATA) {
       points.add(sample.get());
     }
-    t1 = 1.5;
-    t2 = 0.5;
     canopies = MeanShiftCanopyClusterer.clusterPoints(points, new EuclideanDistanceMeasure(), 0.005, t1, t2, 20);
     for (MeanShiftCanopy canopy : canopies) {
       log.info(canopy.toString());
     }
     new DisplayMeanShift();
   }
-
-  static void generateResults() {
-    DisplayClustering.generateResults(new NormalModelDistribution());
-  }
 }

Copied: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/README.txt (from r962612, mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/README.txt)
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/README.txt?p2=mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/README.txt&p1=mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/README.txt&r1=962612&r2=962739&rev=962739&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/README.txt (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/README.txt Sat Jul 10 01:25:32 2010
@@ -1,16 +1,18 @@
 The following classes can be run without parameters to generate a sample data set and 
-run the reference Dirichlet Process Clustering implementation over them:
+run the reference clustering implementations over them:
 
-DisplayDirichlet - generates 1000 samples from three, symmetric distributions. This is the same 
+DisplayClustering - generates 1000 samples from three, symmetric distributions. This is the same 
     data set that is used by the following clustering programs. It displays the points on a screen
     and superimposes the model parameters that were used to generate the points. You can edit the
     generateSamples() method to change the sample points used by these programs.
-  * DisplayNDirichlet - clusters the above sample points using the NormalModelDistribution
-  * DisplaySNDirichlet - clusters the above sample points using the SampledNormalDistribution
-  * DisplayASNDirichlet - clusters the above sample points using the AsymmetricSampledNormalDistribution
-  * Display2dASNDirichlet - clusters a set of asymmetric sample points (generated by DisplayDirichlet's
-    generate2dSamples() method) using the AsymmetricSampledNormalDistribution.
-  * NOTE: each of these programs displays the sample points and then superimposes all of the clusters
+    
+  * DisplayDirichlet - uses Dirichlet Process clustering
+  * DisplayCanopy - uses Canopy clustering
+  * DisplayKMeans - uses k-Means clustering
+  * DisplayFuzzyKMeans - uses Fuzzy k-Means clustering
+  * DisplayMeanShift - uses MeanShift clustering
+  
+  * NOTE: some of these programs display the sample points and then superimpose all of the clusters
     from each iteration. The last iteration's clusters are in bold red and the previous several are 
     colored (orange, yellow, green, blue, magenta) in order after which all earlier clusters are in
     light grey. This helps to visualize how the clusters converge upon a solution over multiple
@@ -18,10 +20,5 @@ DisplayDirichlet - generates 1000 sample
   * NOTE: by changing the parameter values (k, ALPHA_0, numIterations) and the display SIGNIFICANCE
     you can obtain different results.
     
-DisplayOutputState - this program can be run after any of the SampledNormalDistribution M/R Dirichlet test 
-  cases in TestMapReduce. It draws the points and the resulting clusters from the output directory in 
-  a manner similar to the above. 
-DisplayASNOutputState - similar to above but uses the AsymmetricSampledNormalDistribution.
-
   
     
\ No newline at end of file