You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by je...@apache.org on 2010/07/09 19:14:20 UTC

svn commit: r962612 - in /mahout/trunk/examples/src/main/java/org/apache/mahout/clustering: canopy/ dirichlet/ fuzzykmeans/ kmeans/ meanshift/

Author: jeastman
Date: Fri Jul  9 17:14:20 2010
New Revision: 962612

URL: http://svn.apache.org/viewvc?rev=962612&view=rev
Log:
Minor cleanup to the clustering display examples

Added:
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayClustering.java
      - copied, changed from r961880, mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java
Removed:
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java
Modified:
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/DisplayCanopy.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/Display2dASNDirichlet.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNDirichlet.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNOutputState.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayNDirichlet.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayOutputState.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplaySNDirichlet.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/README.txt
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/DisplayFuzzyKMeans.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/DisplayCanopy.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/DisplayCanopy.java?rev=962612&r1=962611&r2=962612&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/DisplayCanopy.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/DisplayCanopy.java Fri Jul  9 17:14:20 2010
@@ -23,7 +23,7 @@ import java.awt.Graphics2D;
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.mahout.clustering.dirichlet.DisplayDirichlet;
+import org.apache.mahout.clustering.dirichlet.DisplayClustering;
 import org.apache.mahout.clustering.dirichlet.models.NormalModelDistribution;
 import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
@@ -31,40 +31,45 @@ import org.apache.mahout.math.DenseVecto
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 
-class DisplayCanopy extends DisplayDirichlet {
+class DisplayCanopy extends DisplayClustering {
+
+  private static final long serialVersionUID = 1L;
 
   private static List<Canopy> canopies;
+
   private static final double T1 = 3.0;
+
   private static final double T2 = 1.6;
 
   DisplayCanopy() {
     initialize();
-    this.setTitle("Canopy Clusters (> 5% of population)");
+    this.setTitle("Canopy Clusters (>" + (int) (SIGNIFICANCE * 100) + "% of population)");
   }
-  
+
   @Override
   public void paint(Graphics g) {
     super.plotSampleData(g);
     Graphics2D g2 = (Graphics2D) g;
     Vector dv = new DenseVector(2);
     for (Canopy canopy : canopies) {
-      if (canopy.getNumPoints() > DisplayDirichlet.SAMPLE_DATA.size() * 0.05) {
+      if (canopy.getNumPoints() > DisplayClustering.SAMPLE_DATA.size() * SIGNIFICANCE) {
         g2.setStroke(new BasicStroke(2));
         g2.setColor(COLORS[1]);
         dv.assign(T1);
         Vector center = canopy.computeCentroid();
-        DisplayDirichlet.plotEllipse(g2, center, dv);
+        DisplayClustering.plotEllipse(g2, center, dv);
         g2.setStroke(new BasicStroke(3));
         g2.setColor(COLORS[0]);
         dv.assign(T2);
-        DisplayDirichlet.plotEllipse(g2, center, dv);
+        DisplayClustering.plotEllipse(g2, center, dv);
       }
     }
   }
-  
+
   public static void main(String[] args) {
     RandomUtils.useTestSeed();
-    DisplayDirichlet.generateSamples();
+    SIGNIFICANCE = 0.1;
+    DisplayClustering.generateSamples();
     List<Vector> points = new ArrayList<Vector>();
     for (VectorWritable sample : SAMPLE_DATA) {
       points.add(sample.get());
@@ -73,8 +78,8 @@ class DisplayCanopy extends DisplayDiric
     CanopyClusterer.updateCentroids(canopies);
     new DisplayCanopy();
   }
-  
+
   static void generateResults() {
-    DisplayDirichlet.generateResults(new NormalModelDistribution());
+    DisplayClustering.generateResults(new NormalModelDistribution());
   }
 }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/Display2dASNDirichlet.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/Display2dASNDirichlet.java?rev=962612&r1=962611&r2=962612&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/Display2dASNDirichlet.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/Display2dASNDirichlet.java Fri Jul  9 17:14:20 2010
@@ -29,7 +29,7 @@ import org.apache.mahout.math.DenseVecto
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 
-class Display2dASNDirichlet extends DisplayDirichlet {
+class Display2dASNDirichlet extends DisplayClustering {
   Display2dASNDirichlet() {
     initialize();
     this.setTitle("Dirichlet Process Clusters - 2-d Asymmetric Sampled Normal Distribution (>"
@@ -42,15 +42,15 @@ class Display2dASNDirichlet extends Disp
     Graphics2D g2 = (Graphics2D) g;
     
     Vector dv = new DenseVector(2);
-    int i = DisplayDirichlet.result.size() - 1;
+    int i = DisplayClustering.result.size() - 1;
     for (Model<VectorWritable>[] models : result) {
       g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
-      g2.setColor(COLORS[Math.min(DisplayDirichlet.COLORS.length - 1, i--)]);
+      g2.setColor(COLORS[Math.min(DisplayClustering.COLORS.length - 1, i--)]);
       for (Model<VectorWritable> m : models) {
         AsymmetricSampledNormalModel mm = (AsymmetricSampledNormalModel) m;
         dv.assign(mm.getStdDev().times(3));
-        if (DisplayDirichlet.isSignificant(mm)) {
-          DisplayDirichlet.plotEllipse(g2, mm.getMean(), dv);
+        if (DisplayClustering.isSignificant(mm)) {
+          DisplayClustering.plotEllipse(g2, mm.getMean(), dv);
         }
       }
     }
@@ -58,14 +58,15 @@ class Display2dASNDirichlet extends Disp
   
   public static void main(String[] args) {
     RandomUtils.useTestSeed();
-    k = 10;
-    DisplayDirichlet.generate2dSamples();
+    k = 20;
+    numIterations = 20;
+    DisplayClustering.generate2dSamples();
     generateResults();
     new Display2dASNDirichlet();
   }
   
   private static void generateResults() {
-    DisplayDirichlet.generateResults(new AsymmetricSampledNormalDistribution(new VectorWritable(
+    DisplayClustering.generateResults(new AsymmetricSampledNormalDistribution(new VectorWritable(
         new DenseVector(2))));
   }
 }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNDirichlet.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNDirichlet.java?rev=962612&r1=962611&r2=962612&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNDirichlet.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNDirichlet.java Fri Jul  9 17:14:20 2010
@@ -29,7 +29,7 @@ import org.apache.mahout.math.DenseVecto
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 
-class DisplayASNDirichlet extends DisplayDirichlet {
+class DisplayASNDirichlet extends DisplayClustering {
   DisplayASNDirichlet() {
     initialize();
     this
@@ -43,15 +43,15 @@ class DisplayASNDirichlet extends Displa
     Graphics2D g2 = (Graphics2D) g;
     
     Vector dv = new DenseVector(2);
-    int i = DisplayDirichlet.result.size() - 1;
+    int i = DisplayClustering.result.size() - 1;
     for (Model<VectorWritable>[] models : result) {
       g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
-      g2.setColor(COLORS[Math.min(DisplayDirichlet.COLORS.length - 1, i--)]);
+      g2.setColor(COLORS[Math.min(DisplayClustering.COLORS.length - 1, i--)]);
       for (Model<VectorWritable> m : models) {
         AsymmetricSampledNormalModel mm = (AsymmetricSampledNormalModel) m;
         dv.assign(mm.getStdDev().times(3));
-        if (DisplayDirichlet.isSignificant(mm)) {
-          DisplayDirichlet.plotEllipse(g2, mm.getMean(), dv);
+        if (DisplayClustering.isSignificant(mm)) {
+          DisplayClustering.plotEllipse(g2, mm.getMean(), dv);
         }
       }
     }
@@ -59,12 +59,12 @@ class DisplayASNDirichlet extends Displa
   
   public static void main(String[] args) {
     RandomUtils.useTestSeed();
-    DisplayDirichlet.generateSamples();
+    DisplayClustering.generateSamples();
     generateResults();
     new DisplayASNDirichlet();
   }
   
   static void generateResults() {
-    DisplayDirichlet.generateResults(new AsymmetricSampledNormalDistribution(new VectorWritable(new DenseVector(2))));
+    DisplayClustering.generateResults(new AsymmetricSampledNormalDistribution(new VectorWritable(new DenseVector(2))));
   }
 }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNOutputState.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNOutputState.java?rev=962612&r1=962611&r2=962612&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNOutputState.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNOutputState.java Fri Jul  9 17:14:20 2010
@@ -26,11 +26,10 @@ import java.lang.reflect.InvocationTarge
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.mahout.clustering.dirichlet.models.AsymmetricSampledNormalModel;
 import org.apache.mahout.clustering.dirichlet.models.Model;
 import org.apache.mahout.clustering.dirichlet.models.NormalModelDistribution;
-import org.apache.mahout.clustering.kmeans.KMeansDriver;
 import org.apache.mahout.common.FileLineIterable;
 import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.math.AbstractVector;
@@ -38,7 +37,7 @@ import org.apache.mahout.math.DenseVecto
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 
-class DisplayASNOutputState extends DisplayDirichlet {
+class DisplayASNOutputState extends DisplayClustering {
   
   DisplayASNOutputState() {
     initialize();
@@ -52,16 +51,16 @@ class DisplayASNOutputState extends Disp
     Graphics2D g2 = (Graphics2D) g;
     
     Vector dv = new DenseVector(2);
-    int i = DisplayDirichlet.result.size() - 1;
+    int i = DisplayClustering.result.size() - 1;
     for (Model<VectorWritable>[] models : result) {
       g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
-      g2.setColor(COLORS[Math.min(DisplayDirichlet.COLORS.length - 1, i--)]);
+      g2.setColor(COLORS[Math.min(DisplayClustering.COLORS.length - 1, i--)]);
       for (Model<VectorWritable> m : models) {
         AsymmetricSampledNormalModel mm = (AsymmetricSampledNormalModel) m;
         dv.set(0, mm.getStdDev().get(0) * 3);
         dv.set(1, mm.getStdDev().get(1) * 3);
-        if (DisplayDirichlet.isSignificant(mm)) {
-          DisplayDirichlet.plotEllipse(g2, mm.getMean(), dv);
+        if (DisplayClustering.isSignificant(mm)) {
+          DisplayClustering.plotEllipse(g2, mm.getMean(), dv);
         }
       }
     }
@@ -87,13 +86,13 @@ class DisplayASNOutputState extends Disp
   private static void getSamples() throws IOException {
     File f = new File("input");
     for (File g : f.listFiles()) {
-      DisplayDirichlet.SAMPLE_DATA.addAll(readFile(g.getCanonicalPath()));
+      DisplayClustering.SAMPLE_DATA.addAll(readFile(g.getCanonicalPath()));
     }
   }
   
   private static void getResults() throws IOException, InvocationTargetException, NoSuchMethodException {
     result = new ArrayList<Model<VectorWritable>[]>();
-    JobConf conf = new JobConf(KMeansDriver.class);
+    Configuration conf = new Configuration();
     conf.set(DirichletDriver.MODEL_FACTORY_KEY,
              "org.apache.mahout.clustering.dirichlet.models.AsymmetricSampledNormalDistribution");
     conf.set(DirichletDriver.MODEL_PROTOTYPE_KEY, "org.apache.mahout.math.DenseVector");
@@ -104,7 +103,7 @@ class DisplayASNOutputState extends Disp
     for (File g : f.listFiles()) {
       conf.set(DirichletDriver.STATE_IN_KEY, g.getCanonicalPath());
       DirichletState<VectorWritable> dirichletState = DirichletMapper.getDirichletState(conf);
-      DisplayDirichlet.result.add(dirichletState.getModels());
+      DisplayClustering.result.add(dirichletState.getModels());
     }
   }
   
@@ -116,7 +115,7 @@ class DisplayASNOutputState extends Disp
   }
   
   static void generateResults() {
-    DisplayDirichlet.generateResults(new NormalModelDistribution(new VectorWritable(new DenseVector(2))));
+    DisplayClustering.generateResults(new NormalModelDistribution(new VectorWritable(new DenseVector(2))));
   }
   
 }

Copied: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayClustering.java (from r961880, mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java)
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayClustering.java?p2=mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayClustering.java&p1=mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java&r1=961880&r2=962612&rev=962612&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayClustering.java Fri Jul  9 17:14:20 2010
@@ -39,42 +39,46 @@ import org.apache.mahout.math.VectorWrit
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-public class DisplayDirichlet extends Frame {
-  
-  private static final Logger log = LoggerFactory.getLogger(DisplayDirichlet.class);
-  
+public class DisplayClustering extends Frame {
+
+  private static final Logger log = LoggerFactory.getLogger(DisplayClustering.class);
+
   private static final List<Vector> SAMPLE_PARAMS = new ArrayList<Vector>();
-  
+
   protected static final int DS = 72; // default scale = 72 pixels per inch
-  
+
   protected static final int SIZE = 8; // screen size in inches
-  
+
   protected static final List<VectorWritable> SAMPLE_DATA = new ArrayList<VectorWritable>();
-  
-  protected static final double SIGNIFICANCE = 0.05;
-  
-  protected static final Color[] COLORS = {Color.red, Color.orange, Color.yellow, Color.green, Color.blue,
-                                           Color.magenta, Color.lightGray};
-  
+
+  protected static double SIGNIFICANCE = 0.05;
+
+  protected static final Color[] COLORS = { Color.red, Color.orange, Color.yellow, Color.green, Color.blue, Color.magenta,
+      Color.lightGray };
+
   protected static List<Model<VectorWritable>[]> result;
-  
+
   protected int res; // screen resolution
-  
+
+  protected static double ALPHA_0 = 1.0;
+
   protected static int k = 12;
-  
-  public DisplayDirichlet() {
+
+  protected static int numIterations = 20;
+
+  public DisplayClustering() {
     initialize();
   }
-  
+
   public void initialize() {
     // Get screen resolution
     res = Toolkit.getDefaultToolkit().getScreenResolution();
-    
+
     // Set Frame size in inches
     this.setSize(SIZE * res, SIZE * res);
     this.setVisible(true);
-    this.setTitle("Dirichlet Process Sample Data");
-    
+    this.setTitle("Asymmetric Sample Data");
+
     // Window listener to terminate program.
     this.addWindowListener(new WindowAdapter() {
       @Override
@@ -83,13 +87,13 @@ public class DisplayDirichlet extends Fr
       }
     });
   }
-  
+
   public static void main(String[] args) throws Exception {
     RandomUtils.useTestSeed();
-    generateSamples();
-    new DisplayDirichlet();
+    generate2dSamples();
+    new DisplayClustering();
   }
-  
+
   // Override the paint() method
   @Override
   public void paint(Graphics g) {
@@ -108,18 +112,18 @@ public class DisplayDirichlet extends Fr
       plotEllipse(g2, v, dv);
     }
   }
-  
+
   public void plotSampleData(Graphics g) {
     Graphics2D g2 = (Graphics2D) g;
     double sx = (double) res / DS;
     g2.setTransform(AffineTransform.getScaleInstance(sx, sx));
-    
+
     // plot the axes
     g2.setColor(Color.BLACK);
     Vector dv = new DenseVector(2).assign(SIZE / 2.0);
     plotRectangle(g2, new DenseVector(2).assign(2), dv);
     plotRectangle(g2, new DenseVector(2).assign(-2), dv);
-    
+
     // plot the sample data
     g2.setColor(Color.DARK_GRAY);
     dv.assign(0.03);
@@ -127,7 +131,7 @@ public class DisplayDirichlet extends Fr
       plotRectangle(g2, v.get(), dv);
     }
   }
-  
+
   /**
    * Draw a rectangle on the graphics context
    * 
@@ -139,7 +143,7 @@ public class DisplayDirichlet extends Fr
    *          a Vector of rectangle dimensions
    */
   public static void plotRectangle(Graphics2D g2, Vector v, Vector dv) {
-    double[] flip = {1, -1};
+    double[] flip = { 1, -1 };
     Vector v2 = v.times(new DenseVector(flip));
     v2 = v2.minus(dv.divide(2));
     int h = SIZE / 2;
@@ -147,7 +151,7 @@ public class DisplayDirichlet extends Fr
     double y = v2.get(1) + h;
     g2.draw(new Rectangle2D.Double(x * DS, y * DS, dv.get(0) * DS, dv.get(1) * DS));
   }
-  
+
   /**
    * Draw an ellipse on the graphics context
    * 
@@ -159,7 +163,7 @@ public class DisplayDirichlet extends Fr
    *          a Vector of ellipse dimensions
    */
   public static void plotEllipse(Graphics2D g2, Vector v, Vector dv) {
-    double[] flip = {1, -1};
+    double[] flip = { 1, -1 };
     Vector v2 = v.times(new DenseVector(flip));
     v2 = v2.minus(dv.divide(2));
     int h = SIZE / 2;
@@ -167,7 +171,7 @@ public class DisplayDirichlet extends Fr
     double y = v2.get(1) + h;
     g2.draw(new Ellipse2D.Double(x * DS, y * DS, dv.get(0) * DS, dv.get(1) * DS));
   }
-  
+
   private static void printModels(List<Model<VectorWritable>[]> results, int significant) {
     int row = 0;
     StringBuilder models = new StringBuilder();
@@ -184,19 +188,19 @@ public class DisplayDirichlet extends Fr
     models.append('\n');
     log.info(models.toString());
   }
-  
+
   public static void generateSamples() {
     generateSamples(500, 1, 1, 3);
     generateSamples(300, 1, 0, 0.5);
     generateSamples(300, 0, 2, 0.1);
   }
-  
+
   public static void generate2dSamples() {
     generate2dSamples(500, 1, 1, 3, 1);
     generate2dSamples(300, 1, 0, 0.5, 1);
     generate2dSamples(300, 0, 2, 0.1, 0.5);
   }
-  
+
   /**
    * Generate random samples and add them to the sampleData
    * 
@@ -210,15 +214,15 @@ public class DisplayDirichlet extends Fr
    *          double standard deviation of the samples
    */
   private static void generateSamples(int num, double mx, double my, double sd) {
-    double[] params = {mx, my, sd, sd};
+    double[] params = { mx, my, sd, sd };
     SAMPLE_PARAMS.add(new DenseVector(params));
-    log.info("Generating {} samples m=[{}, {}] sd={}", new Object[] {num, mx, my, sd});
+    log.info("Generating {} samples m=[{}, {}] sd={}", new Object[] { num, mx, my, sd });
     for (int i = 0; i < num; i++) {
-      SAMPLE_DATA.add(new VectorWritable(new DenseVector(new double[] {UncommonDistributions.rNorm(mx, sd),
-          UncommonDistributions.rNorm(my, sd)})));
+      SAMPLE_DATA.add(new VectorWritable(new DenseVector(new double[] { UncommonDistributions.rNorm(mx, sd),
+          UncommonDistributions.rNorm(my, sd) })));
     }
   }
-  
+
   /**
    * Generate random samples and add them to the sampleData
    * 
@@ -234,25 +238,23 @@ public class DisplayDirichlet extends Fr
    *          double y-value standard deviation of the samples
    */
   private static void generate2dSamples(int num, double mx, double my, double sdx, double sdy) {
-    double[] params = {mx, my, sdx, sdy};
+    double[] params = { mx, my, sdx, sdy };
     SAMPLE_PARAMS.add(new DenseVector(params));
-    log.info("Generating {} samples m=[{}, {}] sd=[{}, {}]", new Object[] {num, mx, my, sdx, sdy});
+    log.info("Generating {} samples m=[{}, {}] sd=[{}, {}]", new Object[] { num, mx, my, sdx, sdy });
     for (int i = 0; i < num; i++) {
-      SAMPLE_DATA
-          .add(new VectorWritable(new DenseVector(new double[] {UncommonDistributions.rNorm(mx, sdx),
-                                                                UncommonDistributions.rNorm(my, sdy)})));
+      SAMPLE_DATA.add(new VectorWritable(new DenseVector(new double[] { UncommonDistributions.rNorm(mx, sdx),
+          UncommonDistributions.rNorm(my, sdy) })));
     }
   }
-  
+
   public static void generateResults(ModelDistribution<VectorWritable> modelDist) {
-    DirichletClusterer<VectorWritable> dc = new DirichletClusterer<VectorWritable>(SAMPLE_DATA, modelDist,
-        1.0, k, 2, 2);
-    result = dc.cluster(20);
+    DirichletClusterer<VectorWritable> dc = new DirichletClusterer<VectorWritable>(SAMPLE_DATA, modelDist, ALPHA_0, k, 2, 2);
+    result = dc.cluster(numIterations);
     printModels(result, 5);
   }
-  
+
   public static boolean isSignificant(Model<VectorWritable> model) {
     return (double) model.count() / SAMPLE_DATA.size() > SIGNIFICANCE;
   }
-  
+
 }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayNDirichlet.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayNDirichlet.java?rev=962612&r1=962611&r2=962612&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayNDirichlet.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayNDirichlet.java Fri Jul  9 17:14:20 2010
@@ -29,7 +29,7 @@ import org.apache.mahout.math.DenseVecto
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 
-class DisplayNDirichlet extends DisplayDirichlet {
+class DisplayNDirichlet extends DisplayClustering {
   DisplayNDirichlet() {
     initialize();
     this.setTitle("Dirichlet Process Clusters - Normal Distribution (>"
@@ -42,15 +42,15 @@ class DisplayNDirichlet extends DisplayD
     Graphics2D g2 = (Graphics2D) g;
     
     Vector dv = new DenseVector(2);
-    int i = DisplayDirichlet.result.size() - 1;
+    int i = DisplayClustering.result.size() - 1;
     for (Model<VectorWritable>[] models : result) {
       g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
-      g2.setColor(COLORS[Math.min(DisplayDirichlet.COLORS.length - 1, i--)]);
+      g2.setColor(COLORS[Math.min(DisplayClustering.COLORS.length - 1, i--)]);
       for (Model<VectorWritable> m : models) {
         NormalModel mm = (NormalModel) m;
         dv.assign(mm.getStdDev() * 3);
-        if (DisplayDirichlet.isSignificant(mm)) {
-          DisplayDirichlet.plotEllipse(g2, mm.getMean(), dv);
+        if (DisplayClustering.isSignificant(mm)) {
+          DisplayClustering.plotEllipse(g2, mm.getMean(), dv);
         }
       }
     }
@@ -58,12 +58,12 @@ class DisplayNDirichlet extends DisplayD
   
   public static void main(String[] args) {
     RandomUtils.useTestSeed();
-    DisplayDirichlet.generateSamples();
+    DisplayClustering.generateSamples();
     generateResults();
     new DisplayNDirichlet();
   }
   
   static void generateResults() {
-    DisplayDirichlet.generateResults(new NormalModelDistribution(new VectorWritable(new DenseVector(2))));
+    DisplayClustering.generateResults(new NormalModelDistribution(new VectorWritable(new DenseVector(2))));
   }
 }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayOutputState.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayOutputState.java?rev=962612&r1=962611&r2=962612&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayOutputState.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayOutputState.java Fri Jul  9 17:14:20 2010
@@ -26,11 +26,10 @@ import java.lang.reflect.InvocationTarge
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.mahout.clustering.dirichlet.models.Model;
 import org.apache.mahout.clustering.dirichlet.models.NormalModel;
 import org.apache.mahout.clustering.dirichlet.models.NormalModelDistribution;
-import org.apache.mahout.clustering.kmeans.KMeansDriver;
 import org.apache.mahout.common.FileLineIterable;
 import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.math.AbstractVector;
@@ -38,7 +37,7 @@ import org.apache.mahout.math.DenseVecto
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 
-class DisplayOutputState extends DisplayDirichlet {
+class DisplayOutputState extends DisplayClustering {
   DisplayOutputState() {
     initialize();
     this.setTitle("Dirichlet Process Clusters - Map/Reduce Results (>"
@@ -51,15 +50,15 @@ class DisplayOutputState extends Display
     Graphics2D g2 = (Graphics2D) g;
     
     Vector dv = new DenseVector(2);
-    int i = DisplayDirichlet.result.size() - 1;
+    int i = DisplayClustering.result.size() - 1;
     for (Model<VectorWritable>[] models : result) {
       g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
-      g2.setColor(COLORS[Math.min(DisplayDirichlet.COLORS.length - 1, i--)]);
+      g2.setColor(COLORS[Math.min(DisplayClustering.COLORS.length - 1, i--)]);
       for (Model<VectorWritable> m : models) {
         NormalModel mm = (NormalModel) m;
         dv.assign(mm.getStdDev() * 3);
-        if (DisplayDirichlet.isSignificant(mm)) {
-          DisplayDirichlet.plotEllipse(g2, mm.getMean(), dv);
+        if (DisplayClustering.isSignificant(mm)) {
+          DisplayClustering.plotEllipse(g2, mm.getMean(), dv);
         }
       }
     }
@@ -85,13 +84,13 @@ class DisplayOutputState extends Display
   private static void getSamples() throws IOException {
     File f = new File("input");
     for (File g : f.listFiles()) {
-      DisplayDirichlet.SAMPLE_DATA.addAll(readFile(g.getCanonicalPath()));
+      DisplayClustering.SAMPLE_DATA.addAll(readFile(g.getCanonicalPath()));
     }
   }
   
   private static void getResults() throws IOException, InvocationTargetException, NoSuchMethodException {
     result = new ArrayList<Model<VectorWritable>[]>();
-    JobConf conf = new JobConf(KMeansDriver.class);
+    Configuration conf = new Configuration();
     conf.set(DirichletDriver.MODEL_FACTORY_KEY,
         "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution");
     conf.set(DirichletDriver.MODEL_PROTOTYPE_KEY, "org.apache.mahout.math.DenseVector");
@@ -102,7 +101,7 @@ class DisplayOutputState extends Display
     for (File g : f.listFiles()) {
       conf.set(DirichletDriver.STATE_IN_KEY, g.getCanonicalPath());
       DirichletState<VectorWritable> dirichletState = DirichletMapper.getDirichletState(conf);
-      DisplayDirichlet.result.add(dirichletState.getModels());
+      DisplayClustering.result.add(dirichletState.getModels());
     }
   }
   
@@ -114,6 +113,6 @@ class DisplayOutputState extends Display
   }
   
   static void generateResults() {
-    DisplayDirichlet.generateResults(new NormalModelDistribution(new VectorWritable(new DenseVector(2))));
+    DisplayClustering.generateResults(new NormalModelDistribution(new VectorWritable(new DenseVector(2))));
   }
 }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplaySNDirichlet.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplaySNDirichlet.java?rev=962612&r1=962611&r2=962612&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplaySNDirichlet.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplaySNDirichlet.java Fri Jul  9 17:14:20 2010
@@ -29,7 +29,7 @@ import org.apache.mahout.math.DenseVecto
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 
-class DisplaySNDirichlet extends DisplayDirichlet {
+class DisplaySNDirichlet extends DisplayClustering {
   DisplaySNDirichlet() {
     initialize();
     this.setTitle("Dirichlet Process Clusters - Sampled Normal Distribution (>"
@@ -42,15 +42,15 @@ class DisplaySNDirichlet extends Display
     Graphics2D g2 = (Graphics2D) g;
     
     Vector dv = new DenseVector(2);
-    int i = DisplayDirichlet.result.size() - 1;
+    int i = DisplayClustering.result.size() - 1;
     for (Model<VectorWritable>[] models : result) {
       g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
-      g2.setColor(COLORS[Math.min(DisplayDirichlet.COLORS.length - 1, i--)]);
+      g2.setColor(COLORS[Math.min(DisplayClustering.COLORS.length - 1, i--)]);
       for (Model<VectorWritable> m : models) {
         NormalModel mm = (NormalModel) m;
         dv.assign(mm.getStdDev() * 3);
-        if (DisplayDirichlet.isSignificant(mm)) {
-          DisplayDirichlet.plotEllipse(g2, mm.getMean(), dv);
+        if (DisplayClustering.isSignificant(mm)) {
+          DisplayClustering.plotEllipse(g2, mm.getMean(), dv);
         }
       }
     }
@@ -58,12 +58,16 @@ class DisplaySNDirichlet extends Display
   
   public static void main(String[] args) {
     RandomUtils.useTestSeed();
-    DisplayDirichlet.generateSamples();
+    DisplayClustering.generateSamples();
+    numIterations = 40;
+    ALPHA_0 = 1.0;
+    k=20;
+    SIGNIFICANCE = 0.05;
     generateResults();
     new DisplaySNDirichlet();
   }
   
   static void generateResults() {
-    DisplayDirichlet.generateResults(new SampledNormalDistribution(new VectorWritable(new DenseVector(2))));
+    DisplayClustering.generateResults(new SampledNormalDistribution(new VectorWritable(new DenseVector(2))));
   }
 }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/README.txt
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/README.txt?rev=962612&r1=962611&r2=962612&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/README.txt (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/README.txt Fri Jul  9 17:14:20 2010
@@ -15,13 +15,12 @@ DisplayDirichlet - generates 1000 sample
     colored (orange, yellow, green, blue, magenta) in order after which all earlier clusters are in
     light grey. This helps to visualize how the clusters converge upon a solution over multiple
     iterations.
-  * NOTE: by changing the UncommonDistributions.init(...) call in DisplayDirichlet, you can get
-    different behaviors. Removing the initialization altogether will use a random seed for each run.
+  * NOTE: by changing the parameter values (k, ALPHA_0, numIterations) and the display SIGNIFICANCE
+    you can obtain different results.
     
 DisplayOutputState - this program can be run after any of the SampledNormalDistribution M/R Dirichlet test 
   cases in TestMapReduce. It draws the points and the resulting clusters from the output directory in 
-  a manner similar to the above. By changing the initialization seed in TestMapReduce you can get 
-  different data points.
+  a manner similar to the above. 
 DisplayASNOutputState - similar to above but uses the AsymmetricSampledNormalDistribution.
 
   

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/DisplayFuzzyKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/DisplayFuzzyKMeans.java?rev=962612&r1=962611&r2=962612&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/DisplayFuzzyKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/DisplayFuzzyKMeans.java Fri Jul  9 17:14:20 2010
@@ -23,7 +23,7 @@ import java.awt.Graphics2D;
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.mahout.clustering.dirichlet.DisplayDirichlet;
+import org.apache.mahout.clustering.dirichlet.DisplayClustering;
 import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
@@ -31,7 +31,7 @@ import org.apache.mahout.math.DenseVecto
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 
-class DisplayFuzzyKMeans extends DisplayDirichlet {
+class DisplayFuzzyKMeans extends DisplayClustering {
 
   private static List<List<SoftCluster>> clusters;
   
@@ -48,11 +48,11 @@ class DisplayFuzzyKMeans extends Display
     int i = DisplayFuzzyKMeans.clusters.size() - 1;
     for (List<SoftCluster> cls : clusters) {
       g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
-      g2.setColor(COLORS[Math.min(DisplayDirichlet.COLORS.length - 1, i--)]);
+      g2.setColor(COLORS[Math.min(DisplayClustering.COLORS.length - 1, i--)]);
       for (SoftCluster cluster : cls) {
         // if (true || cluster.getWeightedPointTotal().zSum() > sampleData.size() * 0.05) {
         dv.assign(Math.max(cluster.std(), 0.3) * 3);
-        DisplayDirichlet.plotEllipse(g2, cluster.getCenter(), dv);
+        DisplayClustering.plotEllipse(g2, cluster.getCenter(), dv);
         // }
       }
     }
@@ -60,7 +60,7 @@ class DisplayFuzzyKMeans extends Display
   
   public static void main(String[] args) {
     RandomUtils.useTestSeed();
-    DisplayDirichlet.generateSamples();
+    DisplayClustering.generateSamples();
     List<Vector> points = new ArrayList<Vector>();
     for (VectorWritable sample : SAMPLE_DATA) {
       points.add((Vector) sample.get());

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java?rev=962612&r1=962611&r2=962612&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java Fri Jul  9 17:14:20 2010
@@ -23,7 +23,7 @@ import java.awt.Graphics2D;
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.mahout.clustering.dirichlet.DisplayDirichlet;
+import org.apache.mahout.clustering.dirichlet.DisplayClustering;
 import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
@@ -31,7 +31,7 @@ import org.apache.mahout.math.DenseVecto
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 
-class DisplayKMeans extends DisplayDirichlet {
+class DisplayKMeans extends DisplayClustering {
 
   private static List<List<Cluster>> clusters;
 
@@ -48,12 +48,12 @@ class DisplayKMeans extends DisplayDiric
     int i = DisplayKMeans.clusters.size() - 1;
     for (List<Cluster> cls : clusters) {
       g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
-      g2.setColor(COLORS[Math.min(DisplayDirichlet.COLORS.length - 1, i--)]);
+      g2.setColor(COLORS[Math.min(DisplayClustering.COLORS.length - 1, i--)]);
       for (Cluster cluster : cls) {
         // if (true || cluster.getNumPoints() > sampleData.size() * 0.05) {
         dv.assign(cluster.getStd() * 3);
         System.out.println(cluster.getCenter().asFormatString() + ' ' + dv.asFormatString());
-        DisplayDirichlet.plotEllipse(g2, cluster.getCenter(), dv);
+        DisplayClustering.plotEllipse(g2, cluster.getCenter(), dv);
         // }
       }
     }
@@ -61,7 +61,7 @@ class DisplayKMeans extends DisplayDiric
 
   public static void main(String[] args) {
     RandomUtils.useTestSeed();
-    DisplayDirichlet.generateSamples();
+    DisplayClustering.generateSamples();
     List<Vector> points = new ArrayList<Vector>();
     for (VectorWritable sample : SAMPLE_DATA) {
       points.add(sample.get());

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java?rev=962612&r1=962611&r2=962612&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java Fri Jul  9 17:14:20 2010
@@ -24,7 +24,7 @@ import java.awt.geom.AffineTransform;
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.mahout.clustering.dirichlet.DisplayDirichlet;
+import org.apache.mahout.clustering.dirichlet.DisplayClustering;
 import org.apache.mahout.clustering.dirichlet.models.NormalModelDistribution;
 import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
@@ -34,7 +34,7 @@ import org.apache.mahout.math.VectorWrit
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-final class DisplayMeanShift extends DisplayDirichlet {
+final class DisplayMeanShift extends DisplayClustering {
 
   private static final Logger log = LoggerFactory.getLogger(DisplayMeanShift.class);
 
@@ -60,37 +60,37 @@ final class DisplayMeanShift extends Dis
     Vector dv = new DenseVector(2).assign(SIZE / 2.0);
     Vector dv1 = new DenseVector(2).assign(t1);
     Vector dv2 = new DenseVector(2).assign(t2);
-    DisplayDirichlet.plotRectangle(g2, new DenseVector(2).assign(2), dv);
-    DisplayDirichlet.plotRectangle(g2, new DenseVector(2).assign(-2), dv);
+    DisplayClustering.plotRectangle(g2, new DenseVector(2).assign(2), dv);
+    DisplayClustering.plotRectangle(g2, new DenseVector(2).assign(-2), dv);
 
     // plot the sample data
     g2.setColor(Color.DARK_GRAY);
     dv.assign(0.03);
     for (VectorWritable v : SAMPLE_DATA) {
-      DisplayDirichlet.plotRectangle(g2, v.get(), dv);
+      DisplayClustering.plotRectangle(g2, v.get(), dv);
     }
     int i = 0;
     for (MeanShiftCanopy canopy : canopies) {
-      if (canopy.getBoundPoints().toList().size() > 0.015 * DisplayDirichlet.SAMPLE_DATA.size()) {
-        g2.setColor(COLORS[Math.min(i++, DisplayDirichlet.COLORS.length - 1)]);
+      if (canopy.getBoundPoints().toList().size() > 0.015 * DisplayClustering.SAMPLE_DATA.size()) {
+        g2.setColor(COLORS[Math.min(i++, DisplayClustering.COLORS.length - 1)]);
         int count = 0;
         Vector center = new DenseVector(2);
         for (int vix : canopy.getBoundPoints().toList()) {
           Vector v = SAMPLE_DATA.get(vix).get();
           count++;
           v.addTo(center);
-          DisplayDirichlet.plotRectangle(g2, v, dv);
+          DisplayClustering.plotRectangle(g2, v, dv);
         }
         center = center.divide(count);
-        DisplayDirichlet.plotEllipse(g2, center, dv1);
-        DisplayDirichlet.plotEllipse(g2, center, dv2);
+        DisplayClustering.plotEllipse(g2, center, dv1);
+        DisplayClustering.plotEllipse(g2, center, dv2);
       }
     }
   }
 
   public static void main(String[] args) {
     RandomUtils.useTestSeed();
-    DisplayDirichlet.generateSamples();
+    DisplayClustering.generateSamples();
     List<Vector> points = new ArrayList<Vector>();
     for (VectorWritable sample : SAMPLE_DATA) {
       points.add(sample.get());
@@ -105,6 +105,6 @@ final class DisplayMeanShift extends Dis
   }
 
   static void generateResults() {
-    DisplayDirichlet.generateResults(new NormalModelDistribution());
+    DisplayClustering.generateResults(new NormalModelDistribution());
   }
 }