You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by je...@apache.org on 2010/07/09 19:14:20 UTC
svn commit: r962612 - in /mahout/trunk/examples/src/main/java/org/apache/mahout/clustering: canopy/ dirichlet/ fuzzykmeans/ kmeans/ meanshift/
Author: jeastman
Date: Fri Jul 9 17:14:20 2010
New Revision: 962612
URL: http://svn.apache.org/viewvc?rev=962612&view=rev
Log:
Minor cleanup to the clustering display examples
Added:
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayClustering.java
- copied, changed from r961880, mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java
Removed:
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java
Modified:
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/DisplayCanopy.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/Display2dASNDirichlet.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNDirichlet.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNOutputState.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayNDirichlet.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayOutputState.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplaySNDirichlet.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/README.txt
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/DisplayFuzzyKMeans.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/DisplayCanopy.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/DisplayCanopy.java?rev=962612&r1=962611&r2=962612&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/DisplayCanopy.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/canopy/DisplayCanopy.java Fri Jul 9 17:14:20 2010
@@ -23,7 +23,7 @@ import java.awt.Graphics2D;
import java.util.ArrayList;
import java.util.List;
-import org.apache.mahout.clustering.dirichlet.DisplayDirichlet;
+import org.apache.mahout.clustering.dirichlet.DisplayClustering;
import org.apache.mahout.clustering.dirichlet.models.NormalModelDistribution;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
@@ -31,40 +31,45 @@ import org.apache.mahout.math.DenseVecto
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
-class DisplayCanopy extends DisplayDirichlet {
+class DisplayCanopy extends DisplayClustering {
+
+ private static final long serialVersionUID = 1L;
private static List<Canopy> canopies;
+
private static final double T1 = 3.0;
+
private static final double T2 = 1.6;
DisplayCanopy() {
initialize();
- this.setTitle("Canopy Clusters (> 5% of population)");
+ this.setTitle("Canopy Clusters (>" + (int) (SIGNIFICANCE * 100) + "% of population)");
}
-
+
@Override
public void paint(Graphics g) {
super.plotSampleData(g);
Graphics2D g2 = (Graphics2D) g;
Vector dv = new DenseVector(2);
for (Canopy canopy : canopies) {
- if (canopy.getNumPoints() > DisplayDirichlet.SAMPLE_DATA.size() * 0.05) {
+ if (canopy.getNumPoints() > DisplayClustering.SAMPLE_DATA.size() * SIGNIFICANCE) {
g2.setStroke(new BasicStroke(2));
g2.setColor(COLORS[1]);
dv.assign(T1);
Vector center = canopy.computeCentroid();
- DisplayDirichlet.plotEllipse(g2, center, dv);
+ DisplayClustering.plotEllipse(g2, center, dv);
g2.setStroke(new BasicStroke(3));
g2.setColor(COLORS[0]);
dv.assign(T2);
- DisplayDirichlet.plotEllipse(g2, center, dv);
+ DisplayClustering.plotEllipse(g2, center, dv);
}
}
}
-
+
public static void main(String[] args) {
RandomUtils.useTestSeed();
- DisplayDirichlet.generateSamples();
+ SIGNIFICANCE = 0.1;
+ DisplayClustering.generateSamples();
List<Vector> points = new ArrayList<Vector>();
for (VectorWritable sample : SAMPLE_DATA) {
points.add(sample.get());
@@ -73,8 +78,8 @@ class DisplayCanopy extends DisplayDiric
CanopyClusterer.updateCentroids(canopies);
new DisplayCanopy();
}
-
+
static void generateResults() {
- DisplayDirichlet.generateResults(new NormalModelDistribution());
+ DisplayClustering.generateResults(new NormalModelDistribution());
}
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/Display2dASNDirichlet.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/Display2dASNDirichlet.java?rev=962612&r1=962611&r2=962612&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/Display2dASNDirichlet.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/Display2dASNDirichlet.java Fri Jul 9 17:14:20 2010
@@ -29,7 +29,7 @@ import org.apache.mahout.math.DenseVecto
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
-class Display2dASNDirichlet extends DisplayDirichlet {
+class Display2dASNDirichlet extends DisplayClustering {
Display2dASNDirichlet() {
initialize();
this.setTitle("Dirichlet Process Clusters - 2-d Asymmetric Sampled Normal Distribution (>"
@@ -42,15 +42,15 @@ class Display2dASNDirichlet extends Disp
Graphics2D g2 = (Graphics2D) g;
Vector dv = new DenseVector(2);
- int i = DisplayDirichlet.result.size() - 1;
+ int i = DisplayClustering.result.size() - 1;
for (Model<VectorWritable>[] models : result) {
g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
- g2.setColor(COLORS[Math.min(DisplayDirichlet.COLORS.length - 1, i--)]);
+ g2.setColor(COLORS[Math.min(DisplayClustering.COLORS.length - 1, i--)]);
for (Model<VectorWritable> m : models) {
AsymmetricSampledNormalModel mm = (AsymmetricSampledNormalModel) m;
dv.assign(mm.getStdDev().times(3));
- if (DisplayDirichlet.isSignificant(mm)) {
- DisplayDirichlet.plotEllipse(g2, mm.getMean(), dv);
+ if (DisplayClustering.isSignificant(mm)) {
+ DisplayClustering.plotEllipse(g2, mm.getMean(), dv);
}
}
}
@@ -58,14 +58,15 @@ class Display2dASNDirichlet extends Disp
public static void main(String[] args) {
RandomUtils.useTestSeed();
- k = 10;
- DisplayDirichlet.generate2dSamples();
+ k = 20;
+ numIterations = 20;
+ DisplayClustering.generate2dSamples();
generateResults();
new Display2dASNDirichlet();
}
private static void generateResults() {
- DisplayDirichlet.generateResults(new AsymmetricSampledNormalDistribution(new VectorWritable(
+ DisplayClustering.generateResults(new AsymmetricSampledNormalDistribution(new VectorWritable(
new DenseVector(2))));
}
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNDirichlet.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNDirichlet.java?rev=962612&r1=962611&r2=962612&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNDirichlet.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNDirichlet.java Fri Jul 9 17:14:20 2010
@@ -29,7 +29,7 @@ import org.apache.mahout.math.DenseVecto
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
-class DisplayASNDirichlet extends DisplayDirichlet {
+class DisplayASNDirichlet extends DisplayClustering {
DisplayASNDirichlet() {
initialize();
this
@@ -43,15 +43,15 @@ class DisplayASNDirichlet extends Displa
Graphics2D g2 = (Graphics2D) g;
Vector dv = new DenseVector(2);
- int i = DisplayDirichlet.result.size() - 1;
+ int i = DisplayClustering.result.size() - 1;
for (Model<VectorWritable>[] models : result) {
g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
- g2.setColor(COLORS[Math.min(DisplayDirichlet.COLORS.length - 1, i--)]);
+ g2.setColor(COLORS[Math.min(DisplayClustering.COLORS.length - 1, i--)]);
for (Model<VectorWritable> m : models) {
AsymmetricSampledNormalModel mm = (AsymmetricSampledNormalModel) m;
dv.assign(mm.getStdDev().times(3));
- if (DisplayDirichlet.isSignificant(mm)) {
- DisplayDirichlet.plotEllipse(g2, mm.getMean(), dv);
+ if (DisplayClustering.isSignificant(mm)) {
+ DisplayClustering.plotEllipse(g2, mm.getMean(), dv);
}
}
}
@@ -59,12 +59,12 @@ class DisplayASNDirichlet extends Displa
public static void main(String[] args) {
RandomUtils.useTestSeed();
- DisplayDirichlet.generateSamples();
+ DisplayClustering.generateSamples();
generateResults();
new DisplayASNDirichlet();
}
static void generateResults() {
- DisplayDirichlet.generateResults(new AsymmetricSampledNormalDistribution(new VectorWritable(new DenseVector(2))));
+ DisplayClustering.generateResults(new AsymmetricSampledNormalDistribution(new VectorWritable(new DenseVector(2))));
}
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNOutputState.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNOutputState.java?rev=962612&r1=962611&r2=962612&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNOutputState.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayASNOutputState.java Fri Jul 9 17:14:20 2010
@@ -26,11 +26,10 @@ import java.lang.reflect.InvocationTarge
import java.util.ArrayList;
import java.util.List;
-import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.conf.Configuration;
import org.apache.mahout.clustering.dirichlet.models.AsymmetricSampledNormalModel;
import org.apache.mahout.clustering.dirichlet.models.Model;
import org.apache.mahout.clustering.dirichlet.models.NormalModelDistribution;
-import org.apache.mahout.clustering.kmeans.KMeansDriver;
import org.apache.mahout.common.FileLineIterable;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.math.AbstractVector;
@@ -38,7 +37,7 @@ import org.apache.mahout.math.DenseVecto
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
-class DisplayASNOutputState extends DisplayDirichlet {
+class DisplayASNOutputState extends DisplayClustering {
DisplayASNOutputState() {
initialize();
@@ -52,16 +51,16 @@ class DisplayASNOutputState extends Disp
Graphics2D g2 = (Graphics2D) g;
Vector dv = new DenseVector(2);
- int i = DisplayDirichlet.result.size() - 1;
+ int i = DisplayClustering.result.size() - 1;
for (Model<VectorWritable>[] models : result) {
g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
- g2.setColor(COLORS[Math.min(DisplayDirichlet.COLORS.length - 1, i--)]);
+ g2.setColor(COLORS[Math.min(DisplayClustering.COLORS.length - 1, i--)]);
for (Model<VectorWritable> m : models) {
AsymmetricSampledNormalModel mm = (AsymmetricSampledNormalModel) m;
dv.set(0, mm.getStdDev().get(0) * 3);
dv.set(1, mm.getStdDev().get(1) * 3);
- if (DisplayDirichlet.isSignificant(mm)) {
- DisplayDirichlet.plotEllipse(g2, mm.getMean(), dv);
+ if (DisplayClustering.isSignificant(mm)) {
+ DisplayClustering.plotEllipse(g2, mm.getMean(), dv);
}
}
}
@@ -87,13 +86,13 @@ class DisplayASNOutputState extends Disp
private static void getSamples() throws IOException {
File f = new File("input");
for (File g : f.listFiles()) {
- DisplayDirichlet.SAMPLE_DATA.addAll(readFile(g.getCanonicalPath()));
+ DisplayClustering.SAMPLE_DATA.addAll(readFile(g.getCanonicalPath()));
}
}
private static void getResults() throws IOException, InvocationTargetException, NoSuchMethodException {
result = new ArrayList<Model<VectorWritable>[]>();
- JobConf conf = new JobConf(KMeansDriver.class);
+ Configuration conf = new Configuration();
conf.set(DirichletDriver.MODEL_FACTORY_KEY,
"org.apache.mahout.clustering.dirichlet.models.AsymmetricSampledNormalDistribution");
conf.set(DirichletDriver.MODEL_PROTOTYPE_KEY, "org.apache.mahout.math.DenseVector");
@@ -104,7 +103,7 @@ class DisplayASNOutputState extends Disp
for (File g : f.listFiles()) {
conf.set(DirichletDriver.STATE_IN_KEY, g.getCanonicalPath());
DirichletState<VectorWritable> dirichletState = DirichletMapper.getDirichletState(conf);
- DisplayDirichlet.result.add(dirichletState.getModels());
+ DisplayClustering.result.add(dirichletState.getModels());
}
}
@@ -116,7 +115,7 @@ class DisplayASNOutputState extends Disp
}
static void generateResults() {
- DisplayDirichlet.generateResults(new NormalModelDistribution(new VectorWritable(new DenseVector(2))));
+ DisplayClustering.generateResults(new NormalModelDistribution(new VectorWritable(new DenseVector(2))));
}
}
Copied: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayClustering.java (from r961880, mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java)
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayClustering.java?p2=mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayClustering.java&p1=mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java&r1=961880&r2=962612&rev=962612&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayDirichlet.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayClustering.java Fri Jul 9 17:14:20 2010
@@ -39,42 +39,46 @@ import org.apache.mahout.math.VectorWrit
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-public class DisplayDirichlet extends Frame {
-
- private static final Logger log = LoggerFactory.getLogger(DisplayDirichlet.class);
-
+public class DisplayClustering extends Frame {
+
+ private static final Logger log = LoggerFactory.getLogger(DisplayClustering.class);
+
private static final List<Vector> SAMPLE_PARAMS = new ArrayList<Vector>();
-
+
protected static final int DS = 72; // default scale = 72 pixels per inch
-
+
protected static final int SIZE = 8; // screen size in inches
-
+
protected static final List<VectorWritable> SAMPLE_DATA = new ArrayList<VectorWritable>();
-
- protected static final double SIGNIFICANCE = 0.05;
-
- protected static final Color[] COLORS = {Color.red, Color.orange, Color.yellow, Color.green, Color.blue,
- Color.magenta, Color.lightGray};
-
+
+ protected static double SIGNIFICANCE = 0.05;
+
+ protected static final Color[] COLORS = { Color.red, Color.orange, Color.yellow, Color.green, Color.blue, Color.magenta,
+ Color.lightGray };
+
protected static List<Model<VectorWritable>[]> result;
-
+
protected int res; // screen resolution
-
+
+ protected static double ALPHA_0 = 1.0;
+
protected static int k = 12;
-
- public DisplayDirichlet() {
+
+ protected static int numIterations = 20;
+
+ public DisplayClustering() {
initialize();
}
-
+
public void initialize() {
// Get screen resolution
res = Toolkit.getDefaultToolkit().getScreenResolution();
-
+
// Set Frame size in inches
this.setSize(SIZE * res, SIZE * res);
this.setVisible(true);
- this.setTitle("Dirichlet Process Sample Data");
-
+ this.setTitle("Asymmetric Sample Data");
+
// Window listener to terminate program.
this.addWindowListener(new WindowAdapter() {
@Override
@@ -83,13 +87,13 @@ public class DisplayDirichlet extends Fr
}
});
}
-
+
public static void main(String[] args) throws Exception {
RandomUtils.useTestSeed();
- generateSamples();
- new DisplayDirichlet();
+ generate2dSamples();
+ new DisplayClustering();
}
-
+
// Override the paint() method
@Override
public void paint(Graphics g) {
@@ -108,18 +112,18 @@ public class DisplayDirichlet extends Fr
plotEllipse(g2, v, dv);
}
}
-
+
public void plotSampleData(Graphics g) {
Graphics2D g2 = (Graphics2D) g;
double sx = (double) res / DS;
g2.setTransform(AffineTransform.getScaleInstance(sx, sx));
-
+
// plot the axes
g2.setColor(Color.BLACK);
Vector dv = new DenseVector(2).assign(SIZE / 2.0);
plotRectangle(g2, new DenseVector(2).assign(2), dv);
plotRectangle(g2, new DenseVector(2).assign(-2), dv);
-
+
// plot the sample data
g2.setColor(Color.DARK_GRAY);
dv.assign(0.03);
@@ -127,7 +131,7 @@ public class DisplayDirichlet extends Fr
plotRectangle(g2, v.get(), dv);
}
}
-
+
/**
* Draw a rectangle on the graphics context
*
@@ -139,7 +143,7 @@ public class DisplayDirichlet extends Fr
* a Vector of rectangle dimensions
*/
public static void plotRectangle(Graphics2D g2, Vector v, Vector dv) {
- double[] flip = {1, -1};
+ double[] flip = { 1, -1 };
Vector v2 = v.times(new DenseVector(flip));
v2 = v2.minus(dv.divide(2));
int h = SIZE / 2;
@@ -147,7 +151,7 @@ public class DisplayDirichlet extends Fr
double y = v2.get(1) + h;
g2.draw(new Rectangle2D.Double(x * DS, y * DS, dv.get(0) * DS, dv.get(1) * DS));
}
-
+
/**
* Draw an ellipse on the graphics context
*
@@ -159,7 +163,7 @@ public class DisplayDirichlet extends Fr
* a Vector of ellipse dimensions
*/
public static void plotEllipse(Graphics2D g2, Vector v, Vector dv) {
- double[] flip = {1, -1};
+ double[] flip = { 1, -1 };
Vector v2 = v.times(new DenseVector(flip));
v2 = v2.minus(dv.divide(2));
int h = SIZE / 2;
@@ -167,7 +171,7 @@ public class DisplayDirichlet extends Fr
double y = v2.get(1) + h;
g2.draw(new Ellipse2D.Double(x * DS, y * DS, dv.get(0) * DS, dv.get(1) * DS));
}
-
+
private static void printModels(List<Model<VectorWritable>[]> results, int significant) {
int row = 0;
StringBuilder models = new StringBuilder();
@@ -184,19 +188,19 @@ public class DisplayDirichlet extends Fr
models.append('\n');
log.info(models.toString());
}
-
+
public static void generateSamples() {
generateSamples(500, 1, 1, 3);
generateSamples(300, 1, 0, 0.5);
generateSamples(300, 0, 2, 0.1);
}
-
+
public static void generate2dSamples() {
generate2dSamples(500, 1, 1, 3, 1);
generate2dSamples(300, 1, 0, 0.5, 1);
generate2dSamples(300, 0, 2, 0.1, 0.5);
}
-
+
/**
* Generate random samples and add them to the sampleData
*
@@ -210,15 +214,15 @@ public class DisplayDirichlet extends Fr
* double standard deviation of the samples
*/
private static void generateSamples(int num, double mx, double my, double sd) {
- double[] params = {mx, my, sd, sd};
+ double[] params = { mx, my, sd, sd };
SAMPLE_PARAMS.add(new DenseVector(params));
- log.info("Generating {} samples m=[{}, {}] sd={}", new Object[] {num, mx, my, sd});
+ log.info("Generating {} samples m=[{}, {}] sd={}", new Object[] { num, mx, my, sd });
for (int i = 0; i < num; i++) {
- SAMPLE_DATA.add(new VectorWritable(new DenseVector(new double[] {UncommonDistributions.rNorm(mx, sd),
- UncommonDistributions.rNorm(my, sd)})));
+ SAMPLE_DATA.add(new VectorWritable(new DenseVector(new double[] { UncommonDistributions.rNorm(mx, sd),
+ UncommonDistributions.rNorm(my, sd) })));
}
}
-
+
/**
* Generate random samples and add them to the sampleData
*
@@ -234,25 +238,23 @@ public class DisplayDirichlet extends Fr
* double y-value standard deviation of the samples
*/
private static void generate2dSamples(int num, double mx, double my, double sdx, double sdy) {
- double[] params = {mx, my, sdx, sdy};
+ double[] params = { mx, my, sdx, sdy };
SAMPLE_PARAMS.add(new DenseVector(params));
- log.info("Generating {} samples m=[{}, {}] sd=[{}, {}]", new Object[] {num, mx, my, sdx, sdy});
+ log.info("Generating {} samples m=[{}, {}] sd=[{}, {}]", new Object[] { num, mx, my, sdx, sdy });
for (int i = 0; i < num; i++) {
- SAMPLE_DATA
- .add(new VectorWritable(new DenseVector(new double[] {UncommonDistributions.rNorm(mx, sdx),
- UncommonDistributions.rNorm(my, sdy)})));
+ SAMPLE_DATA.add(new VectorWritable(new DenseVector(new double[] { UncommonDistributions.rNorm(mx, sdx),
+ UncommonDistributions.rNorm(my, sdy) })));
}
}
-
+
public static void generateResults(ModelDistribution<VectorWritable> modelDist) {
- DirichletClusterer<VectorWritable> dc = new DirichletClusterer<VectorWritable>(SAMPLE_DATA, modelDist,
- 1.0, k, 2, 2);
- result = dc.cluster(20);
+ DirichletClusterer<VectorWritable> dc = new DirichletClusterer<VectorWritable>(SAMPLE_DATA, modelDist, ALPHA_0, k, 2, 2);
+ result = dc.cluster(numIterations);
printModels(result, 5);
}
-
+
public static boolean isSignificant(Model<VectorWritable> model) {
return (double) model.count() / SAMPLE_DATA.size() > SIGNIFICANCE;
}
-
+
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayNDirichlet.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayNDirichlet.java?rev=962612&r1=962611&r2=962612&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayNDirichlet.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayNDirichlet.java Fri Jul 9 17:14:20 2010
@@ -29,7 +29,7 @@ import org.apache.mahout.math.DenseVecto
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
-class DisplayNDirichlet extends DisplayDirichlet {
+class DisplayNDirichlet extends DisplayClustering {
DisplayNDirichlet() {
initialize();
this.setTitle("Dirichlet Process Clusters - Normal Distribution (>"
@@ -42,15 +42,15 @@ class DisplayNDirichlet extends DisplayD
Graphics2D g2 = (Graphics2D) g;
Vector dv = new DenseVector(2);
- int i = DisplayDirichlet.result.size() - 1;
+ int i = DisplayClustering.result.size() - 1;
for (Model<VectorWritable>[] models : result) {
g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
- g2.setColor(COLORS[Math.min(DisplayDirichlet.COLORS.length - 1, i--)]);
+ g2.setColor(COLORS[Math.min(DisplayClustering.COLORS.length - 1, i--)]);
for (Model<VectorWritable> m : models) {
NormalModel mm = (NormalModel) m;
dv.assign(mm.getStdDev() * 3);
- if (DisplayDirichlet.isSignificant(mm)) {
- DisplayDirichlet.plotEllipse(g2, mm.getMean(), dv);
+ if (DisplayClustering.isSignificant(mm)) {
+ DisplayClustering.plotEllipse(g2, mm.getMean(), dv);
}
}
}
@@ -58,12 +58,12 @@ class DisplayNDirichlet extends DisplayD
public static void main(String[] args) {
RandomUtils.useTestSeed();
- DisplayDirichlet.generateSamples();
+ DisplayClustering.generateSamples();
generateResults();
new DisplayNDirichlet();
}
static void generateResults() {
- DisplayDirichlet.generateResults(new NormalModelDistribution(new VectorWritable(new DenseVector(2))));
+ DisplayClustering.generateResults(new NormalModelDistribution(new VectorWritable(new DenseVector(2))));
}
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayOutputState.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayOutputState.java?rev=962612&r1=962611&r2=962612&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayOutputState.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplayOutputState.java Fri Jul 9 17:14:20 2010
@@ -26,11 +26,10 @@ import java.lang.reflect.InvocationTarge
import java.util.ArrayList;
import java.util.List;
-import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.conf.Configuration;
import org.apache.mahout.clustering.dirichlet.models.Model;
import org.apache.mahout.clustering.dirichlet.models.NormalModel;
import org.apache.mahout.clustering.dirichlet.models.NormalModelDistribution;
-import org.apache.mahout.clustering.kmeans.KMeansDriver;
import org.apache.mahout.common.FileLineIterable;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.math.AbstractVector;
@@ -38,7 +37,7 @@ import org.apache.mahout.math.DenseVecto
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
-class DisplayOutputState extends DisplayDirichlet {
+class DisplayOutputState extends DisplayClustering {
DisplayOutputState() {
initialize();
this.setTitle("Dirichlet Process Clusters - Map/Reduce Results (>"
@@ -51,15 +50,15 @@ class DisplayOutputState extends Display
Graphics2D g2 = (Graphics2D) g;
Vector dv = new DenseVector(2);
- int i = DisplayDirichlet.result.size() - 1;
+ int i = DisplayClustering.result.size() - 1;
for (Model<VectorWritable>[] models : result) {
g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
- g2.setColor(COLORS[Math.min(DisplayDirichlet.COLORS.length - 1, i--)]);
+ g2.setColor(COLORS[Math.min(DisplayClustering.COLORS.length - 1, i--)]);
for (Model<VectorWritable> m : models) {
NormalModel mm = (NormalModel) m;
dv.assign(mm.getStdDev() * 3);
- if (DisplayDirichlet.isSignificant(mm)) {
- DisplayDirichlet.plotEllipse(g2, mm.getMean(), dv);
+ if (DisplayClustering.isSignificant(mm)) {
+ DisplayClustering.plotEllipse(g2, mm.getMean(), dv);
}
}
}
@@ -85,13 +84,13 @@ class DisplayOutputState extends Display
private static void getSamples() throws IOException {
File f = new File("input");
for (File g : f.listFiles()) {
- DisplayDirichlet.SAMPLE_DATA.addAll(readFile(g.getCanonicalPath()));
+ DisplayClustering.SAMPLE_DATA.addAll(readFile(g.getCanonicalPath()));
}
}
private static void getResults() throws IOException, InvocationTargetException, NoSuchMethodException {
result = new ArrayList<Model<VectorWritable>[]>();
- JobConf conf = new JobConf(KMeansDriver.class);
+ Configuration conf = new Configuration();
conf.set(DirichletDriver.MODEL_FACTORY_KEY,
"org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution");
conf.set(DirichletDriver.MODEL_PROTOTYPE_KEY, "org.apache.mahout.math.DenseVector");
@@ -102,7 +101,7 @@ class DisplayOutputState extends Display
for (File g : f.listFiles()) {
conf.set(DirichletDriver.STATE_IN_KEY, g.getCanonicalPath());
DirichletState<VectorWritable> dirichletState = DirichletMapper.getDirichletState(conf);
- DisplayDirichlet.result.add(dirichletState.getModels());
+ DisplayClustering.result.add(dirichletState.getModels());
}
}
@@ -114,6 +113,6 @@ class DisplayOutputState extends Display
}
static void generateResults() {
- DisplayDirichlet.generateResults(new NormalModelDistribution(new VectorWritable(new DenseVector(2))));
+ DisplayClustering.generateResults(new NormalModelDistribution(new VectorWritable(new DenseVector(2))));
}
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplaySNDirichlet.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplaySNDirichlet.java?rev=962612&r1=962611&r2=962612&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplaySNDirichlet.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/DisplaySNDirichlet.java Fri Jul 9 17:14:20 2010
@@ -29,7 +29,7 @@ import org.apache.mahout.math.DenseVecto
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
-class DisplaySNDirichlet extends DisplayDirichlet {
+class DisplaySNDirichlet extends DisplayClustering {
DisplaySNDirichlet() {
initialize();
this.setTitle("Dirichlet Process Clusters - Sampled Normal Distribution (>"
@@ -42,15 +42,15 @@ class DisplaySNDirichlet extends Display
Graphics2D g2 = (Graphics2D) g;
Vector dv = new DenseVector(2);
- int i = DisplayDirichlet.result.size() - 1;
+ int i = DisplayClustering.result.size() - 1;
for (Model<VectorWritable>[] models : result) {
g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
- g2.setColor(COLORS[Math.min(DisplayDirichlet.COLORS.length - 1, i--)]);
+ g2.setColor(COLORS[Math.min(DisplayClustering.COLORS.length - 1, i--)]);
for (Model<VectorWritable> m : models) {
NormalModel mm = (NormalModel) m;
dv.assign(mm.getStdDev() * 3);
- if (DisplayDirichlet.isSignificant(mm)) {
- DisplayDirichlet.plotEllipse(g2, mm.getMean(), dv);
+ if (DisplayClustering.isSignificant(mm)) {
+ DisplayClustering.plotEllipse(g2, mm.getMean(), dv);
}
}
}
@@ -58,12 +58,16 @@ class DisplaySNDirichlet extends Display
public static void main(String[] args) {
RandomUtils.useTestSeed();
- DisplayDirichlet.generateSamples();
+ DisplayClustering.generateSamples();
+ numIterations = 40;
+ ALPHA_0 = 1.0;
+ k=20;
+ SIGNIFICANCE = 0.05;
generateResults();
new DisplaySNDirichlet();
}
static void generateResults() {
- DisplayDirichlet.generateResults(new SampledNormalDistribution(new VectorWritable(new DenseVector(2))));
+ DisplayClustering.generateResults(new SampledNormalDistribution(new VectorWritable(new DenseVector(2))));
}
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/README.txt
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/README.txt?rev=962612&r1=962611&r2=962612&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/README.txt (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/dirichlet/README.txt Fri Jul 9 17:14:20 2010
@@ -15,13 +15,12 @@ DisplayDirichlet - generates 1000 sample
colored (orange, yellow, green, blue, magenta) in order after which all earlier clusters are in
light grey. This helps to visualize how the clusters converge upon a solution over multiple
iterations.
- * NOTE: by changing the UncommonDistributions.init(...) call in DisplayDirichlet, you can get
- different behaviors. Removing the initialization altogether will use a random seed for each run.
+ * NOTE: by changing the parameter values (k, ALPHA_0, numIterations) and the display SIGNIFICANCE
+ you can obtain different results.
DisplayOutputState - this program can be run after any of the SampledNormalDistribution M/R Dirichlet test
cases in TestMapReduce. It draws the points and the resulting clusters from the output directory in
- a manner similar to the above. By changing the initialization seed in TestMapReduce you can get
- different data points.
+ a manner similar to the above.
DisplayASNOutputState - similar to above but uses the AsymmetricSampledNormalDistribution.
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/DisplayFuzzyKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/DisplayFuzzyKMeans.java?rev=962612&r1=962611&r2=962612&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/DisplayFuzzyKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/fuzzykmeans/DisplayFuzzyKMeans.java Fri Jul 9 17:14:20 2010
@@ -23,7 +23,7 @@ import java.awt.Graphics2D;
import java.util.ArrayList;
import java.util.List;
-import org.apache.mahout.clustering.dirichlet.DisplayDirichlet;
+import org.apache.mahout.clustering.dirichlet.DisplayClustering;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
@@ -31,7 +31,7 @@ import org.apache.mahout.math.DenseVecto
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
-class DisplayFuzzyKMeans extends DisplayDirichlet {
+class DisplayFuzzyKMeans extends DisplayClustering {
private static List<List<SoftCluster>> clusters;
@@ -48,11 +48,11 @@ class DisplayFuzzyKMeans extends Display
int i = DisplayFuzzyKMeans.clusters.size() - 1;
for (List<SoftCluster> cls : clusters) {
g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
- g2.setColor(COLORS[Math.min(DisplayDirichlet.COLORS.length - 1, i--)]);
+ g2.setColor(COLORS[Math.min(DisplayClustering.COLORS.length - 1, i--)]);
for (SoftCluster cluster : cls) {
// if (true || cluster.getWeightedPointTotal().zSum() > sampleData.size() * 0.05) {
dv.assign(Math.max(cluster.std(), 0.3) * 3);
- DisplayDirichlet.plotEllipse(g2, cluster.getCenter(), dv);
+ DisplayClustering.plotEllipse(g2, cluster.getCenter(), dv);
// }
}
}
@@ -60,7 +60,7 @@ class DisplayFuzzyKMeans extends Display
public static void main(String[] args) {
RandomUtils.useTestSeed();
- DisplayDirichlet.generateSamples();
+ DisplayClustering.generateSamples();
List<Vector> points = new ArrayList<Vector>();
for (VectorWritable sample : SAMPLE_DATA) {
points.add((Vector) sample.get());
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java?rev=962612&r1=962611&r2=962612&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/kmeans/DisplayKMeans.java Fri Jul 9 17:14:20 2010
@@ -23,7 +23,7 @@ import java.awt.Graphics2D;
import java.util.ArrayList;
import java.util.List;
-import org.apache.mahout.clustering.dirichlet.DisplayDirichlet;
+import org.apache.mahout.clustering.dirichlet.DisplayClustering;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
@@ -31,7 +31,7 @@ import org.apache.mahout.math.DenseVecto
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
-class DisplayKMeans extends DisplayDirichlet {
+class DisplayKMeans extends DisplayClustering {
private static List<List<Cluster>> clusters;
@@ -48,12 +48,12 @@ class DisplayKMeans extends DisplayDiric
int i = DisplayKMeans.clusters.size() - 1;
for (List<Cluster> cls : clusters) {
g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
- g2.setColor(COLORS[Math.min(DisplayDirichlet.COLORS.length - 1, i--)]);
+ g2.setColor(COLORS[Math.min(DisplayClustering.COLORS.length - 1, i--)]);
for (Cluster cluster : cls) {
// if (true || cluster.getNumPoints() > sampleData.size() * 0.05) {
dv.assign(cluster.getStd() * 3);
System.out.println(cluster.getCenter().asFormatString() + ' ' + dv.asFormatString());
- DisplayDirichlet.plotEllipse(g2, cluster.getCenter(), dv);
+ DisplayClustering.plotEllipse(g2, cluster.getCenter(), dv);
// }
}
}
@@ -61,7 +61,7 @@ class DisplayKMeans extends DisplayDiric
public static void main(String[] args) {
RandomUtils.useTestSeed();
- DisplayDirichlet.generateSamples();
+ DisplayClustering.generateSamples();
List<Vector> points = new ArrayList<Vector>();
for (VectorWritable sample : SAMPLE_DATA) {
points.add(sample.get());
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java?rev=962612&r1=962611&r2=962612&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java Fri Jul 9 17:14:20 2010
@@ -24,7 +24,7 @@ import java.awt.geom.AffineTransform;
import java.util.ArrayList;
import java.util.List;
-import org.apache.mahout.clustering.dirichlet.DisplayDirichlet;
+import org.apache.mahout.clustering.dirichlet.DisplayClustering;
import org.apache.mahout.clustering.dirichlet.models.NormalModelDistribution;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
@@ -34,7 +34,7 @@ import org.apache.mahout.math.VectorWrit
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-final class DisplayMeanShift extends DisplayDirichlet {
+final class DisplayMeanShift extends DisplayClustering {
private static final Logger log = LoggerFactory.getLogger(DisplayMeanShift.class);
@@ -60,37 +60,37 @@ final class DisplayMeanShift extends Dis
Vector dv = new DenseVector(2).assign(SIZE / 2.0);
Vector dv1 = new DenseVector(2).assign(t1);
Vector dv2 = new DenseVector(2).assign(t2);
- DisplayDirichlet.plotRectangle(g2, new DenseVector(2).assign(2), dv);
- DisplayDirichlet.plotRectangle(g2, new DenseVector(2).assign(-2), dv);
+ DisplayClustering.plotRectangle(g2, new DenseVector(2).assign(2), dv);
+ DisplayClustering.plotRectangle(g2, new DenseVector(2).assign(-2), dv);
// plot the sample data
g2.setColor(Color.DARK_GRAY);
dv.assign(0.03);
for (VectorWritable v : SAMPLE_DATA) {
- DisplayDirichlet.plotRectangle(g2, v.get(), dv);
+ DisplayClustering.plotRectangle(g2, v.get(), dv);
}
int i = 0;
for (MeanShiftCanopy canopy : canopies) {
- if (canopy.getBoundPoints().toList().size() > 0.015 * DisplayDirichlet.SAMPLE_DATA.size()) {
- g2.setColor(COLORS[Math.min(i++, DisplayDirichlet.COLORS.length - 1)]);
+ if (canopy.getBoundPoints().toList().size() > 0.015 * DisplayClustering.SAMPLE_DATA.size()) {
+ g2.setColor(COLORS[Math.min(i++, DisplayClustering.COLORS.length - 1)]);
int count = 0;
Vector center = new DenseVector(2);
for (int vix : canopy.getBoundPoints().toList()) {
Vector v = SAMPLE_DATA.get(vix).get();
count++;
v.addTo(center);
- DisplayDirichlet.plotRectangle(g2, v, dv);
+ DisplayClustering.plotRectangle(g2, v, dv);
}
center = center.divide(count);
- DisplayDirichlet.plotEllipse(g2, center, dv1);
- DisplayDirichlet.plotEllipse(g2, center, dv2);
+ DisplayClustering.plotEllipse(g2, center, dv1);
+ DisplayClustering.plotEllipse(g2, center, dv2);
}
}
}
public static void main(String[] args) {
RandomUtils.useTestSeed();
- DisplayDirichlet.generateSamples();
+ DisplayClustering.generateSamples();
List<Vector> points = new ArrayList<Vector>();
for (VectorWritable sample : SAMPLE_DATA) {
points.add(sample.get());
@@ -105,6 +105,6 @@ final class DisplayMeanShift extends Dis
}
static void generateResults() {
- DisplayDirichlet.generateResults(new NormalModelDistribution());
+ DisplayClustering.generateResults(new NormalModelDistribution());
}
}