You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by je...@apache.org on 2012/06/01 16:48:23 UTC
svn commit: r1345214 -
/mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java
Author: jeastman
Date: Fri Jun 1 14:48:23 2012
New Revision: 1345214
URL: http://svn.apache.org/viewvc?rev=1345214&view=rev
Log:
MAHOUT-1020: fixed path names for testKmeans and testFuzzyKmeans that were causing representative points calculation to fail. CDbw results now look more reasonable.
Modified:
mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java
Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java?rev=1345214&r1=1345213&r2=1345214&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java Fri Jun 1 14:48:23 2012
@@ -27,6 +27,8 @@ import com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.mahout.clustering.AbstractCluster;
import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.clustering.ClusteringTestUtils;
import org.apache.mahout.clustering.TestClusterEvaluator;
@@ -42,8 +44,12 @@ import org.apache.mahout.clustering.kmea
import org.apache.mahout.clustering.kmeans.TestKmeansClustering;
import org.apache.mahout.clustering.meanshift.MeanShiftCanopyDriver;
import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.common.Pair;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
+import org.apache.mahout.common.iterator.sequencefile.PathFilters;
+import org.apache.mahout.common.iterator.sequencefile.PathType;
+import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
import org.apache.mahout.common.kernel.IKernelProfile;
import org.apache.mahout.common.kernel.TriangularKernelProfile;
import org.apache.mahout.math.DenseVector;
@@ -56,11 +62,9 @@ import org.slf4j.LoggerFactory;
public final class TestCDbwEvaluator extends MahoutTestCase {
- private static final double[][] REFERENCE = { {1, 1}, {2, 1}, {1, 2}, {2, 2},
- {3, 3}, {4, 4}, {5, 4}, {4, 5}, {5, 5}};
+ private static final double[][] REFERENCE = { {1, 1}, {2, 1}, {1, 2}, {2, 2}, {3, 3}, {4, 4}, {5, 4}, {4, 5}, {5, 5}};
- private static final Logger log = LoggerFactory
- .getLogger(TestClusterEvaluator.class);
+ private static final Logger log = LoggerFactory.getLogger(TestClusterEvaluator.class);
private Map<Integer,List<VectorWritable>> representativePoints;
@@ -93,8 +97,7 @@ public final class TestCDbwEvaluator ext
}
/**
- * Initialize synthetic data using 4 clusters dC units from origin having 4
- * representative points dP from each center
+ * Initialize synthetic data using 4 clusters dC units from origin having 4 representative points dP from each center
*
* @param dC
* a double cluster center offset
@@ -105,27 +108,19 @@ public final class TestCDbwEvaluator ext
*/
private void initData(double dC, double dP, DistanceMeasure measure) {
clusters = Lists.newArrayList();
- clusters.add(new Canopy(new DenseVector(new double[] {-dC, -dC}), 1,
- measure));
- clusters
- .add(new Canopy(new DenseVector(new double[] {-dC, dC}), 3, measure));
- clusters
- .add(new Canopy(new DenseVector(new double[] {dC, dC}), 5, measure));
- clusters
- .add(new Canopy(new DenseVector(new double[] {dC, -dC}), 7, measure));
+ clusters.add(new Canopy(new DenseVector(new double[] {-dC, -dC}), 1, measure));
+ clusters.add(new Canopy(new DenseVector(new double[] {-dC, dC}), 3, measure));
+ clusters.add(new Canopy(new DenseVector(new double[] {dC, dC}), 5, measure));
+ clusters.add(new Canopy(new DenseVector(new double[] {dC, -dC}), 7, measure));
representativePoints = Maps.newHashMap();
for (Cluster cluster : clusters) {
List<VectorWritable> points = Lists.newArrayList();
representativePoints.put(cluster.getId(), points);
points.add(new VectorWritable(cluster.getCenter().clone()));
- points.add(new VectorWritable(cluster.getCenter().plus(
- new DenseVector(new double[] {dP, dP}))));
- points.add(new VectorWritable(cluster.getCenter().plus(
- new DenseVector(new double[] {dP, -dP}))));
- points.add(new VectorWritable(cluster.getCenter().plus(
- new DenseVector(new double[] {-dP, -dP}))));
- points.add(new VectorWritable(cluster.getCenter().plus(
- new DenseVector(new double[] {-dP, dP}))));
+ points.add(new VectorWritable(cluster.getCenter().plus(new DenseVector(new double[] {dP, dP}))));
+ points.add(new VectorWritable(cluster.getCenter().plus(new DenseVector(new double[] {dP, -dP}))));
+ points.add(new VectorWritable(cluster.getCenter().plus(new DenseVector(new double[] {-dP, -dP}))));
+ points.add(new VectorWritable(cluster.getCenter().plus(new DenseVector(new double[] {-dP, dP}))));
}
}
@@ -143,11 +138,9 @@ public final class TestCDbwEvaluator ext
* @throws Exception
*/
private void generateSamples(int num, double mx, double my, double sd) {
- log.info("Generating {} samples m=[{}, {}] sd={}", new Object[] {num, mx,
- my, sd});
+ log.info("Generating {} samples m=[{}, {}] sd={}", new Object[] {num, mx, my, sd});
for (int i = 0; i < num; i++) {
- sampleData.add(new VectorWritable(new DenseVector(new double[] {
- UncommonDistributions.rNorm(mx, sd),
+ sampleData.add(new VectorWritable(new DenseVector(new double[] {UncommonDistributions.rNorm(mx, sd),
UncommonDistributions.rNorm(my, sd)})));
}
}
@@ -158,290 +151,239 @@ public final class TestCDbwEvaluator ext
generateSamples(300, 0, 2, 0.1);
}
+ private void printRepPoints(Path output, int numIterations) throws IOException {
+ for (int i = 0; i <= numIterations; i++) {
+ Path out = new Path(output, "representativePoints-" + i);
+ System.out.println("Representative Points for iteration " + i);
+ Configuration conf = new Configuration();
+ for (Pair<IntWritable,VectorWritable> record : new SequenceFileDirIterable<IntWritable,VectorWritable>(out,
+ PathType.LIST, PathFilters.logsCRCFilter(), null, true, conf)) {
+ System.out.println("\tC-" + record.getFirst().get() + ": "
+ + AbstractCluster.formatVector(record.getSecond().get(), null));
+ }
+ }
+ }
+
@Test
public void testCDbw0() throws IOException {
- ClusteringTestUtils.writePointsToFile(referenceData,
- getTestTempFilePath("testdata/file1"), fs, conf);
+ ClusteringTestUtils.writePointsToFile(referenceData, getTestTempFilePath("testdata/file1"), fs, conf);
DistanceMeasure measure = new EuclideanDistanceMeasure();
initData(1, 0.25, measure);
- CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters,
- measure);
- assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity(),
- EPSILON);
- assertEquals("separation", 20.485281374238568, evaluator.separation(),
- EPSILON);
- assertEquals("intra cluster density", 0.8, evaluator.intraClusterDensity(),
- EPSILON);
+ CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters, measure);
+ assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity(), EPSILON);
+ assertEquals("separation", 20.485281374238568, evaluator.separation(), EPSILON);
+ assertEquals("intra cluster density", 0.8, evaluator.intraClusterDensity(), EPSILON);
assertEquals("CDbw", 16.388225099390855, evaluator.getCDbw(), EPSILON);
}
@Test
public void testCDbw1() throws IOException {
- ClusteringTestUtils.writePointsToFile(referenceData,
- getTestTempFilePath("testdata/file1"), fs, conf);
+ ClusteringTestUtils.writePointsToFile(referenceData, getTestTempFilePath("testdata/file1"), fs, conf);
DistanceMeasure measure = new EuclideanDistanceMeasure();
initData(1, 0.5, measure);
- CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters,
- measure);
- assertEquals("inter cluster density", 1.2, evaluator.interClusterDensity(),
- EPSILON);
- assertEquals("separation", 6.207661022496537, evaluator.separation(),
- EPSILON);
- assertEquals("intra cluster density", 0.4, evaluator.intraClusterDensity(),
- EPSILON);
+ CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters, measure);
+ assertEquals("inter cluster density", 1.2, evaluator.interClusterDensity(), EPSILON);
+ assertEquals("separation", 6.207661022496537, evaluator.separation(), EPSILON);
+ assertEquals("intra cluster density", 0.4, evaluator.intraClusterDensity(), EPSILON);
assertEquals("CDbw", 2.483064408998615, evaluator.getCDbw(), EPSILON);
}
@Test
public void testCDbw2() throws IOException {
- ClusteringTestUtils.writePointsToFile(referenceData,
- getTestTempFilePath("testdata/file1"), fs, conf);
+ ClusteringTestUtils.writePointsToFile(referenceData, getTestTempFilePath("testdata/file1"), fs, conf);
DistanceMeasure measure = new EuclideanDistanceMeasure();
initData(1, 0.75, measure);
- CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters,
- measure);
- assertEquals("inter cluster density", 0.682842712474619,
- evaluator.interClusterDensity(), EPSILON);
- assertEquals("separation", 4.0576740025245694, evaluator.separation(),
- EPSILON);
- assertEquals("intra cluster density", 0.26666666666666666,
- evaluator.intraClusterDensity(), EPSILON);
+ CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters, measure);
+ assertEquals("inter cluster density", 0.682842712474619, evaluator.interClusterDensity(), EPSILON);
+ assertEquals("separation", 4.0576740025245694, evaluator.separation(), EPSILON);
+ assertEquals("intra cluster density", 0.26666666666666666, evaluator.intraClusterDensity(), EPSILON);
assertEquals("CDbw", 1.0820464006732184, evaluator.getCDbw(), EPSILON);
}
@Test
public void testEmptyCluster() throws IOException {
- ClusteringTestUtils.writePointsToFile(referenceData,
- getTestTempFilePath("testdata/file1"), fs, conf);
+ ClusteringTestUtils.writePointsToFile(referenceData, getTestTempFilePath("testdata/file1"), fs, conf);
DistanceMeasure measure = new EuclideanDistanceMeasure();
initData(1, 0.25, measure);
- Canopy cluster = new Canopy(new DenseVector(new double[] {10, 10}), 19,
- measure);
+ Canopy cluster = new Canopy(new DenseVector(new double[] {10, 10}), 19, measure);
clusters.add(cluster);
List<VectorWritable> points = Lists.newArrayList();
representativePoints.put(cluster.getId(), points);
- CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters,
- measure);
- assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity(),
- EPSILON);
- assertEquals("separation", 20.485281374238568, evaluator.separation(),
- EPSILON);
- assertEquals("intra cluster density", 0.8, evaluator.intraClusterDensity(),
- EPSILON);
+ CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters, measure);
+ assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity(), EPSILON);
+ assertEquals("separation", 20.485281374238568, evaluator.separation(), EPSILON);
+ assertEquals("intra cluster density", 0.8, evaluator.intraClusterDensity(), EPSILON);
assertEquals("CDbw", 16.388225099390855, evaluator.getCDbw(), EPSILON);
}
@Test
public void testSingleValueCluster() throws IOException {
- ClusteringTestUtils.writePointsToFile(referenceData,
- getTestTempFilePath("testdata/file1"), fs, conf);
+ ClusteringTestUtils.writePointsToFile(referenceData, getTestTempFilePath("testdata/file1"), fs, conf);
DistanceMeasure measure = new EuclideanDistanceMeasure();
initData(1, 0.25, measure);
- Canopy cluster = new Canopy(new DenseVector(new double[] {0, 0}), 19,
- measure);
+ Canopy cluster = new Canopy(new DenseVector(new double[] {0, 0}), 19, measure);
clusters.add(cluster);
List<VectorWritable> points = Lists.newArrayList();
- points.add(new VectorWritable(cluster.getCenter().plus(
- new DenseVector(new double[] {1, 1}))));
+ points.add(new VectorWritable(cluster.getCenter().plus(new DenseVector(new double[] {1, 1}))));
representativePoints.put(cluster.getId(), points);
- CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters,
- measure);
- assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity(),
- EPSILON);
- assertEquals("separation", 20.485281374238568, evaluator.separation(),
- EPSILON);
- assertEquals("intra cluster density", 0.8, evaluator.intraClusterDensity(),
- EPSILON);
+ CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters, measure);
+ assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity(), EPSILON);
+ assertEquals("separation", 20.485281374238568, evaluator.separation(), EPSILON);
+ assertEquals("intra cluster density", 0.8, evaluator.intraClusterDensity(), EPSILON);
assertEquals("CDbw", 16.388225099390855, evaluator.getCDbw(), EPSILON);
}
/**
- * Representative points extraction will duplicate the cluster center if the
- * cluster has no assigned points. These clusters should be ignored like empty
- * clusters above
+ * Representative points extraction will duplicate the cluster center if the cluster has no assigned points. These
+ * clusters should be ignored like empty clusters above
*
* @throws IOException
*/
@Test
public void testAllSameValueCluster() throws IOException {
- ClusteringTestUtils.writePointsToFile(referenceData,
- getTestTempFilePath("testdata/file1"), fs, conf);
+ ClusteringTestUtils.writePointsToFile(referenceData, getTestTempFilePath("testdata/file1"), fs, conf);
DistanceMeasure measure = new EuclideanDistanceMeasure();
initData(1, 0.25, measure);
- Canopy cluster = new Canopy(new DenseVector(new double[] {0, 0}), 19,
- measure);
+ Canopy cluster = new Canopy(new DenseVector(new double[] {0, 0}), 19, measure);
clusters.add(cluster);
List<VectorWritable> points = Lists.newArrayList();
points.add(new VectorWritable(cluster.getCenter()));
points.add(new VectorWritable(cluster.getCenter()));
points.add(new VectorWritable(cluster.getCenter()));
representativePoints.put(cluster.getId(), points);
- CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters,
- measure);
- assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity(),
- EPSILON);
- assertEquals("separation", 20.485281374238568, evaluator.separation(),
- EPSILON);
- assertEquals("intra cluster density", 0.8, evaluator.intraClusterDensity(),
- EPSILON);
+ CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters, measure);
+ assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity(), EPSILON);
+ assertEquals("separation", 20.485281374238568, evaluator.separation(), EPSILON);
+ assertEquals("intra cluster density", 0.8, evaluator.intraClusterDensity(), EPSILON);
assertEquals("CDbw", 16.388225099390855, evaluator.getCDbw(), EPSILON);
}
/**
- * Clustering can produce very, very tight clusters that can cause the std
- * calculation to fail. These clusters should be processed correctly.
+ * Clustering can produce very, very tight clusters that can cause the std calculation to fail. These clusters should
+ * be processed correctly.
*
* @throws IOException
*/
@Test
public void testAlmostSameValueCluster() throws IOException {
- ClusteringTestUtils.writePointsToFile(referenceData,
- getTestTempFilePath("testdata/file1"), fs, conf);
+ ClusteringTestUtils.writePointsToFile(referenceData, getTestTempFilePath("testdata/file1"), fs, conf);
DistanceMeasure measure = new EuclideanDistanceMeasure();
initData(1, 0.25, measure);
- Canopy cluster = new Canopy(new DenseVector(new double[] {0, 0}), 19,
- measure);
+ Canopy cluster = new Canopy(new DenseVector(new double[] {0, 0}), 19, measure);
clusters.add(cluster);
List<VectorWritable> points = Lists.newArrayList();
- Vector delta = new DenseVector(new double[] { 0, Double.MIN_NORMAL });
+ Vector delta = new DenseVector(new double[] {0, Double.MIN_NORMAL});
points.add(new VectorWritable(delta.clone()));
points.add(new VectorWritable(delta.clone()));
points.add(new VectorWritable(delta.clone()));
points.add(new VectorWritable(delta.clone()));
points.add(new VectorWritable(delta.clone()));
representativePoints.put(cluster.getId(), points);
- CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters,
- measure);
- assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity(),
- EPSILON);
- assertEquals("separation", 28.970562748477143, evaluator.separation(),
- EPSILON);
- assertEquals("intra cluster density", 1.8, evaluator.intraClusterDensity(),
- EPSILON);
+ CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters, measure);
+ assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity(), EPSILON);
+ assertEquals("separation", 28.970562748477143, evaluator.separation(), EPSILON);
+ assertEquals("intra cluster density", 1.8, evaluator.intraClusterDensity(), EPSILON);
assertEquals("CDbw", 52.147012947258865, evaluator.getCDbw(), EPSILON);
}
@Test
public void testCanopy() throws Exception {
- ClusteringTestUtils.writePointsToFile(sampleData,
- getTestTempFilePath("testdata/file1"), fs, conf);
+ ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("testdata/file1"), fs, conf);
DistanceMeasure measure = new EuclideanDistanceMeasure();
- CanopyDriver.run(new Configuration(), testdata, output, measure, 3.1, 2.1,
- true, 0.0, true);
+ CanopyDriver.run(new Configuration(), testdata, output, measure, 3.1, 2.1, true, 0.0, true);
int numIterations = 10;
Path clustersIn = new Path(output, "clusters-0-final");
- RepresentativePointsDriver.run(conf, clustersIn, new Path(output,
- "clusteredPoints"), output, measure, numIterations, true);
+ RepresentativePointsDriver.run(conf, clustersIn, new Path(output, "clusteredPoints"), output, measure,
+ numIterations, true);
CDbwEvaluator evaluator = new CDbwEvaluator(conf, clustersIn);
// printRepPoints(numIterations);
// now print out the Results
System.out.println("Canopy CDbw = " + evaluator.getCDbw());
- System.out.println("Intra-cluster density = "
- + evaluator.intraClusterDensity());
- System.out.println("Inter-cluster density = "
- + evaluator.interClusterDensity());
+ System.out.println("Intra-cluster density = " + evaluator.intraClusterDensity());
+ System.out.println("Inter-cluster density = " + evaluator.interClusterDensity());
System.out.println("Separation = " + evaluator.separation());
}
@Test
public void testKmeans() throws Exception {
- ClusteringTestUtils.writePointsToFile(sampleData,
- getTestTempFilePath("testdata/file1"), fs, conf);
+ ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("testdata/file1"), fs, conf);
DistanceMeasure measure = new EuclideanDistanceMeasure();
// now run the Canopy job to prime kMeans canopies
- CanopyDriver.run(new Configuration(), testdata, output, measure, 3.1, 2.1,
- false, 0.0, true);
+ CanopyDriver.run(new Configuration(), testdata, output, measure, 3.1, 2.1, false, 0.0, true);
// now run the KMeans job
Path kmeansOutput = new Path(output, "kmeans");
- KMeansDriver.run(testdata, new Path(output, "clusters-0-final"), kmeansOutput, measure,
- 0.001, 10, true, 0.0, true);
+ KMeansDriver.run(testdata, new Path(output, "clusters-0-final"), kmeansOutput, measure, 0.001, 10, true, 0.0, true);
int numIterations = 10;
- Path clustersIn = new Path(output, "clusters-2");
- RepresentativePointsDriver.run(conf, clustersIn, new Path(output,
- "clusteredPoints"), kmeansOutput, measure, numIterations, true);
+ Path clustersIn = new Path(kmeansOutput, "clusters-10-final");
+ RepresentativePointsDriver.run(conf, clustersIn, new Path(kmeansOutput, "clusteredPoints"), kmeansOutput, measure,
+ numIterations, true);
CDbwEvaluator evaluator = new CDbwEvaluator(conf, clustersIn);
- // printRepPoints(numIterations);
+ printRepPoints(kmeansOutput, numIterations);
// now print out the Results
System.out.println("K-Means CDbw = " + evaluator.getCDbw());
- System.out.println("Intra-cluster density = "
- + evaluator.intraClusterDensity());
- System.out.println("Inter-cluster density = "
- + evaluator.interClusterDensity());
+ System.out.println("Intra-cluster density = " + evaluator.intraClusterDensity());
+ System.out.println("Inter-cluster density = " + evaluator.interClusterDensity());
System.out.println("Separation = " + evaluator.separation());
}
@Test
public void testFuzzyKmeans() throws Exception {
- ClusteringTestUtils.writePointsToFile(sampleData,
- getTestTempFilePath("testdata/file1"), fs, conf);
+ ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("testdata/file1"), fs, conf);
DistanceMeasure measure = new EuclideanDistanceMeasure();
// now run the Canopy job to prime kMeans canopies
- CanopyDriver.run(new Configuration(), testdata, output, measure, 3.1, 2.1,
- false, 0.0, true);
+ CanopyDriver.run(new Configuration(), testdata, output, measure, 3.1, 2.1, false, 0.0, true);
Path fuzzyKMeansOutput = new Path(output, "fuzzyk");
- // now run the KMeans job
- FuzzyKMeansDriver.run(testdata, new Path(output, "clusters-0-final"), fuzzyKMeansOutput ,
- measure, 0.001, 10, 2, true, true, 0, true);
+ // now run the KMeans job
+ FuzzyKMeansDriver.run(testdata, new Path(output, "clusters-0-final"), fuzzyKMeansOutput, measure, 0.001, 10, 2,
+ true, true, 0, true);
int numIterations = 10;
- Path clustersIn = new Path(output, "clusters-4");
- RepresentativePointsDriver.run(conf, clustersIn, new Path(output,
- "clusteredPoints"), fuzzyKMeansOutput, measure, numIterations, true);
+ Path clustersIn = new Path(fuzzyKMeansOutput, "clusters-4");
+ RepresentativePointsDriver.run(conf, clustersIn, new Path(fuzzyKMeansOutput, "clusteredPoints"), fuzzyKMeansOutput, measure,
+ numIterations, true);
CDbwEvaluator evaluator = new CDbwEvaluator(conf, clustersIn);
- // printRepPoints(numIterations);
+ printRepPoints(fuzzyKMeansOutput, numIterations);
// now print out the Results
System.out.println("Fuzzy K-Means CDbw = " + evaluator.getCDbw());
- System.out.println("Intra-cluster density = "
- + evaluator.intraClusterDensity());
- System.out.println("Inter-cluster density = "
- + evaluator.interClusterDensity());
+ System.out.println("Intra-cluster density = " + evaluator.intraClusterDensity());
+ System.out.println("Inter-cluster density = " + evaluator.interClusterDensity());
System.out.println("Separation = " + evaluator.separation());
}
@Test
public void testMeanShift() throws Exception {
- ClusteringTestUtils.writePointsToFile(sampleData,
- getTestTempFilePath("testdata/file1"), fs, conf);
+ ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("testdata/file1"), fs, conf);
DistanceMeasure measure = new EuclideanDistanceMeasure();
IKernelProfile kernelProfile = new TriangularKernelProfile();
- MeanShiftCanopyDriver.run(conf, testdata, output, measure, kernelProfile,
- 2.1, 1.0, 0.001, 10, false, true, true);
+ MeanShiftCanopyDriver.run(conf, testdata, output, measure, kernelProfile, 2.1, 1.0, 0.001, 10, false, true, true);
int numIterations = 10;
Path clustersIn = new Path(output, "clusters-2");
- RepresentativePointsDriver.run(conf, clustersIn, new Path(output,
- "clusteredPoints"), output, measure, numIterations, true);
+ RepresentativePointsDriver.run(conf, clustersIn, new Path(output, "clusteredPoints"), output, measure,
+ numIterations, true);
CDbwEvaluator evaluator = new CDbwEvaluator(conf, clustersIn);
// printRepPoints(numIterations);
// now print out the Results
System.out.println("Mean Shift CDbw = " + evaluator.getCDbw());
- System.out.println("Intra-cluster density = "
- + evaluator.intraClusterDensity());
- System.out.println("Inter-cluster density = "
- + evaluator.interClusterDensity());
+ System.out.println("Intra-cluster density = " + evaluator.intraClusterDensity());
+ System.out.println("Inter-cluster density = " + evaluator.interClusterDensity());
System.out.println("Separation = " + evaluator.separation());
}
@Test
public void testDirichlet() throws Exception {
- ClusteringTestUtils.writePointsToFile(sampleData,
- getTestTempFilePath("testdata/file1"), fs, conf);
- DistributionDescription description = new DistributionDescription(
- GaussianClusterDistribution.class.getName(),
+ ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("testdata/file1"), fs, conf);
+ DistributionDescription description = new DistributionDescription(GaussianClusterDistribution.class.getName(),
DenseVector.class.getName(), null, 2);
- DirichletDriver.run(new Configuration(), testdata, output, description, 15, 5, 1.0, true,
- true, (double) 0, true);
+ DirichletDriver.run(new Configuration(), testdata, output, description, 15, 5, 1.0, true, true, (double) 0, true);
int numIterations = 10;
Path clustersIn = new Path(output, "clusters-0");
- RepresentativePointsDriver.run(conf, clustersIn, new Path(output,
- "clusteredPoints"), output, new EuclideanDistanceMeasure(),
- numIterations, true);
+ RepresentativePointsDriver.run(conf, clustersIn, new Path(output, "clusteredPoints"), output,
+ new EuclideanDistanceMeasure(), numIterations, true);
CDbwEvaluator evaluator = new CDbwEvaluator(conf, clustersIn);
- // printRepPoints(numIterations);
+ printRepPoints(output, numIterations);
// now print out the Results
System.out.println("Dirichlet CDbw = " + evaluator.getCDbw());
- System.out.println("Intra-cluster density = "
- + evaluator.intraClusterDensity());
- System.out.println("Inter-cluster density = "
- + evaluator.interClusterDensity());
+ System.out.println("Intra-cluster density = " + evaluator.intraClusterDensity());
+ System.out.println("Inter-cluster density = " + evaluator.interClusterDensity());
System.out.println("Separation = " + evaluator.separation());
}